Diffstat (limited to 'sql/sql_select.cc')
-rw-r--r--  sql/sql_select.cc  | 11464
1 files changed, 8737 insertions, 2727 deletions
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index fa85f3108f2..e960a3d7c45 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2015 Oracle and/or its affiliates.
+ Copyright (c) 2009, 2015 MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -47,43 +48,47 @@
#include "records.h" // init_read_record, end_read_record
#include "filesort.h" // filesort_free_buffers
#include "sql_union.h" // mysql_union
+#include "opt_subselect.h"
+#include "log_slow.h"
+#include "sql_derived.h"
+
#include "debug_sync.h" // DEBUG_SYNC
#include <m_ctype.h>
#include <my_bit.h>
#include <hash.h>
#include <ft_global.h>
-#define PREV_BITS(type,A) ((type) (((type) 1 << (A)) -1))
-
const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
"MAYBE_REF","ALL","range","index","fulltext",
"ref_or_null","unique_subquery","index_subquery",
- "index_merge"
-};
+ "index_merge", "hash_ALL", "hash_range",
+ "hash_index", "hash_index_merge" };
+
+const char *copy_to_tmp_table= "Copying to tmp table";
struct st_sargable_param;
static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array);
-static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, COND *conds,
- DYNAMIC_ARRAY *keyuse);
+static bool make_join_statistics(JOIN *join, List<TABLE_LIST> &leaves,
+ COND *conds, DYNAMIC_ARRAY *keyuse);
static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
JOIN_TAB *join_tab,
uint tables, COND *conds,
- COND_EQUAL *cond_equal,
table_map table_map, SELECT_LEX *select_lex,
st_sargable_param **sargables);
+static bool sort_and_filter_keyuse(THD *thd, DYNAMIC_ARRAY *keyuse,
+ bool skip_unprefixed_keyparts);
static int sort_keyuse(KEYUSE *a,KEYUSE *b);
-static void set_position(JOIN *join,uint index,JOIN_TAB *table,KEYUSE *key);
+static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables);
static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
- table_map used_tables);
-static bool choose_plan(JOIN *join,table_map join_tables);
-
-static void best_access_path(JOIN *join, JOIN_TAB *s, THD *thd,
- table_map remaining_tables, uint idx,
- double record_count, double read_time);
+ bool allow_full_scan, table_map used_tables);
+void best_access_path(JOIN *join, JOIN_TAB *s,
+ table_map remaining_tables, uint idx,
+ bool disable_jbuf, double record_count,
+ POSITION *pos, POSITION *loose_scan_pos);
static void optimize_straight_join(JOIN *join, table_map join_tables);
static bool greedy_search(JOIN *join, table_map remaining_tables,
- uint depth, uint prune_level);
+ uint depth, uint prune_level);
static bool best_extension_by_limited_search(JOIN *join,
table_map remaining_tables,
uint idx, double record_count,
@@ -91,8 +96,9 @@ static bool best_extension_by_limited_search(JOIN *join,
uint prune_level);
static uint determine_search_depth(JOIN* join);
C_MODE_START
-static int join_tab_cmp(const void* ptr1, const void* ptr2);
-static int join_tab_cmp_straight(const void* ptr1, const void* ptr2);
+static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
+static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2);
+static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void *ptr2);
C_MODE_END
/*
TODO: 'find_best' is here only temporarily until 'greedy_search' is
@@ -101,66 +107,68 @@ C_MODE_END
static bool find_best(JOIN *join,table_map rest_tables,uint index,
double record_count,double read_time);
static uint cache_record_length(JOIN *join,uint index);
-static double prev_record_reads(JOIN *join, uint idx, table_map found_ref);
-static bool get_best_combination(JOIN *join);
+bool get_best_combination(JOIN *join);
static store_key *get_store_key(THD *thd,
KEYUSE *keyuse, table_map used_tables,
KEY_PART_INFO *key_part, uchar *key_buff,
uint maybe_null);
-static void make_outerjoin_info(JOIN *join);
+static bool make_outerjoin_info(JOIN *join);
+static Item*
+make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables,
+ table_map sjm_tables, bool inside_or_clause);
static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item);
-static void make_join_readinfo(JOIN *join, ulonglong options);
+static void revise_cache_usage(JOIN_TAB *join_tab);
+static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after);
static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables);
static void update_depend_map(JOIN *join);
-static void update_depend_map(JOIN *join, ORDER *order);
+static void update_depend_map_for_order(JOIN *join, ORDER *order);
static ORDER *remove_const(JOIN *join,ORDER *first_order,COND *cond,
bool change_list, bool *simple_order);
-static int return_zero_rows(JOIN *join, select_result *res,TABLE_LIST *tables,
+static int return_zero_rows(JOIN *join, select_result *res,
+ List<TABLE_LIST> &tables,
List<Item> &fields, bool send_row,
ulonglong select_options, const char *info,
- Item *having);
-static COND *build_equal_items(THD *thd, COND *cond,
+ Item *having, List<Item> &all_fields);
+static COND *build_equal_items(JOIN *join, COND *cond,
COND_EQUAL *inherited,
List<TABLE_LIST> *join_list,
+ bool ignore_on_conds,
COND_EQUAL **cond_equal_ref);
-static COND* substitute_for_best_equal_field(COND *cond,
+static COND* substitute_for_best_equal_field(JOIN_TAB *context_tab,
+ COND *cond,
COND_EQUAL *cond_equal,
void *table_join_idx);
static COND *simplify_joins(JOIN *join, List<TABLE_LIST> *join_list,
- COND *conds, bool top);
+ COND *conds, bool top, bool in_sj);
static bool check_interleaving_with_nj(JOIN_TAB *next);
static void restore_prev_nj_state(JOIN_TAB *last);
-static void reset_nj_counters(List<TABLE_LIST> *join_list);
+static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list);
static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
uint first_unused);
static COND *optimize_cond(JOIN *join, COND *conds,
List<TABLE_LIST> *join_list,
- Item::cond_result *cond_value);
-static bool open_tmp_table(TABLE *table);
-static bool create_myisam_tmp_table(TABLE *,TMP_TABLE_PARAM *, ulonglong, my_bool);
+ bool ignore_on_conds,
+ Item::cond_result *cond_value,
+ COND_EQUAL **cond_equal);
+bool const_expression_in_where(COND *conds,Item *item, Item **comp_item);
+static bool create_internal_tmp_table_from_heap2(THD *, TABLE *,
+ ENGINE_COLUMNDEF *, ENGINE_COLUMNDEF **,
+ int, bool, handlerton *, const char *, bool *);
static int do_select(JOIN *join,List<Item> *fields,TABLE *tmp_table,
Procedure *proc);
-static enum_nested_loop_state
-evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
- int error);
+static enum_nested_loop_state evaluate_join_record(JOIN *, JOIN_TAB *, int);
static enum_nested_loop_state
evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab);
static enum_nested_loop_state
-flush_cached_records(JOIN *join, JOIN_TAB *join_tab, bool skip_last);
-static enum_nested_loop_state
end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
static enum_nested_loop_state
-end_send_group(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
-static enum_nested_loop_state
end_write(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
static enum_nested_loop_state
end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
static enum_nested_loop_state
end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
-static enum_nested_loop_state
-end_write_group(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
static int test_if_group_changed(List<Cached_item> &list);
static int join_read_const_table(JOIN_TAB *tab, POSITION *pos);
@@ -174,7 +182,7 @@ static int join_no_more_records(READ_RECORD *info);
static int join_read_next(READ_RECORD *info);
static int join_init_quick_read_record(JOIN_TAB *tab);
static int test_if_quick_select(JOIN_TAB *tab);
-static int join_init_read_record(JOIN_TAB *tab);
+static bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab);
static int join_read_first(JOIN_TAB *tab);
static int join_read_next(READ_RECORD *info);
static int join_read_next_same(READ_RECORD *info);
@@ -185,8 +193,19 @@ static int join_ft_read_first(JOIN_TAB *tab);
static int join_ft_read_next(READ_RECORD *info);
int join_read_always_key_or_null(JOIN_TAB *tab);
int join_read_next_same_or_null(READ_RECORD *info);
-static COND *make_cond_for_table(COND *cond,table_map table,
- table_map used_table);
+static COND *make_cond_for_table(THD *thd, Item *cond,table_map table,
+ table_map used_table,
+ int join_tab_idx_arg,
+ bool exclude_expensive_cond,
+ bool retain_ref_cond);
+static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond,
+ Item *cond,
+ table_map tables,
+ table_map used_table,
+ int join_tab_idx_arg,
+ bool exclude_expensive_cond,
+ bool retain_ref_cond);
+
static Item* part_of_refkey(TABLE *form,Field *field);
uint find_shortest_key(TABLE *table, const key_map *usable_keys);
static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
@@ -199,7 +218,7 @@ static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
uint *saved_best_key_parts= NULL);
static bool test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,
ha_rows select_limit, bool no_changes,
- key_map *map);
+ const key_map *map);
static bool list_contains_unique_index(TABLE *table,
bool (*find_func) (Field *, void *), void *data);
static bool find_field_in_item_list (Field *field, void *data);
@@ -210,18 +229,11 @@ static int create_sort_index(THD *thd, JOIN *join, ORDER *order,
static int remove_duplicates(JOIN *join,TABLE *entry,List<Item> &fields,
Item *having);
static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field,
- ulong offset,Item *having);
+ Item *having);
static int remove_dup_with_hash_index(THD *thd,TABLE *table,
uint field_count, Field **first_field,
-
ulong key_length,Item *having);
-static int join_init_cache(THD *thd,JOIN_TAB *tables,uint table_count);
-static ulong used_blob_length(CACHE_FIELD **ptr);
-static bool store_record_in_cache(JOIN_CACHE *cache);
-static void reset_cache_read(JOIN_CACHE *cache);
-static void reset_cache_write(JOIN_CACHE *cache);
-static void read_cached_record(JOIN_TAB *tab);
-static bool cmp_buffer_with_ref(JOIN_TAB *tab);
+static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref);
static bool setup_new_fields(THD *thd, List<Item> &fields,
List<Item> &all_fields, ORDER *new_order);
static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array,
@@ -229,7 +241,8 @@ static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array,
List<Item> &all_fields,
bool *all_order_by_fields_used);
static bool test_if_subpart(ORDER *a,ORDER *b);
-static TABLE *get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables);
+static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List<TABLE_LIST> &tables,
+ table_map const_tables);
static void calc_group_buffer(JOIN *join,ORDER *group);
static bool make_group_fields(JOIN *main_join, JOIN *curr_join);
static bool alloc_group_fields(JOIN *join,ORDER *group);
@@ -253,10 +266,19 @@ static bool init_sum_functions(Item_sum **func, Item_sum **end);
static bool update_sum_func(Item_sum **func);
static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
bool distinct, const char *message=NullS);
-static Item *remove_additional_cond(Item* conds);
static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
-static bool test_if_ref(Item_field *left_item,Item *right_item);
+static uint make_join_orderinfo(JOIN *join);
+static bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array);
+Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
+ bool *inherited_fl);
+JOIN_TAB *first_depth_first_tab(JOIN* join);
+JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab);
+
+enum enum_exec_or_opt {WALK_OPTIMIZATION_TABS , WALK_EXECUTION_TABS};
+JOIN_TAB *first_breadth_first_tab(JOIN *join, enum enum_exec_or_opt tabs_kind);
+JOIN_TAB *next_breadth_first_tab(JOIN *join, enum enum_exec_or_opt tabs_kind,
+ JOIN_TAB *tab);
/**
This handles SELECT with and without UNION.
@@ -301,6 +323,21 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
res|= thd->is_error();
if (unlikely(res))
result->abort_result_set();
+ if (thd->killed == ABORT_QUERY)
+ {
+ /*
+ If LIMIT ROWS EXAMINED interrupted query execution, issue a warning,
+ continue with normal processing and produce an incomplete query result.
+ */
+ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_QUERY_EXCEEDED_ROWS_EXAMINED_LIMIT,
+ ER(ER_QUERY_EXCEEDED_ROWS_EXAMINED_LIMIT),
+ thd->accessed_rows_and_keys,
+ thd->lex->limit_rows_examined->val_uint());
+ thd->reset_killed();
+ }
+ /* Disable LIMIT ROWS EXAMINED after query execution. */
+ thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX;
MYSQL_SELECT_DONE((int) res, (ulong) thd->limit_found_rows);
DBUG_RETURN(res);
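A minimal SQL sketch of the behaviour added above (illustrative tables, assuming a server that supports the LIMIT ROWS EXAMINED clause):

    SELECT * FROM t1 JOIN t2 ON t1.a = t2.a
    LIMIT ROWS EXAMINED 1000;
    -- Once roughly 1000 row/key accesses have been made, execution stops,
    -- the rows produced so far are returned, and the
    -- ER_QUERY_EXCEEDED_ROWS_EXAMINED_LIMIT warning reports how many rows
    -- were actually examined (thd->accessed_rows_and_keys above).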
@@ -339,7 +376,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
function is, in turn, aggregated in the query block where the outer
field was resolved or some query nested therein, then the
Item_direct_ref class should be used. Also it should be used if we are
- grouping by a subquery containing the outer field.
+ grouping by a subquery that references this outer field.
The resolution is done here and not at the fix_fields() stage as
it can be done only after aggregate functions are fixed and pulled up to
@@ -358,11 +395,23 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
bool
fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
- Item **ref_pointer_array, ORDER *group_list)
+ Item **ref_pointer_array)
{
Item_outer_ref *ref;
- List_iterator<Item_outer_ref> ref_it(select->inner_refs_list);
+ /*
+ Mark the references from the inner_refs_list that occur in
+ the group by expressions. Those references will contain direct
+ references to the referred fields. The markers are set in
+ the found_in_group_by field of the references from the list.
+ */
+ List_iterator_fast <Item_outer_ref> ref_it(select->inner_refs_list);
+ for (ORDER *group= select->join->group_list; group; group= group->next)
+ {
+ (*group->item)->walk(&Item::check_inner_refs_processor,
+ TRUE, (uchar *) &ref_it);
+ }
+
while ((ref= ref_it++))
{
bool direct_ref= false;
@@ -407,22 +456,9 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
}
}
}
- else
- {
- /*
- Check if GROUP BY item trees contain the outer ref:
- in this case we have to use Item_direct_ref instead of Item_ref.
- */
- for (ORDER *group= group_list; group; group= group->next)
- {
- if ((*group->item)->walk(&Item::find_item_processor, TRUE,
- (uchar *) ref))
- {
- direct_ref= TRUE;
- break;
- }
- }
- }
+ else if (ref->found_in_group_by)
+ direct_ref= TRUE;
+
new_ref= direct_ref ?
new Item_direct_ref(ref->context, item_ref, ref->table_name,
ref->field_name, ref->alias_name_used) :
@@ -441,11 +477,86 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
}
/**
+ The following clauses are redundant for subqueries:
+
+ DISTINCT
+ GROUP BY if there are no aggregate functions and no HAVING
+ clause
+
+ Because redundant clauses are removed both from JOIN and
+ select_lex, the removal is permanent. Thus, it only makes sense to
+ call this function for normal queries and on first execution of
+ SP/PS
+
+ @param subq_select_lex select_lex that is part of a subquery
+ predicate. This object and the associated
+ join are modified.
+*/
+
+static
+void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex)
+{
+ DBUG_ENTER("remove_redundant_subquery_clauses");
+ Item_subselect *subq_predicate= subq_select_lex->master_unit()->item;
+ /*
+ The removal should happen for IN, ALL, ANY and EXISTS subqueries,
+ which means all but single row subqueries. Example single row
+ subqueries:
+ a) SELECT * FROM t1 WHERE t1.a = (<single row subquery>)
+ b) SELECT a, (<single row subquery>) FROM t1
+ */
+ if (subq_predicate->substype() == Item_subselect::SINGLEROW_SUBS)
+ DBUG_VOID_RETURN;
+
+ /* A subquery that is not single row should be one of IN/ALL/ANY/EXISTS. */
+ DBUG_ASSERT (subq_predicate->substype() == Item_subselect::EXISTS_SUBS ||
+ subq_predicate->is_in_predicate());
+
+ if (subq_select_lex->options & SELECT_DISTINCT)
+ {
+ subq_select_lex->join->select_distinct= false;
+ subq_select_lex->options&= ~SELECT_DISTINCT;
+ DBUG_PRINT("info", ("DISTINCT removed"));
+ }
+
+ /*
+ Remove GROUP BY if there are no aggregate functions and no HAVING
+ clause
+ */
+ if (subq_select_lex->group_list.elements &&
+ !subq_select_lex->with_sum_func && !subq_select_lex->join->having)
+ {
+ for (ORDER *ord= subq_select_lex->group_list.first; ord; ord= ord->next)
+ {
+ (*ord->item)->walk(&Item::eliminate_subselect_processor, FALSE, NULL);
+ }
+ subq_select_lex->join->group_list= NULL;
+ subq_select_lex->group_list.empty();
+ DBUG_PRINT("info", ("GROUP BY removed"));
+ }
+
+ /*
+ TODO: This would prevent processing queries with ORDER BY ... LIMIT,
+ therefore we disable this optimization for now.
+ Remove GROUP BY if there are no aggregate functions and no HAVING
+ clause
+ if (subq_select_lex->group_list.elements &&
+ !subq_select_lex->with_sum_func && !subq_select_lex->join->having)
+ {
+ subq_select_lex->join->group_list= NULL;
+ subq_select_lex->group_list.empty();
+ }
+ */
+ DBUG_VOID_RETURN;
+}
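An SQL sketch of the transformation performed above (illustrative tables): for IN/ALL/ANY/EXISTS subqueries, DISTINCT and an aggregate-free GROUP BY with no HAVING cannot change the outcome of the predicate, so both are removed permanently:

    SELECT * FROM t1
    WHERE t1.a IN (SELECT DISTINCT t2.a FROM t2 GROUP BY t2.a, t2.b);
    -- is processed roughly as if it had been written as
    SELECT * FROM t1 WHERE t1.a IN (SELECT t2.a FROM t2);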
+
+
+/**
Function to setup clauses without sum functions.
*/
inline int setup_without_group(THD *thd, Item **ref_pointer_array,
TABLE_LIST *tables,
- TABLE_LIST *leaves,
+ List<TABLE_LIST> &leaves,
List<Item> &fields,
List<Item> &all_fields,
COND **conds,
@@ -483,6 +594,7 @@ inline int setup_without_group(THD *thd, Item **ref_pointer_array,
mysql_select assumes that all tables are already opened
*****************************************************************************/
+
/**
Prepare of whole select (including sub queries in future).
@@ -499,8 +611,8 @@ int
JOIN::prepare(Item ***rref_pointer_array,
TABLE_LIST *tables_init,
uint wild_num, COND *conds_init, uint og_num,
- ORDER *order_init, ORDER *group_init,
- Item *having_init,
+ ORDER *order_init, bool skip_order_by,
+ ORDER *group_init, Item *having_init,
ORDER *proc_param_init, SELECT_LEX *select_lex_arg,
SELECT_LEX_UNIT *unit_arg)
{
@@ -521,29 +633,89 @@ JOIN::prepare(Item ***rref_pointer_array,
join_list= &select_lex->top_join_list;
union_part= unit_arg->is_union();
+ if (select_lex->handle_derived(thd->lex, DT_PREPARE))
+ DBUG_RETURN(1);
+
thd->lex->current_select->is_item_list_lookup= 1;
/*
If we have already executed SELECT, then it have not sense to prevent
its table from update (see unique_table())
+ Affects only materialized derived tables.
*/
- if (thd->derived_tables_processing)
- select_lex->exclude_from_table_unique_test= TRUE;
-
/* Check that all tables, fields, conds and order are ok */
-
if (!(select_options & OPTION_SETUP_TABLES_DONE) &&
setup_tables_and_check_access(thd, &select_lex->context, join_list,
- tables_list, &select_lex->leaf_tables,
- FALSE, SELECT_ACL, SELECT_ACL))
+ tables_list, select_lex->leaf_tables,
+ FALSE, SELECT_ACL, SELECT_ACL, FALSE))
DBUG_RETURN(-1);
+
+ /*
+ Permanently remove redundant parts from the query if
+ 1) This is a subquery
+ 2) This is the first time this query is optimized (since the
+ transformation is permanent
+ 3) Not normalizing a view. Removal should take place when a
+ query involving a view is optimized, not when the view
+ is created
+ */
+ if (select_lex->master_unit()->item && // 1)
+ select_lex->first_cond_optimization && // 2)
+ !thd->lex->is_view_context_analysis()) // 3)
+ {
+ remove_redundant_subquery_clauses(select_lex);
+ }
+
+ /*
+ TRUE if the SELECT list mixes elements with and without grouping,
+ and there is no GROUP BY clause. Mixing non-aggregated fields with
+ aggregate functions in the SELECT list is a MySQL extension that
+ is allowed only if the ONLY_FULL_GROUP_BY sql mode is not set.
+ */
+ mixed_implicit_grouping= false;
+ if ((~thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY) &&
+ select_lex->with_sum_func && !group_list)
+ {
+ List_iterator_fast <Item> select_it(fields_list);
+ Item *select_el; /* Element of the SELECT clause, can be an expression. */
+ bool found_field_elem= false;
+ bool found_sum_func_elem= false;
+
+ while ((select_el= select_it++))
+ {
+ if (select_el->with_sum_func)
+ found_sum_func_elem= true;
+ if (select_el->with_field)
+ found_field_elem= true;
+ if (found_sum_func_elem && found_field_elem)
+ {
+ mixed_implicit_grouping= true;
+ break;
+ }
+ }
+ }
+
+ table_count= select_lex->leaf_tables.elements;
- TABLE_LIST *table_ptr;
- for (table_ptr= select_lex->leaf_tables;
- table_ptr;
- table_ptr= table_ptr->next_leaf)
- tables++;
+ TABLE_LIST *tbl;
+ List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
+ while ((tbl= li++))
+ {
+ //table_count++; /* Count the number of tables in the join. */
+ /*
+ If the query uses implicit grouping where the select list contains both
+ aggregate functions and non-aggregate fields, any non-aggregated field
+ may produce a NULL value. Set all fields of each table as nullable before
+ semantic analysis to take into account this change of nullability.
+
+ Note: this loop doesn't touch tables inside merged semi-joins, because
+ subquery-to-semijoin conversion has not been done yet. This is intended.
+ */
+ if (mixed_implicit_grouping && tbl->table)
+ tbl->table->maybe_null= 1;
+ }
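An SQL sketch of the mixed_implicit_grouping case handled above (illustrative tables): the SELECT list combines an aggregate with a non-aggregated column and there is no GROUP BY, a MySQL/MariaDB extension accepted unless ONLY_FULL_GROUP_BY is set:

    SELECT t1.a, MAX(t1.b) FROM t1 JOIN t2 ON t1.a = t2.a;
    -- Implicit grouping returns exactly one row; if no rows match, t1.a in
    -- that row is NULL, which is why all fields of the joined tables are
    -- marked maybe_null before semantic analysis.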
- if (setup_wild(thd, tables_list, fields_list, &all_fields, wild_num) ||
+ if ((wild_num && setup_wild(thd, tables_list, fields_list, &all_fields,
+ wild_num)) ||
select_lex->setup_ref_array(thd, og_num) ||
setup_fields(thd, (*rref_pointer_array), fields_list, MARK_COLUMNS_READ,
&all_fields, 1) ||
@@ -554,42 +726,45 @@ JOIN::prepare(Item ***rref_pointer_array,
DBUG_RETURN(-1); /* purecov: inspected */
ref_pointer_array= *rref_pointer_array;
-
+
+ /* Resolve the ORDER BY that was skipped, then remove it. */
+ if (skip_order_by && select_lex != select_lex->master_unit()->global_parameters)
+ {
+ if (setup_order(thd, (*rref_pointer_array), tables_list, fields_list,
+ all_fields, select_lex->order_list.first))
+ DBUG_RETURN(-1);
+ select_lex->order_list.empty();
+ }
+
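An SQL sketch of the skipped ORDER BY handling above (illustrative tables, assuming the typical case of a UNION branch): the branch-level ORDER BY is still name-resolved so that errors in it are reported, but it is then dropped because it cannot affect the final result:

    (SELECT a FROM t1 ORDER BY b) UNION (SELECT a FROM t2) ORDER BY a;
    -- Only the trailing ORDER BY a (the unit's global parameters) is kept;
    -- ORDER BY b inside the first branch is resolved and then emptied.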
if (having)
{
nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
thd->where="having clause";
thd->lex->allow_sum_func|= (nesting_map)1 << select_lex_arg->nest_level;
select_lex->having_fix_field= 1;
+ /*
+ Wrap a lone field in the HAVING clause in case it becomes an outer field of a subquery,
+ which needs a persistent pointer to it, while HAVING itself can be changed by the optimizer
+ */
+ if (having->type() == Item::REF_ITEM &&
+ ((Item_ref *)having)->ref_type() == Item_ref::REF)
+ wrap_ident(thd, &having);
bool having_fix_rc= (!having->fixed &&
(having->fix_fields(thd, &having) ||
having->check_cols(1)));
select_lex->having_fix_field= 0;
- select_lex->having= having;
if (having_fix_rc || thd->is_error())
DBUG_RETURN(-1); /* purecov: inspected */
thd->lex->allow_sum_func= save_allow_sum_func;
}
-
- if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) &&
- !(select_options & SELECT_DESCRIBE))
- {
- Item_subselect *subselect;
- /* Is it subselect? */
- if ((subselect= select_lex->master_unit()->item))
- {
- Item_subselect::trans_res res;
- if ((res= subselect->select_transformer(this)) !=
- Item_subselect::RES_OK)
- {
- select_lex->fix_prepare_information(thd, &conds, &having);
- DBUG_RETURN((res == Item_subselect::RES_ERROR));
- }
- }
- }
+
+ int res= check_and_do_in_subquery_rewrites(this);
select_lex->fix_prepare_information(thd, &conds, &having);
+
+ if (res)
+ DBUG_RETURN(res);
if (order)
{
@@ -639,8 +814,7 @@ JOIN::prepare(Item ***rref_pointer_array,
}
if (select_lex->inner_refs_list.elements &&
- fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array,
- group_list))
+ fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array))
DBUG_RETURN(-1);
if (group_list)
@@ -665,15 +839,15 @@ JOIN::prepare(Item ***rref_pointer_array,
}
}
- if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
- DBUG_RETURN(-1);
-
-
/*
Check if there are references to un-aggregated columns when computing
aggregate functions with implicit grouping (there is no GROUP BY).
*/
if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && !group_list &&
+ !(select_lex->master_unit()->item &&
+ select_lex->master_unit()->item->is_in_predicate() &&
+ ((Item_in_subselect*)select_lex->master_unit()->item)->
+ test_set_strategy(SUBS_MAXMIN_INJECTED)) &&
select_lex->non_agg_field_used() &&
select_lex->agg_func_used())
{
@@ -728,11 +902,36 @@ JOIN::prepare(Item ***rref_pointer_array,
if (!procedure && result && result->prepare(fields_list, unit_arg))
goto err; /* purecov: inspected */
+ unit= unit_arg;
+ if (prepare_stage2())
+ goto err;
+
+ DBUG_RETURN(0); // All OK
+
+err:
+ delete procedure; /* purecov: inspected */
+ procedure= 0;
+ DBUG_RETURN(-1); /* purecov: inspected */
+}
+
+
+/**
+ Second phase of prepare where we collect some statistics.
+
+ @details
+ We made this part separate to be able to recalculate some statistics after
+ transforming subqueries during the optimization phase.
+*/
+
+bool JOIN::prepare_stage2()
+{
+ bool res= TRUE;
+ DBUG_ENTER("JOIN::prepare_stage2");
+
/* Init join struct */
count_field_types(select_lex, &tmp_table_param, all_fields, 0);
ref_pointer_array_size= all_fields.elements*sizeof(Item*);
this->group= group_list != 0;
- unit= unit_arg;
if (tmp_table_param.sum_func_count && !group_list)
implicit_grouping= TRUE;
@@ -749,94 +948,9 @@ JOIN::prepare(Item ***rref_pointer_array,
if (alloc_func_list())
goto err;
- DBUG_RETURN(0); // All OK
-
+ res= FALSE;
err:
- delete procedure; /* purecov: inspected */
- procedure= 0;
- DBUG_RETURN(-1); /* purecov: inspected */
-}
-
-
-/*
- Remove the predicates pushed down into the subquery
-
- SYNOPSIS
- JOIN::remove_subq_pushed_predicates()
- where IN Must be NULL
- OUT The remaining WHERE condition, or NULL
-
- DESCRIPTION
- Given that this join will be executed using (unique|index)_subquery,
- without "checking NULL", remove the predicates that were pushed down
- into the subquery.
-
- If the subquery compares scalar values, we can remove the condition that
- was wrapped into trig_cond (it will be checked when needed by the subquery
- engine)
-
- If the subquery compares row values, we need to keep the wrapped
- equalities in the WHERE clause: when the left (outer) tuple has both NULL
- and non-NULL values, we'll do a full table scan and will rely on the
- equalities corresponding to non-NULL parts of left tuple to filter out
- non-matching records.
-
- TODO: We can remove the equalities that will be guaranteed to be true by the
- fact that subquery engine will be using index lookup. This must be done only
- for cases where there are no conversion errors of significance, e.g. 257
- that is searched in a byte. But this requires homogenization of the return
- codes of all Field*::store() methods.
-*/
-
-void JOIN::remove_subq_pushed_predicates(Item **where)
-{
- if (conds->type() == Item::FUNC_ITEM &&
- ((Item_func *)this->conds)->functype() == Item_func::EQ_FUNC &&
- ((Item_func *)conds)->arguments()[0]->type() == Item::REF_ITEM &&
- ((Item_func *)conds)->arguments()[1]->type() == Item::FIELD_ITEM &&
- test_if_ref ((Item_field *)((Item_func *)conds)->arguments()[1],
- ((Item_func *)conds)->arguments()[0]))
- {
- *where= 0;
- return;
- }
-}
-
-
-/*
- Index lookup-based subquery: save some flags for EXPLAIN output
-
- SYNOPSIS
- save_index_subquery_explain_info()
- join_tab Subquery's join tab (there is only one as index lookup is
- only used for subqueries that are single-table SELECTs)
- where Subquery's WHERE clause
-
- DESCRIPTION
- For index lookup-based subquery (i.e. one executed with
- subselect_uniquesubquery_engine or subselect_indexsubquery_engine),
- check its EXPLAIN output row should contain
- "Using index" (TAB_INFO_FULL_SCAN_ON_NULL)
- "Using Where" (TAB_INFO_USING_WHERE)
- "Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL)
- and set appropriate flags in join_tab->packed_info.
-*/
-
-static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
-{
- join_tab->packed_info= TAB_INFO_HAVE_VALUE;
- if (join_tab->table->covering_keys.is_set(join_tab->ref.key))
- join_tab->packed_info |= TAB_INFO_USING_INDEX;
- if (where)
- join_tab->packed_info |= TAB_INFO_USING_WHERE;
- for (uint i = 0; i < join_tab->ref.key_parts; i++)
- {
- if (join_tab->ref.cond_guards[i])
- {
- join_tab->packed_info |= TAB_INFO_FULL_SCAN_ON_NULL;
- break;
- }
- }
+ DBUG_RETURN(res); /* purecov: inspected */
}
@@ -855,7 +969,11 @@ static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
int
JOIN::optimize()
{
+ ulonglong select_opts_for_readinfo;
+ uint no_jbuf_after;
DBUG_ENTER("JOIN::optimize");
+
+ do_send_rows = (unit->select_limit_cnt) ? 1 : 0;
// to prevent double initialization on EXPLAIN
if (optimized)
DBUG_RETURN(0);
@@ -863,16 +981,54 @@ JOIN::optimize()
DEBUG_SYNC(thd, "before_join_optimize");
thd_proc_info(thd, "optimizing");
+
+ set_allowed_join_cache_types();
+ need_distinct= TRUE;
+
+ /* Run optimize phase for all derived tables/views used in this SELECT. */
+ if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
+ DBUG_RETURN(1);
+
+ if (select_lex->first_cond_optimization)
+ {
+ //Do it only for the first execution
+ /* Merge all mergeable derived tables/views in this SELECT. */
+ if (select_lex->handle_derived(thd->lex, DT_MERGE))
+ DBUG_RETURN(TRUE);
+ table_count= select_lex->leaf_tables.elements;
+ }
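An SQL sketch of what the DT_MERGE step above covers (illustrative tables): a derived table that is a plain SELECT over base tables can be merged into the outer query instead of being materialized:

    SELECT dt.a FROM (SELECT a, b FROM t1 WHERE b > 0) AS dt WHERE dt.a < 10;
    -- After merging this is handled roughly like
    SELECT t1.a FROM t1 WHERE t1.b > 0 AND t1.a < 10;
    -- A non-mergeable derived table (e.g. one with GROUP BY or DISTINCT) is
    -- materialized instead and may receive generated keys, see the
    -- OPTIMIZER_SWITCH_DERIVED_WITH_KEYS handling further down.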
+ // Update used tables after all handling derived table procedures
+ select_lex->update_used_tables();
+
+ if (transform_max_min_subquery())
+ DBUG_RETURN(1); /* purecov: inspected */
+
+ if (select_lex->first_cond_optimization)
+ {
+ /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
+ if (convert_join_subqueries_to_semijoins(this))
+ DBUG_RETURN(1); /* purecov: inspected */
+ /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
+ select_lex->update_used_tables();
+
+ }
+
+ eval_select_list_used_tables();
+
+ if (optimize_constant_subqueries())
+ DBUG_RETURN(1);
+
+ table_count= select_lex->leaf_tables.elements;
+
+ if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
+ DBUG_RETURN(-1);
+
row_limit= ((select_distinct || order || group_list) ? HA_POS_ERROR :
unit->select_limit_cnt);
/* select_limit is used to decide if we are likely to scan the whole table */
select_limit= unit->select_limit_cnt;
if (having || (select_options & OPTION_FOUND_ROWS))
select_limit= HA_POS_ERROR;
- do_send_rows = (unit->select_limit_cnt) ? 1 : 0;
- // Ignore errors of execution if option IGNORE present
- if (thd->lex->ignore)
- thd->lex->current_select->no_error= 1;
#ifdef HAVE_REF_TO_FIELDS // Not done yet
/* Add HAVING to WHERE if possible */
if (having && !group_list && !sum_func_count)
@@ -895,7 +1051,8 @@ JOIN::optimize()
}
}
#endif
- SELECT_LEX *sel= thd->lex->current_select;
+
+ SELECT_LEX *sel= select_lex;
if (sel->first_cond_optimization)
{
/*
@@ -903,25 +1060,31 @@ JOIN::optimize()
MEMROOT for prepared statements and stored procedures.
*/
- Query_arena *arena= thd->stmt_arena, backup;
- if (arena->is_conventional())
- arena= 0; // For easier test
- else
- thd->set_n_backup_active_arena(arena, &backup);
+ Query_arena *arena, backup;
+ arena= thd->activate_stmt_arena_if_needed(&backup);
sel->first_cond_optimization= 0;
/* Convert all outer joins to inner joins if possible */
- conds= simplify_joins(this, join_list, conds, TRUE);
+ conds= simplify_joins(this, join_list, conds, TRUE, FALSE);
+ if (select_lex->save_leaf_tables(thd))
+ DBUG_RETURN(1);
build_bitmap_for_nested_joins(join_list, 0);
sel->prep_where= conds ? conds->copy_andor_structure(thd) : 0;
+ sel->where= conds;
+
if (arena)
thd->restore_active_arena(arena, &backup);
}
+
+ if (setup_jtbm_semi_joins(this, join_list, &conds))
+ DBUG_RETURN(1);
- conds= optimize_cond(this, conds, join_list, &cond_value);
+ conds= optimize_cond(this, conds, join_list, FALSE,
+ &cond_value, &cond_equal);
+
if (thd->is_error())
{
error= 1;
@@ -930,7 +1093,9 @@ JOIN::optimize()
}
{
- having= optimize_cond(this, having, join_list, &having_value);
+ having= optimize_cond(this, having, join_list, TRUE,
+ &having_value, &having_equal);
+
if (thd->is_error())
{
error= 1;
@@ -938,10 +1103,17 @@ JOIN::optimize()
DBUG_RETURN(1);
}
if (select_lex->where)
+ {
select_lex->cond_value= cond_value;
+ if (sel->where != conds && cond_value == Item::COND_OK)
+ thd->change_item_tree(&sel->where, conds);
+ }
if (select_lex->having)
+ {
select_lex->having_value= having_value;
-
+ if (sel->having != having && having_value == Item::COND_OK)
+ thd->change_item_tree(&sel->having, having);
+ }
if (cond_value == Item::COND_FALSE || having_value == Item::COND_FALSE ||
(!unit->select_limit_cnt && !(select_options & OPTION_FOUND_ROWS)))
{ /* Impossible cond */
@@ -949,23 +1121,25 @@ JOIN::optimize()
"Impossible HAVING" : "Impossible WHERE"));
zero_result_cause= having_value == Item::COND_FALSE ?
"Impossible HAVING" : "Impossible WHERE";
- tables= 0;
+ table_count= top_join_tab_count= 0;
error= 0;
- DBUG_RETURN(0);
+ goto setup_subq_exit;
}
}
#ifdef WITH_PARTITION_STORAGE_ENGINE
{
TABLE_LIST *tbl;
- for (tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
+ List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
+ while ((tbl= li++))
{
/*
If tbl->embedding!=NULL that means that this table is in the inner
part of the nested outer join, and we can't do partition pruning
(TODO: check if this limitation can be lifted)
*/
- if (!tbl->embedding)
+ if (!tbl->embedding ||
+ (tbl->embedding && tbl->embedding->sj_on_expr))
{
Item *prune_cond= tbl->on_expr? tbl->on_expr : conds;
tbl->table->no_partitions_used= prune_partitions(thd, tbl->table,
@@ -995,13 +1169,14 @@ JOIN::optimize()
*/
if ((res=opt_sum_query(thd, select_lex->leaf_tables, all_fields, conds)))
{
+ DBUG_ASSERT(res >= 0);
if (res == HA_ERR_KEY_NOT_FOUND)
{
DBUG_PRINT("info",("No matching min/max row"));
zero_result_cause= "No matching min/max row";
- tables= 0;
+ table_count= top_join_tab_count= 0;
error=0;
- DBUG_RETURN(0);
+ goto setup_subq_exit;
}
if (res > 1)
{
@@ -1009,18 +1184,11 @@ JOIN::optimize()
DBUG_PRINT("error",("Error from opt_sum_query"));
DBUG_RETURN(1);
}
- if (res < 0)
- {
- DBUG_PRINT("info",("No matching min/max row"));
- zero_result_cause= "No matching min/max row";
- tables= 0;
- error=0;
- DBUG_RETURN(0);
- }
+
DBUG_PRINT("info",("Select tables optimized away"));
zero_result_cause= "Select tables optimized away";
tables_list= 0; // All tables resolved
- const_tables= tables;
+ const_tables= top_join_tab_count= table_count;
/*
Extract all table-independent conditions and replace the WHERE
clause with them. All other conditions were computed by opt_sum_query
@@ -1034,7 +1202,8 @@ JOIN::optimize()
if (conds && !(thd->lex->describe & DESCRIBE_EXTENDED))
{
COND *table_independent_conds=
- make_cond_for_table(conds, PSEUDO_TABLE_BITS, 0);
+ make_cond_for_table(thd, conds, PSEUDO_TABLE_BITS, 0, -1,
+ FALSE, FALSE);
DBUG_EXECUTE("where",
print_where(table_independent_conds,
"where after opt_sum_query()",
@@ -1047,10 +1216,11 @@ JOIN::optimize()
{
DBUG_PRINT("info",("No tables"));
error= 0;
- DBUG_RETURN(0);
+ goto setup_subq_exit;
}
error= -1; // Error is sent to client
- sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables);
+ /* get_sort_by_table() call used to be here: */
+ MEM_UNDEFINED(&sort_by_table, sizeof(sort_by_table));
/* Calculate how to do the join */
thd_proc_info(thd, "statistics");
@@ -1061,6 +1231,9 @@ JOIN::optimize()
DBUG_RETURN(1);
}
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS))
+ drop_unused_derived_keys();
+
if (rollup.state != ROLLUP::STATE_NONE)
{
if (rollup_process_const_fields())
@@ -1072,7 +1245,7 @@ JOIN::optimize()
else
{
/* Remove distinct if only const tables */
- select_distinct= select_distinct && (const_tables != tables);
+ select_distinct= select_distinct && (const_tables != table_count);
}
thd_proc_info(thd, "preparing");
@@ -1082,15 +1255,13 @@ JOIN::optimize()
DBUG_RETURN(1); // error == -1
}
if (const_table_map != found_const_table_map &&
- !(select_options & SELECT_DESCRIBE) &&
- (!conds ||
- !(conds->used_tables() & RAND_TABLE_BIT) ||
- select_lex->master_unit() == &thd->lex->unit)) // upper level SELECT
+ !(select_options & SELECT_DESCRIBE))
{
+ // There is at least one empty const table
zero_result_cause= "no matching row in const table";
DBUG_PRINT("error",("Error: %s", zero_result_cause));
error= 0;
- DBUG_RETURN(0);
+ goto setup_subq_exit;
}
if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
best_read > (double) thd->variables.max_join_size &&
@@ -1102,13 +1273,22 @@ JOIN::optimize()
}
if (const_tables && !thd->locked_tables_mode &&
!(select_options & SELECT_NO_UNLOCK))
- mysql_unlock_some_tables(thd, all_tables, const_tables);
+ mysql_unlock_some_tables(thd, table, const_tables);
if (!conds && outer_join)
{
/* Handle the case where we have an OUTER JOIN without a WHERE */
conds=new Item_int((longlong) 1,1); // Always true
}
- select= make_select(*all_tables, const_table_map,
+
+ if (impossible_where)
+ {
+ zero_result_cause=
+ "Impossible WHERE noticed after reading const tables";
+ select_lex->mark_const_derived(zero_result_cause);
+ goto setup_subq_exit;
+ }
+
+ select= make_select(*table, const_table_map,
const_table_map, conds, 1, &error);
if (error)
{ /* purecov: inspected */
@@ -1117,8 +1297,11 @@ JOIN::optimize()
DBUG_RETURN(1);
}
- reset_nj_counters(join_list);
- make_outerjoin_info(this);
+ reset_nj_counters(this, join_list);
+ if (make_outerjoin_info(this))
+ {
+ DBUG_RETURN(1);
+ }
/*
Among the equal fields belonging to the same multiple equality
@@ -1128,7 +1311,14 @@ JOIN::optimize()
*/
if (conds)
{
- conds= substitute_for_best_equal_field(conds, cond_equal, map2table);
+ conds= substitute_for_best_equal_field(NO_PARTICULAR_TAB, conds,
+ cond_equal, map2table);
+ if (thd->is_error())
+ {
+ error= 1;
+ DBUG_PRINT("error",("Error from substitute_for_best_equal"));
+ DBUG_RETURN(1);
+ }
conds->update_used_tables();
DBUG_EXECUTE("where",
print_where(conds,
@@ -1137,51 +1327,96 @@ JOIN::optimize()
}
/*
- Permorm the the optimization on fields evaluation mentioned above
+ Perform the optimization on fields evaluation mentioned above
for all on expressions.
- */
- for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables ; tab++)
+ */
+ JOIN_TAB *tab;
+ for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab;
+ tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
{
if (*tab->on_expr_ref)
{
- *tab->on_expr_ref= substitute_for_best_equal_field(*tab->on_expr_ref,
+ *tab->on_expr_ref= substitute_for_best_equal_field(NO_PARTICULAR_TAB,
+ *tab->on_expr_ref,
tab->cond_equal,
map2table);
+ if (thd->is_error())
+ {
+ error= 1;
+ DBUG_PRINT("error",("Error from substitute_for_best_equal"));
+ DBUG_RETURN(1);
+ }
(*tab->on_expr_ref)->update_used_tables();
}
}
- if (conds && const_table_map != found_const_table_map &&
- (select_options & SELECT_DESCRIBE))
- {
- conds=new Item_int((longlong) 0,1); // Always false
- }
-
/*
- It's necessary to check const part of HAVING cond as
- there is a chance that some cond parts may become
- const items after make_join_statisctics(for example
- when Item is a reference to cost table field from
- outer join).
- This check is performed only for those conditions
- which do not use aggregate functions. In such case
- temporary table may not be used and const condition
- elements may be lost during further having
- condition transformation in JOIN::exec.
+ Perform the optimization on fields evaluation mentioned above
+ for all used ref items.
*/
- if (having && const_table_map && !having->with_sum_func)
+ for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab;
+ tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
{
- having->update_used_tables();
- having= remove_eq_conds(thd, having, &having_value);
- if (having_value == Item::COND_FALSE)
+ uint key_copy_index=0;
+ for (uint i=0; i < tab->ref.key_parts; i++)
{
- having= new Item_int((longlong) 0,1);
- zero_result_cause= "Impossible HAVING noticed after reading const tables";
- error= 0;
- DBUG_RETURN(0);
+ Item **ref_item_ptr= tab->ref.items+i;
+ Item *ref_item= *ref_item_ptr;
+ if (!ref_item->used_tables() && !(select_options & SELECT_DESCRIBE))
+ continue;
+ COND_EQUAL *equals= cond_equal;
+ JOIN_TAB *first_inner= tab->first_inner;
+ while (equals)
+ {
+ ref_item= substitute_for_best_equal_field(tab, ref_item,
+ equals, map2table);
+ if (first_inner)
+ {
+ equals= first_inner->cond_equal;
+ first_inner= first_inner->first_upper;
+ }
+ else
+ equals= 0;
+ }
+ ref_item->update_used_tables();
+ if (*ref_item_ptr != ref_item)
+ {
+ *ref_item_ptr= ref_item;
+ Item *item= ref_item->real_item();
+ store_key *key_copy= tab->ref.key_copy[key_copy_index];
+ if (key_copy->type() == store_key::FIELD_STORE_KEY)
+ {
+ if (item->basic_const_item())
+ {
+ /* It is constant propagated here */
+ tab->ref.key_copy[key_copy_index]=
+ new store_key_const_item(*tab->ref.key_copy[key_copy_index],
+ item);
+ }
+ else if (item->const_item())
+ {
+ tab->ref.key_copy[key_copy_index]=
+ new store_key_item(*tab->ref.key_copy[key_copy_index],
+ item, TRUE);
+ }
+ else
+ {
+ store_key_field *field_copy= ((store_key_field *)key_copy);
+ DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
+ field_copy->change_source_field((Item_field *) item);
+ }
+ }
+ }
+ key_copy_index++;
}
}
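An SQL sketch of the substitution performed by the loop above (illustrative tables): after equality propagation a ref lookup value may become a constant or an item of a different kind, and the corresponding store_key object is replaced to match:

    SELECT * FROM t1 JOIN t2 ON t2.a = t1.a WHERE t1.a = 5;
    -- The multiple equality {t1.a, t2.a, 5} allows the ref access on t2.a to
    -- use the constant 5 directly (store_key_const_item) instead of copying
    -- t1.a on every probe.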
+ if (conds && const_table_map != found_const_table_map &&
+ (select_options & SELECT_DESCRIBE))
+ {
+ conds=new Item_int((longlong) 0,1); // Always false
+ }
+
/* Cache constant expressions in WHERE, HAVING, ON clauses. */
cache_const_exprs();
@@ -1189,7 +1424,8 @@ JOIN::optimize()
{
zero_result_cause=
"Impossible WHERE noticed after reading const tables";
- DBUG_RETURN(0); // error == 0
+ select_lex->mark_const_derived(zero_result_cause);
+ goto setup_subq_exit;
}
error= -1; /* if goto err */
@@ -1224,7 +1460,7 @@ JOIN::optimize()
The FROM clause must contain a single non-constant table.
*/
- if (tables - const_tables == 1 && (group_list || select_distinct) &&
+ if (table_count - const_tables == 1 && (group_list || select_distinct) &&
!tmp_table_param.sum_func_count &&
(!join_tab[const_tables].select ||
!join_tab[const_tables].select->quick ||
@@ -1275,7 +1511,7 @@ JOIN::optimize()
if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE)
select_distinct=0;
}
- else if (select_distinct && tables - const_tables == 1 &&
+ else if (select_distinct && table_count - const_tables == 1 &&
rollup.state == ROLLUP::STATE_NONE)
{
/*
@@ -1346,7 +1582,10 @@ JOIN::optimize()
DBUG_RETURN(1);
}
if (old_group_list && !group_list)
+ {
+ DBUG_ASSERT(group);
select_distinct= 0;
+ }
}
if (!group_list && group)
{
@@ -1380,7 +1619,7 @@ JOIN::optimize()
}
// Can't use sort on head table if using join buffering
- if (full_join)
+ if (full_join || hash_join)
{
TABLE *stable= (sort_by_table == (TABLE *) 1 ?
join_tab[const_tables].table : sort_by_table);
@@ -1397,93 +1636,39 @@ JOIN::optimize()
}
}
+ need_tmp= test_if_need_tmp_table();
+
/*
- Check if we need to create a temporary table.
- This has to be done if all tables are not already read (const tables)
- and one of the following conditions holds:
- - We are using DISTINCT (simple distinct's are already optimized away)
- - We are using an ORDER BY or GROUP BY on fields not in the first table
- - We are using different ORDER BY and GROUP BY orders
- - The user wants us to buffer the result.
- When the WITH ROLLUP modifier is present, we cannot skip temporary table
- creation for the DISTINCT clause just because there are only const tables.
+ If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the table
+ whose columns are required to be returned in a sorted order, then
+ the proper value for no_jbuf_after should be yielded by a call to
+ the make_join_orderinfo function.
+ Yet the current implementation of FORCE INDEX hints does not
+ allow us to do it in a clean manner.
*/
- need_tmp= ((const_tables != tables &&
- ((select_distinct || !simple_order || !simple_group) ||
- (group_list && order) ||
- test(select_options & OPTION_BUFFER_RESULT))) ||
- (rollup.state != ROLLUP::STATE_NONE && select_distinct));
-
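A few SQL sketches of the conditions listed in the comment above, which test_if_need_tmp_table() now encapsulates (illustrative tables; the actual decision also depends on the chosen join order):

    SELECT DISTINCT t2.a FROM t1 JOIN t2 ON t1.a = t2.a;       -- DISTINCT that is not optimized away
    SELECT t2.a FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t2.b;  -- ORDER BY on a non-first table
    SELECT a, COUNT(*) FROM t1 GROUP BY a ORDER BY COUNT(*);   -- different GROUP BY and ORDER BY
    -- In each case rows are buffered in a temporary table before the final
    -- ordering/deduplication is produced.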
- // No cache for MATCH
- make_join_readinfo(this,
- (select_options & (SELECT_DESCRIBE |
- SELECT_NO_JOIN_CACHE)) |
- (select_lex->ftfunc_list->elements ?
- SELECT_NO_JOIN_CACHE : 0));
+ no_jbuf_after= 1 ? table_count : make_join_orderinfo(this);
+
+ // Don't use join buffering when we use MATCH
+ select_opts_for_readinfo=
+ (select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) |
+ (select_lex->ftfunc_list->elements ? SELECT_NO_JOIN_CACHE : 0);
+
+ if (make_join_readinfo(this, select_opts_for_readinfo, no_jbuf_after))
+ DBUG_RETURN(1);
/* Perform FULLTEXT search before all regular searches */
if (!(select_options & SELECT_DESCRIBE))
init_ftfuncs(thd, select_lex, test(order));
- /*
- is this simple IN subquery?
- */
- if (!group_list && !order &&
- unit->item && unit->item->substype() == Item_subselect::IN_SUBS &&
- tables == 1 && conds &&
- !unit->is_union())
- {
- if (!having)
- {
- Item *where= conds;
- if (join_tab[0].type == JT_EQ_REF &&
- join_tab[0].ref.items[0]->name == in_left_expr_name)
- {
- remove_subq_pushed_predicates(&where);
- save_index_subquery_explain_info(join_tab, where);
- join_tab[0].type= JT_UNIQUE_SUBQUERY;
- error= 0;
- DBUG_RETURN(unit->item->
- change_engine(new
- subselect_uniquesubquery_engine(thd,
- join_tab,
- unit->item,
- where)));
- }
- else if (join_tab[0].type == JT_REF &&
- join_tab[0].ref.items[0]->name == in_left_expr_name)
- {
- remove_subq_pushed_predicates(&where);
- save_index_subquery_explain_info(join_tab, where);
- join_tab[0].type= JT_INDEX_SUBQUERY;
- error= 0;
- DBUG_RETURN(unit->item->
- change_engine(new
- subselect_indexsubquery_engine(thd,
- join_tab,
- unit->item,
- where,
- NULL,
- 0)));
- }
- } else if (join_tab[0].type == JT_REF_OR_NULL &&
- join_tab[0].ref.items[0]->name == in_left_expr_name &&
- having->name == in_having_cond)
- {
- join_tab[0].type= JT_INDEX_SUBQUERY;
- error= 0;
- conds= remove_additional_cond(conds);
- save_index_subquery_explain_info(join_tab, conds);
- DBUG_RETURN(unit->item->
- change_engine(new subselect_indexsubquery_engine(thd,
- join_tab,
- unit->item,
- conds,
- having,
- 1)));
- }
+ if (optimize_unflattened_subqueries())
+ DBUG_RETURN(1);
+
+ int res;
+ if ((res= rewrite_to_index_subquery_engine(this)) != -1)
+ DBUG_RETURN(res);
+ if (setup_subquery_caches())
+ DBUG_RETURN(-1);
- }
/*
Need to tell handlers that to play it safe, it should fetch all
columns of the primary key of the tables: this is because MySQL may
@@ -1492,13 +1677,16 @@ JOIN::optimize()
*/
if (need_tmp || select_distinct || group_list || order)
{
- for (uint i = const_tables; i < tables; i++)
- join_tab[i].table->prepare_for_position();
+ for (uint i= 0; i < table_count; i++)
+ {
+ if (!(table[i]->map & const_table_map))
+ table[i]->prepare_for_position();
+ }
}
DBUG_EXECUTE("info",TEST_join(this););
- if (const_tables != tables)
+ if (const_tables != table_count)
{
/*
Because filesort always does a full table scan or a quick range scan
@@ -1554,7 +1742,7 @@ JOIN::optimize()
for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
{
Item *item= *tmp_order->item;
- if (item->walk(&Item::is_expensive_processor, 0, (uchar*)0))
+ if (item->is_expensive())
{
/* Force tmp table without sort */
need_tmp=1; simple_order=simple_group=0;
@@ -1568,7 +1756,7 @@ JOIN::optimize()
if (select_options & SELECT_DESCRIBE)
{
error= 0;
- DBUG_RETURN(0);
+ goto derived_exit;
}
having= 0;
@@ -1583,7 +1771,6 @@ JOIN::optimize()
single table queries, thus it is sufficient to test only the first
join_tab element of the plan for its access method.
*/
- bool need_distinct= TRUE;
if (join_tab->is_using_loose_index_scan())
{
tmp_table_param.precomputed_group_by= TRUE;
@@ -1594,6 +1781,54 @@ JOIN::optimize()
}
}
+ error= 0;
+
+ DBUG_RETURN(0);
+
+setup_subq_exit:
+ /* Choose an execution strategy for this JOIN. */
+ if (!tables_list || !table_count)
+ choose_tableless_subquery_plan();
+ /*
+ Even with zero matching rows, subqueries in the HAVING clause may
+ need to be evaluated if there are aggregate functions in the query.
+ */
+ if (optimize_unflattened_subqueries())
+ DBUG_RETURN(1);
+ error= 0;
+
+derived_exit:
+ select_lex->mark_const_derived(zero_result_cause);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ Create and initialize objects needed for the execution of a query plan.
+ Evaluate constant expressions not evaluated during optimization.
+*/
+
+int JOIN::init_execution()
+{
+ DBUG_ENTER("JOIN::init_execution");
+
+ DBUG_ASSERT(optimized);
+ DBUG_ASSERT(!(select_options & SELECT_DESCRIBE));
+ initialized= true;
+
+ /*
+ Enable LIMIT ROWS EXAMINED during query execution if:
+ (1) This JOIN is the outermost query (not a subquery or derived table)
+ This ensures that the limit is enabled when actual execution begins, and
+ not if a subquery is evaluated during optimization of the outer query.
+ (2) This JOIN is not the result of a UNION. In this case do not apply the
+ limit in order to produce the partial query result stored in the
+ UNION temp table.
+ */
+ if (!select_lex->outer_select() && // (1)
+ select_lex != select_lex->master_unit()->fake_select_lex) // (2)
+ thd->lex->set_limit_rows_examined();
+
/* Create a tmp table if distinct or if the sort is too complicated */
if (need_tmp)
{
@@ -1684,13 +1919,14 @@ JOIN::optimize()
if (exec_tmp_table1->distinct)
{
- table_map used_tables= thd->lex->used_tables;
- JOIN_TAB *last_join_tab= join_tab+tables-1;
+ table_map used_tables= select_list_used_tables;
+ JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1;
do
{
- if (used_tables & last_join_tab->table->map)
+ if (used_tables & last_join_tab->table->map ||
+ last_join_tab->use_join_cache)
break;
- last_join_tab->not_used_in_distinct=1;
+ last_join_tab->shortcut_for_distinct= true;
} while (last_join_tab-- != join_tab);
/* Optimize "select distinct b from t1 order by key_part_1 limit #" */
if (order && skip_sort_order)
@@ -1709,12 +1945,102 @@ JOIN::optimize()
DBUG_RETURN(-1); /* purecov: inspected */
}
- error= 0;
DBUG_RETURN(0);
}
/**
+ Setup expression caches for subqueries that need them
+
+ @details
+ The function wraps correlated subquery expressions that return one value
+ into objects of the class Item_cache_wrapper, setting up an expression
+ cache for each of them. The result values of the subqueries are to be
+ cached together with the corresponding sets of the parameters - outer
+ references of the subqueries.
+
+ @retval FALSE OK
+ @retval TRUE Error
+*/
+
+bool JOIN::setup_subquery_caches()
+{
+ DBUG_ENTER("JOIN::setup_subquery_caches");
+
+ /*
+ We have to check all these conditions together because items created in
+ one of these clauses can be moved to another one by the optimizer
+ */
+ if (select_lex->expr_cache_may_be_used[IN_WHERE] ||
+ select_lex->expr_cache_may_be_used[IN_HAVING] ||
+ select_lex->expr_cache_may_be_used[IN_ON] ||
+ select_lex->expr_cache_may_be_used[NO_MATTER])
+ {
+ if (conds)
+ conds= conds->transform(&Item::expr_cache_insert_transformer,
+ (uchar*) thd);
+ JOIN_TAB *tab;
+ for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
+ tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+ {
+ if (tab->select_cond)
+ tab->select_cond=
+ tab->select_cond->transform(&Item::expr_cache_insert_transformer,
+ (uchar*) thd);
+ if (tab->cache_select && tab->cache_select->cond)
+ tab->cache_select->cond=
+ tab->cache_select->
+ cond->transform(&Item::expr_cache_insert_transformer,
+ (uchar*) thd);
+
+ }
+
+ if (having)
+ having= having->transform(&Item::expr_cache_insert_transformer,
+ (uchar*) thd);
+ if (tmp_having)
+ {
+ DBUG_ASSERT(having == NULL);
+ tmp_having= tmp_having->transform(&Item::expr_cache_insert_transformer,
+ (uchar*) thd);
+ }
+ }
+ if (select_lex->expr_cache_may_be_used[SELECT_LIST] ||
+ select_lex->expr_cache_may_be_used[IN_GROUP_BY] ||
+ select_lex->expr_cache_may_be_used[NO_MATTER])
+ {
+ List_iterator<Item> li(all_fields);
+ Item *item;
+ while ((item= li++))
+ {
+ Item *new_item=
+ item->transform(&Item::expr_cache_insert_transformer, (uchar*) thd);
+ if (new_item != item)
+ {
+ thd->change_item_tree(li.ref(), new_item);
+ }
+ }
+ for (ORDER *group= group_list; group ; group= group->next)
+ {
+ *group->item=
+ (*group->item)->transform(&Item::expr_cache_insert_transformer,
+ (uchar*) thd);
+ }
+ }
+ if (select_lex->expr_cache_may_be_used[NO_MATTER])
+ {
+ for (ORDER *ord= order; ord; ord= ord->next)
+ {
+ *ord->item=
+ (*ord->item)->transform(&Item::expr_cache_insert_transformer,
+ (uchar*) thd);
+ }
+ }
+ DBUG_RETURN(FALSE);
+}
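An SQL sketch of the kind of subquery this caching targets (illustrative tables): a correlated subquery returning one value, whose result depends only on its outer references and can therefore be reused when the same parameter values reappear:

    SELECT t1.a,
           (SELECT MAX(t2.b) FROM t2 WHERE t2.a = t1.a) AS m
    FROM t1;
    -- The subquery is wrapped in an Item_cache_wrapper; its result is cached
    -- keyed on the outer parameter t1.a, so rows with a repeated t1.a value
    -- do not re-execute the subquery.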
+
+
+/**
Restore values in temporary join.
*/
void JOIN::restore_tmp()
@@ -1725,6 +2051,62 @@ void JOIN::restore_tmp()
}
+/*
+ Shrink join buffers used for preceding tables to reduce the occupied space
+
+ SYNOPSIS
+ shrink_join_buffers()
+ jt table up to which the buffers are to be shrunk
+ curr_space the size of the space used by the buffers for tables 1..jt
+ needed_space the size of the space that has to be used by these buffers
+
+ DESCRIPTION
+ The function makes an attempt to shrink all join buffers used for the
+ tables starting from the first up to jt to reduce the total size of the
+ space occupied by the buffers used for tables 1,...,jt from curr_space
+ to needed_space.
+ The function assumes that the buffer for the table jt has not been
+ allocated yet.
+
+ RETURN
+ FALSE if all buffers have been successfully shrunk
+ TRUE otherwise
+*/
+
+bool JOIN::shrink_join_buffers(JOIN_TAB *jt,
+ ulonglong curr_space,
+ ulonglong needed_space)
+{
+ JOIN_CACHE *cache;
+ for (JOIN_TAB *tab= join_tab+const_tables; tab < jt; tab++)
+ {
+ cache= tab->cache;
+ if (cache)
+ {
+ size_t buff_size;
+ if (needed_space < cache->get_min_join_buffer_size())
+ return TRUE;
+ if (cache->shrink_join_buffer_in_ratio(curr_space, needed_space))
+ {
+ revise_cache_usage(tab);
+ return TRUE;
+ }
+ buff_size= cache->get_join_buffer_size();
+ curr_space-= buff_size;
+ needed_space-= buff_size;
+ }
+ }
+
+ cache= jt->cache;
+ DBUG_ASSERT(cache);
+ if (needed_space < cache->get_min_join_buffer_size())
+ return TRUE;
+ cache->set_join_buffer_size((size_t)needed_space);
+
+ return FALSE;
+}
+
+
int
JOIN::reinit()
{
@@ -1735,6 +2117,7 @@ JOIN::reinit()
ULL(0));
first_record= 0;
+ cleaned= false;
if (exec_tmp_table1)
{
@@ -1750,16 +2133,23 @@ JOIN::reinit()
free_io_cache(exec_tmp_table2);
filesort_free_buffers(exec_tmp_table2,0);
}
+ clear_sj_tmp_tables(this);
if (items0)
set_items_ref_array(items0);
if (join_tab_save)
- memcpy(join_tab, join_tab_save, sizeof(JOIN_TAB) * tables);
+ memcpy(join_tab, join_tab_save, sizeof(JOIN_TAB) * table_count);
/* need to reset ref access state (see join_read_key) */
if (join_tab)
- for (uint i= 0; i < tables; i++)
- join_tab[i].ref.key_err= TRUE;
+ {
+ JOIN_TAB *tab;
+ for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab;
+ tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+ {
+ tab->ref.key_err= TRUE;
+ }
+ }
if (tmp_join)
restore_tmp();
@@ -1772,6 +2162,16 @@ JOIN::reinit()
func->clear();
}
+ if (no_rows_in_result_called)
+ {
+ /* Reset effect of possible no_rows_in_result() */
+ List_iterator_fast<Item> it(fields_list);
+ Item *item;
+ no_rows_in_result_called= 0;
+ while ((item= it++))
+ item->restore_to_before_no_rows_in_result();
+ }
+
if (!(select_options & SELECT_DESCRIBE))
init_ftfuncs(thd, select_lex, test(order));
@@ -1806,7 +2206,7 @@ JOIN::save_join_tab()
if (!join_tab_save && select_lex->master_unit()->uncacheable)
{
if (!(join_tab_save= (JOIN_TAB*)thd->memdup((uchar*) join_tab,
- sizeof(JOIN_TAB) * tables)))
+ sizeof(JOIN_TAB) * table_count)))
return 1;
}
return 0;
@@ -1829,7 +2229,6 @@ JOIN::exec()
{
List<Item> *columns_list= &fields_list;
int tmp_error;
- bool sort_index_created= false;
DBUG_ENTER("JOIN::exec");
@@ -1848,7 +2247,7 @@ JOIN::exec()
}
(void) result->prepare2(); // Currently, this cannot fail.
- if (!tables_list && (tables || !select_lex->with_sum_func))
+ if (!tables_list && (table_count || !select_lex->with_sum_func))
{ // Only test of functions
if (select_options & SELECT_DESCRIBE)
select_describe(this, FALSE, FALSE, FALSE,
@@ -1869,6 +2268,7 @@ JOIN::exec()
In this case JOIN::exec must check for JOIN::having_value, in the
same way it checks for JOIN::cond_value.
*/
+ DBUG_ASSERT(error == 0);
if (cond_value != Item::COND_FALSE &&
having_value != Item::COND_FALSE &&
(!conds || conds->val_int()) &&
@@ -1876,19 +2276,18 @@ JOIN::exec()
{
if (do_send_rows &&
(procedure ? (procedure->send_row(procedure_fields_list) ||
- procedure->end_of_records()) : result->send_data(fields_list)))
+ procedure->end_of_records()) : result->send_data(fields_list)> 0))
error= 1;
else
- {
- error= (int) result->send_eof();
send_records= ((select_options & OPTION_FOUND_ROWS) ? 1 :
thd->sent_row_count);
- }
}
else
- {
- error=(int) result->send_eof();
send_records= 0;
+ if (!error)
+ {
+ join_free(); // Unlock all cursors
+ error= (int) result->send_eof();
}
}
/* Single select (without union) always returns 0 or 1 row */
@@ -1901,9 +2300,28 @@ JOIN::exec()
FOUND_ROWS() may be called. Never reset the examined row count here.
It must be accumulated from all join iterations of all join parts.
*/
- if (tables)
+ if (table_count)
thd->limit_found_rows= 0;
+ /*
+ Evaluate expensive constant conditions that were not evaluated during
+    optimization. Do not evaluate them for EXPLAIN statements, because these
+    conditions may be arbitrarily costly and because the optimize phase
+    might not have produced a complete executable plan for EXPLAINs.
+ */
+ if (exec_const_cond && !(select_options & SELECT_DESCRIBE) &&
+ !exec_const_cond->val_int())
+ zero_result_cause= "Impossible WHERE noticed after reading const tables";
+
+ /*
+ We've called exec_const_cond->val_int(). This may have caused an error.
+ */
+ if (thd->is_error())
+ {
+ error= thd->is_error();
+ DBUG_VOID_RETURN;
+ }
+
if (zero_result_cause)
{
(void) return_zero_rows(this, result, select_lex->leaf_tables,
@@ -1911,10 +2329,31 @@ JOIN::exec()
send_row_on_empty_set(),
select_options,
zero_result_cause,
- having);
+ having ? having : tmp_having, all_fields);
DBUG_VOID_RETURN;
}
+ /*
+ Evaluate all constant expressions with subqueries in the ORDER/GROUP clauses
+ to make sure that all subqueries return a single row. The evaluation itself
+ will trigger an error if that is not the case.
+ */
+ if (exec_const_order_group_cond.elements &&
+ !(select_options & SELECT_DESCRIBE))
+ {
+ List_iterator_fast<Item> const_item_it(exec_const_order_group_cond);
+ Item *cur_const_item;
+ while ((cur_const_item= const_item_it++))
+ {
+ cur_const_item->val_str(&cur_const_item->str_value);
+ if (thd->is_error())
+ {
+ error= thd->is_error();
+ DBUG_VOID_RETURN;
+ }
+ }
+ }
+
if ((this->select_lex->options & OPTION_SCHEMA_TABLE) &&
get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
DBUG_VOID_RETURN;
@@ -1938,7 +2377,7 @@ JOIN::exec()
}
if (order &&
(order != group_list || !(select_options & SELECT_BIG_RESULT)) &&
- (const_tables == tables ||
+ (const_tables == table_count ||
((simple_order || skip_sort_order) &&
test_if_skip_sort_order(&join_tab[const_tables], order,
select_limit, 0,
@@ -1949,15 +2388,30 @@ JOIN::exec()
select_describe(this, need_tmp,
order != 0 && !skip_sort_order,
select_distinct,
- !tables ? "No tables used" : NullS);
+ !table_count ? "No tables used" : NullS);
DBUG_VOID_RETURN;
}
+ else
+ {
+ /* it's a const select, materialize it. */
+ select_lex->mark_const_derived(zero_result_cause);
+ }
+
+ if (!initialized && init_execution())
+ DBUG_VOID_RETURN;
JOIN *curr_join= this;
List<Item> *curr_all_fields= &all_fields;
List<Item> *curr_fields_list= &fields_list;
TABLE *curr_tmp_table= 0;
/*
+ curr_join->join_free() will call JOIN::cleanup(full=TRUE). It will not
+ be safe to call update_used_tables() after that.
+ */
+ if (curr_join->tmp_having)
+ curr_join->tmp_having->update_used_tables();
+
+ /*
Initialize examined rows here because the values from all join parts
must be accumulated in examined_row_count. Hence every join
iteration must count from zero.
@@ -1980,11 +2434,14 @@ JOIN::exec()
curr_tmp_table= exec_tmp_table1;
/* Copy data to the temporary table */
- thd_proc_info(thd, "Copying to tmp table");
+ thd_proc_info(thd, copy_to_tmp_table);
DBUG_PRINT("info", ("%s", thd->proc_info));
if (!curr_join->sort_and_group &&
- curr_join->const_tables != curr_join->tables)
- curr_join->join_tab[curr_join->const_tables].sorted= 0;
+ curr_join->const_tables != curr_join->table_count)
+ {
+ JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables;
+ first_tab->sorted= test(first_tab->loosescan_match_tab);
+ }
Procedure *save_proc= curr_join->procedure;
tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0);
@@ -2000,7 +2457,9 @@ JOIN::exec()
curr_join->having= curr_join->tmp_having= 0; // Allready done
/* Change sum_fields reference to calculated fields in tmp_table */
+#ifdef HAVE_valgrind
if (curr_join != this)
+#endif
curr_join->all_fields= *curr_all_fields;
if (!items1)
{
@@ -2020,7 +2479,9 @@ JOIN::exec()
fields_list.elements, all_fields))
DBUG_VOID_RETURN;
}
+#ifdef HAVE_valgrind
if (curr_join != this)
+#endif
{
curr_join->tmp_all_fields1= tmp_all_fields1;
curr_join->tmp_fields_list1= tmp_fields_list1;
@@ -2066,8 +2527,8 @@ JOIN::exec()
*/
if ((curr_join->group_list && (!test_if_subpart(curr_join->group_list,
- curr_join->order) ||
- curr_join->select_distinct)) ||
+ curr_join->order) ||
+ curr_join->select_distinct)) ||
(curr_join->select_distinct &&
curr_join->tmp_table_param.using_indirect_summary_function))
{ /* Must copy to another table */
@@ -2125,13 +2586,11 @@ JOIN::exec()
{
DBUG_VOID_RETURN;
}
- sort_index_created= true;
sortorder= curr_join->sortorder;
}
thd_proc_info(thd, "Copying to group table");
DBUG_PRINT("info", ("%s", thd->proc_info));
- tmp_error= -1;
if (curr_join != this)
{
if (sum_funcs2)
@@ -2153,8 +2612,12 @@ JOIN::exec()
DBUG_VOID_RETURN;
curr_join->group_list= 0;
if (!curr_join->sort_and_group &&
- curr_join->const_tables != curr_join->tables)
- curr_join->join_tab[curr_join->const_tables].sorted= 0;
+ curr_join->const_tables != curr_join->table_count)
+ {
+ JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables;
+ first_tab->sorted= test(first_tab->loosescan_match_tab);
+ }
+ tmp_error= -1;
if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
(tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table,
0)))
@@ -2163,7 +2626,7 @@ JOIN::exec()
DBUG_VOID_RETURN;
}
end_read_record(&curr_join->join_tab->read_record);
- curr_join->const_tables= curr_join->tables; // Mark free for cleanup()
+ curr_join->const_tables= curr_join->table_count; // Mark free for cleanup()
curr_join->join_tab[0].table= 0; // Table is freed
// No sum funcs anymore
@@ -2174,7 +2637,13 @@ JOIN::exec()
tmp_fields_list2, tmp_all_fields2,
fields_list.elements, tmp_all_fields1))
DBUG_VOID_RETURN;
+#ifdef HAVE_valgrind
+ /*
+ Some GCCs use memcpy() for struct assignment, even for x=x.
+ GCC bug 19410: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
+ */
if (curr_join != this)
+#endif
{
curr_join->tmp_fields_list2= tmp_fields_list2;
curr_join->tmp_all_fields2= tmp_all_fields2;
@@ -2190,12 +2659,12 @@ JOIN::exec()
if (curr_tmp_table->distinct)
curr_join->select_distinct=0; /* Each row is unique */
+
curr_join->join_free(); /* Free quick selects */
+
if (curr_join->select_distinct && ! curr_join->group_list)
{
thd_proc_info(thd, "Removing duplicates");
- if (curr_join->tmp_having)
- curr_join->tmp_having->update_used_tables();
if (remove_duplicates(curr_join, curr_tmp_table,
*curr_fields_list, curr_join->tmp_having))
DBUG_VOID_RETURN;
@@ -2234,7 +2703,9 @@ JOIN::exec()
tmp_table_param.save_copy_field= curr_join->tmp_table_param.copy_field;
tmp_table_param.save_copy_field_end=
curr_join->tmp_table_param.copy_field_end;
+#ifdef HAVE_valgrind
if (curr_join != this)
+#endif
{
curr_join->tmp_all_fields3= tmp_all_fields3;
curr_join->tmp_fields_list3= tmp_fields_list3;
@@ -2269,15 +2740,14 @@ JOIN::exec()
if (curr_join->tmp_having && ! curr_join->group_list &&
! curr_join->sort_and_group)
{
- // Some tables may have been const
- curr_join->tmp_having->update_used_tables();
JOIN_TAB *curr_table= &curr_join->join_tab[curr_join->const_tables];
table_map used_tables= (curr_join->const_table_map |
curr_table->table->map);
- Item* sort_table_cond= make_cond_for_table(curr_join->tmp_having,
+ Item* sort_table_cond= make_cond_for_table(thd, curr_join->tmp_having,
used_tables,
- (table_map) 0);
+ (table_map)0, -1,
+ FALSE, FALSE);
if (sort_table_cond)
{
if (!curr_table->select)
@@ -2291,16 +2761,33 @@ JOIN::exec()
new Item_cond_and(curr_table->select->cond,
sort_table_cond)))
DBUG_VOID_RETURN;
- curr_table->select->cond->fix_fields(thd, 0);
}
- curr_table->select_cond= curr_table->select->cond;
+ if (curr_table->pre_idx_push_select_cond)
+ {
+ if (sort_table_cond->type() == Item::COND_ITEM)
+ sort_table_cond= sort_table_cond->copy_andor_structure(thd);
+ if (!(curr_table->pre_idx_push_select_cond=
+ new Item_cond_and(curr_table->pre_idx_push_select_cond,
+ sort_table_cond)))
+ DBUG_VOID_RETURN;
+ }
+ if (curr_table->select->cond && !curr_table->select->cond->fixed)
+ curr_table->select->cond->fix_fields(thd, 0);
+ if (curr_table->pre_idx_push_select_cond &&
+ !curr_table->pre_idx_push_select_cond->fixed)
+ curr_table->pre_idx_push_select_cond->fix_fields(thd, 0);
+
+ curr_table->select->pre_idx_push_select_cond=
+ curr_table->pre_idx_push_select_cond;
+ curr_table->set_select_cond(curr_table->select->cond, __LINE__);
curr_table->select_cond->top_level_item();
DBUG_EXECUTE("where",print_where(curr_table->select->cond,
"select and having",
QT_ORDINARY););
- curr_join->tmp_having= make_cond_for_table(curr_join->tmp_having,
+ curr_join->tmp_having= make_cond_for_table(thd, curr_join->tmp_having,
~ (table_map) 0,
- ~used_tables);
+ ~used_tables, -1,
+ FALSE, FALSE);
DBUG_EXECUTE("where",print_where(curr_join->tmp_having,
"having after sort",
QT_ORDINARY););
@@ -2316,7 +2803,7 @@ JOIN::exec()
WHERE clause for any tables after the sorted one.
*/
JOIN_TAB *curr_table= &curr_join->join_tab[curr_join->const_tables+1];
- JOIN_TAB *end_table= &curr_join->join_tab[curr_join->tables];
+ JOIN_TAB *end_table= &curr_join->join_tab[curr_join->top_join_tab_count];
for (; curr_table < end_table ; curr_table++)
{
/*
@@ -2353,9 +2840,8 @@ JOIN::exec()
HA_POS_ERROR : unit->select_limit_cnt),
curr_join->group_list ? TRUE : FALSE))
DBUG_VOID_RETURN;
- sort_index_created= true;
sortorder= curr_join->sortorder;
- if (curr_join->const_tables != curr_join->tables &&
+ if (curr_join->const_tables != curr_join->table_count &&
!curr_join->join_tab[curr_join->const_tables].table->sort.io_cache)
{
/*
@@ -2385,16 +2871,6 @@ JOIN::exec()
error= do_select(curr_join, curr_fields_list, NULL, procedure);
thd->limit_found_rows= curr_join->send_records;
- if (sort_index_created && curr_join->tables != curr_join->const_tables )
- {
- // Restore the original "select" used by create_sort_index():
- JOIN_TAB *const tab= curr_join->join_tab + curr_join->const_tables;
- if (tab->saved_select)
- {
- tab->select= tab->saved_select;
- tab->saved_select= NULL;
- }
- }
/* Accumulate the counts from all join iterations of all join parts. */
thd->examined_row_count+= curr_join->examined_rows;
DBUG_PRINT("counts", ("thd->examined_row_count: %lu",
@@ -2432,9 +2908,12 @@ JOIN::destroy()
{
if (join_tab != tmp_join->join_tab)
{
- JOIN_TAB *tab, *end;
- for (tab= join_tab, end= tab+tables ; tab != end ; tab++)
+ JOIN_TAB *tab;
+ for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES);
+ tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+ {
tab->cleanup();
+ }
}
tmp_join->tmp_join= 0;
/*
@@ -2443,10 +2922,11 @@ JOIN::destroy()
anywhere else (as we need to keep the join is reusable).
*/
tmp_table_param.cleanup();
- tmp_table_param.copy_field= tmp_join->tmp_table_param.copy_field= 0;
+ tmp_join->tmp_table_param.copy_field= 0;
DBUG_RETURN(tmp_join->destroy());
}
cond_equal= 0;
+ having_equal= 0;
cleanup(1);
/* Cleanup items referencing temporary table columns */
@@ -2457,6 +2937,7 @@ JOIN::destroy()
if (exec_tmp_table2)
free_tmp_table(thd, exec_tmp_table2);
delete select;
+ destroy_sj_tmp_tables(this);
delete_dynamic(&keyuse);
delete procedure;
DBUG_RETURN(error);
@@ -2465,6 +2946,7 @@ JOIN::destroy()
void JOIN::cleanup_item_list(List<Item> &items) const
{
+ DBUG_ENTER("JOIN::cleanup_item_list");
if (!items.is_empty())
{
List_iterator_fast<Item> it(items);
@@ -2472,6 +2954,7 @@ void JOIN::cleanup_item_list(List<Item> &items) const
while ((item= it++))
item->cleanup();
}
+ DBUG_VOID_RETURN;
}
@@ -2560,10 +3043,9 @@ mysql_select(THD *thd, Item ***rref_pointer_array,
}
else
{
- err= join->prepare(rref_pointer_array, tables, wild_num,
- conds, og_num, order, group, having, proc_param,
- select_lex, unit);
- if (err)
+ if ((err= join->prepare(rref_pointer_array, tables, wild_num,
+ conds, og_num, order, false, group, having,
+ proc_param, select_lex, unit)))
{
goto err;
}
@@ -2574,14 +3056,20 @@ mysql_select(THD *thd, Item ***rref_pointer_array,
}
else
{
+ /*
+ When in EXPLAIN, delay deleting the joins so that they are still
+ available when we're producing EXPLAIN EXTENDED warning text.
+ */
+ if (select_options & SELECT_DESCRIBE)
+ free_join= 0;
+
if (!(join= new JOIN(thd, fields, select_options, result)))
DBUG_RETURN(TRUE);
thd_proc_info(thd, "init");
- thd->lex->used_tables=0; // Updated by setup_fields
- err= join->prepare(rref_pointer_array, tables, wild_num,
- conds, og_num, order, group, having, proc_param,
- select_lex, unit);
- if (err)
+ thd->lex->used_tables=0;
+ if ((err= join->prepare(rref_pointer_array, tables, wild_num,
+ conds, og_num, order, false, group, having, proc_param,
+ select_lex, unit)))
{
goto err;
}
@@ -2619,6 +3107,7 @@ err:
DBUG_RETURN(join->error);
}
+
/*****************************************************************************
Create JOIN_TABS, make a guess about the table types,
Approximate how many records will be used in each table
@@ -2636,8 +3125,9 @@ static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
if (select)
{
select->head=table;
+ table->reginfo.impossible_range=0;
if ((error= select->test_quick_select(thd, *(key_map *)keys,(table_map) 0,
- limit, 0)) == 1)
+ limit, 0, FALSE)) == 1)
DBUG_RETURN(select->quick->records);
if (error == -1)
{
@@ -2663,6 +3153,7 @@ typedef struct st_sargable_param
uint num_values; /* number of values in the above array */
} SARGABLE_PARAM;
+
/**
Calculate the best possible join and initialize the join structure.
@@ -2673,28 +3164,42 @@ typedef struct st_sargable_param
*/
static bool
-make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
- DYNAMIC_ARRAY *keyuse_array)
+make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
+ COND *conds, DYNAMIC_ARRAY *keyuse_array)
{
- int error;
+ int error= 0;
TABLE *table;
- TABLE_LIST *tables= tables_arg;
uint i,table_count,const_count,key;
table_map found_const_table_map, all_table_map, found_ref, refs;
key_map const_ref, eq_part;
+ bool has_expensive_keyparts;
TABLE **table_vector;
- JOIN_TAB *stat,*stat_end,*s,**stat_ref;
+ JOIN_TAB *stat,*stat_end,*s,**stat_ref, **stat_vector;
KEYUSE *keyuse,*start_keyuse;
table_map outer_join=0;
+ table_map no_rows_const_tables= 0;
SARGABLE_PARAM *sargables= 0;
- JOIN_TAB *stat_vector[MAX_TABLES+1];
+ List_iterator<TABLE_LIST> ti(tables_list);
+ TABLE_LIST *tables;
DBUG_ENTER("make_join_statistics");
- table_count=join->tables;
- stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*table_count);
- stat_ref=(JOIN_TAB**) join->thd->alloc(sizeof(JOIN_TAB*)*MAX_TABLES);
- table_vector=(TABLE**) join->thd->alloc(sizeof(TABLE*)*(table_count*2));
- if (!stat || !stat_ref || !table_vector)
+ LINT_INIT(table); /* inited in all loops */
+ table_count=join->table_count;
+
+ stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*(table_count));
+ stat_ref=(JOIN_TAB**) join->thd->alloc(sizeof(JOIN_TAB*)*
+ (MAX_TABLES + table_count + 1));
+ stat_vector= stat_ref + MAX_TABLES;
+ table_vector=(TABLE**) join->thd->calloc(sizeof(TABLE*)*(table_count*2));
+ join->positions= new (join->thd->mem_root) POSITION[(table_count+1)];
+ /*
+ best_positions is ok to allocate with alloc() as we copy things to it with
+ memcpy()
+ */
+ join->best_positions= (POSITION*) join->thd->alloc(sizeof(POSITION)*
+ (table_count +1));
+
+ if (join->thd->is_fatal_error)
DBUG_RETURN(1); // Eom /* purecov: inspected */
join->best_ref=stat_vector;
@@ -2703,9 +3208,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
found_const_table_map= all_table_map=0;
const_count=0;
- for (s= stat, i= 0;
- tables;
- s++, tables= tables->next_leaf, i++)
+ for (s= stat, i= 0; (tables= ti++); s++, i++)
{
TABLE_LIST *embedding= tables->embedding;
stat_vector[i]=s;
@@ -2713,36 +3216,33 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
s->const_keys.init();
s->checked_keys.init();
s->needed_reg.init();
- s->filesort_used_loose_index_scan= false;
- s->filesort_used_loose_index_scan_agg_distinct= false;
table_vector[i]=s->table=table=tables->table;
table->pos_in_table_list= tables;
- error= table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
+ error= tables->fetch_number_of_rows();
- DBUG_EXECUTE_IF("bug11747970_raise_error",
- {
- if (!error)
- {
- my_error(ER_UNKNOWN_ERROR, MYF(0));
- goto error;
- }
- });
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+ const bool no_partitions_used= table->no_partitions_used;
+#else
+ const bool no_partitions_used= FALSE;
+#endif
+ DBUG_EXECUTE_IF("bug11747970_raise_error",
+ { join->thd->killed= KILL_QUERY_HARD; });
if (error)
{
table->file->print_error(error, MYF(0));
goto error;
}
table->quick_keys.clear_all();
+ table->intersect_keys.clear_all();
table->reginfo.join_tab=s;
table->reginfo.not_exists_optimize=0;
bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys);
all_table_map|= table->map;
+ s->preread_init_done= FALSE;
s->join=join;
- s->info=0; // For describe
s->dependent= tables->dep_tables;
- s->key_dependent= 0;
if (tables->schema_table)
table->file->stats.records= 2;
table->quick_condition_rows= table->file->stats.records;
@@ -2751,13 +3251,13 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
if (*s->on_expr_ref)
{
/* s is the only inner table of an outer join */
-#ifdef WITH_PARTITION_STORAGE_ENGINE
- if ((!table->file->stats.records || table->no_partitions_used) && !embedding)
-#else
- if (!table->file->stats.records && !embedding)
-#endif
+ if (!table->is_filled_at_execution() &&
+ ((!table->file->stats.records &&
+ (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) ||
+ no_partitions_used) && !embedding)
{ // Empty table
s->dependent= 0; // Ignore LEFT JOIN depend.
+ no_rows_const_tables |= table->map;
set_position(join,const_count++,s,(KEYUSE*) 0);
continue;
}
@@ -2771,8 +3271,19 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
{
/* s belongs to a nested join, maybe to several embedded joins */
s->embedding_map= 0;
+ bool inside_an_outer_join= FALSE;
do
{
+ /*
+ If this is a semi-join nest, skip it, and proceed upwards. Maybe
+ we're in some outer join nest
+ */
+ if (embedding->sj_on_expr)
+ {
+ embedding= embedding->embedding;
+ continue;
+ }
+ inside_an_outer_join= TRUE;
NESTED_JOIN *nested_join= embedding->nested_join;
s->embedding_map|=nested_join->nj_map;
s->dependent|= embedding->dep_tables;
@@ -2780,22 +3291,30 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
outer_join|= nested_join->used_tables;
}
while (embedding);
- continue;
+ if (inside_an_outer_join)
+ continue;
}
-#ifdef WITH_PARTITION_STORAGE_ENGINE
- const bool no_partitions_used= table->no_partitions_used;
-#else
- const bool no_partitions_used= FALSE;
-#endif
- if ((table->s->system || table->file->stats.records <= 1 ||
+ if (!table->is_filled_at_execution() &&
+ (table->s->system ||
+ (table->file->stats.records <= 1 &&
+ (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) ||
no_partitions_used) &&
!s->dependent &&
- (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
!table->fulltext_searched && !join->no_const_tables)
{
set_position(join,const_count++,s,(KEYUSE*) 0);
+ no_rows_const_tables |= table->map;
+ }
+
+ /* SJ-Materialization handling: */
+ if (table->pos_in_table_list->jtbm_subselect &&
+ table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
+ {
+ set_position(join,const_count++,s,(KEYUSE*) 0);
+ no_rows_const_tables |= table->map;
}
}
+
stat_vector[i]=0;
join->outer_join=outer_join;
@@ -2804,83 +3323,98 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
/*
Build transitive closure for relation 'to be dependent on'.
This will speed up the plan search for many cases with outer joins,
- as well as allow us to catch illegal cross references.
+    as well as allow us to catch illegal cross references.
Warshall's algorithm is used to build the transitive closure.
- As we may restart the outer loop upto 'table_count' times, the
- complexity of the algorithm is O((number of tables)^3).
- However, most of the iterations will be shortcircuited when
- there are no pedendencies to propogate.
+    As we use bitmaps to represent the relation, the complexity
+ of the algorithm is O((number of tables)^2).
+
+ The classic form of the Warshall's algorithm would look like:
+ for (i= 0; i < table_count; i++)
+ {
+ for (j= 0; j < table_count; j++)
+ {
+ for (k= 0; k < table_count; k++)
+ {
+ if (bitmap_is_set(stat[j].dependent, i) &&
+ bitmap_is_set(stat[i].dependent, k))
+ bitmap_set_bit(stat[j].dependent, k);
+ }
+ }
+ }
*/
- for (i= 0 ; i < table_count ; i++)
+
+ for (s= stat ; s < stat_end ; s++)
{
- uint j;
- table= stat[i].table;
-
- if (!table->reginfo.join_tab->dependent)
- continue;
-
- /* Add my dependencies to other tables depending on me */
- for (j= 0, s= stat ; j < table_count ; j++, s++)
+ table= s->table;
+ for (JOIN_TAB *t= stat ; t < stat_end ; t++)
{
- if (s->dependent & table->map)
- {
- table_map was_dependent= s->dependent;
- s->dependent |= table->reginfo.join_tab->dependent;
- /*
- If we change dependencies for a table we already have
- processed: Redo dependency propagation from this table.
- */
- if (i > j && s->dependent != was_dependent)
- {
- i = j-1;
- break;
- }
- }
+ if (t->dependent & table->map)
+ t->dependent |= table->reginfo.join_tab->dependent;
}
+ if (outer_join & s->table->map)
+ s->table->maybe_null= 1;
}
-
+ /* Catch illegal cross references for outer joins */
for (i= 0, s= stat ; i < table_count ; i++, s++)
{
- /* Catch illegal cross references for outer joins */
if (s->dependent & s->table->map)
{
- join->tables=0; // Don't use join->table
+ join->table_count=0; // Don't use join->table
my_message(ER_WRONG_OUTER_JOIN, ER(ER_WRONG_OUTER_JOIN), MYF(0));
goto error;
}
-
- if (outer_join & s->table->map)
- s->table->maybe_null= 1;
s->key_dependent= s->dependent;
}
}
if (conds || outer_join)
- if (update_ref_and_keys(join->thd, keyuse_array, stat, join->tables,
- conds, join->cond_equal,
- ~outer_join, join->select_lex, &sargables))
+ {
+ if (update_ref_and_keys(join->thd, keyuse_array, stat, join->table_count,
+ conds, ~outer_join, join->select_lex, &sargables))
goto error;
+ /*
+ Keyparts without prefixes may be useful if this JOIN is a subquery, and
+ if the subquery may be executed via the IN-EXISTS strategy.
+ */
+ bool skip_unprefixed_keyparts=
+ !(join->is_in_subquery() &&
+ ((Item_in_subselect*)join->unit->item)->test_strategy(SUBS_IN_TO_EXISTS));
- /* Read tables with 0 or 1 rows (system tables) */
- join->const_table_map= 0;
+ if (keyuse_array->elements &&
+ sort_and_filter_keyuse(join->thd, keyuse_array,
+ skip_unprefixed_keyparts))
+ goto error;
+ DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array););
+ }
+
+ join->const_table_map= no_rows_const_tables;
+ join->const_tables= const_count;
+ eliminate_tables(join);
+ join->const_table_map &= ~no_rows_const_tables;
+ const_count= join->const_tables;
+ found_const_table_map= join->const_table_map;
+ /* Read tables with 0 or 1 rows (system tables) */
for (POSITION *p_pos=join->positions, *p_end=p_pos+const_count;
p_pos < p_end ;
p_pos++)
{
- int tmp;
s= p_pos->table;
- s->type=JT_SYSTEM;
- join->const_table_map|=s->table->map;
- if ((tmp=join_read_const_table(s, p_pos)))
- {
- if (tmp > 0)
- goto error; // Fatal error
- }
- else
+ if (! (s->table->map & join->eliminated_tables))
{
- found_const_table_map|= s->table->map;
- s->table->pos_in_table_list->optimized_away= TRUE;
+ int tmp;
+ s->type=JT_SYSTEM;
+ join->const_table_map|=s->table->map;
+ if ((tmp=join_read_const_table(s, p_pos)))
+ {
+ if (tmp > 0)
+ goto error; // Fatal error
+ }
+ else
+ {
+ found_const_table_map|= s->table->map;
+ s->table->pos_in_table_list->optimized_away= TRUE;
+ }
}
}
@@ -2901,12 +3435,16 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
{
table=s->table;
+ if (table->is_filled_at_execution())
+ continue;
+
/*
If equi-join condition by a key is null rejecting and after a
substitution of a const table the key value happens to be null
then we can state that there are no matches for this equi-join.
*/
- if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map)
+ if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map &&
+ !(table->map & join->eliminated_tables))
{
/*
When performing an outer join operation if there are no matching rows
@@ -2920,7 +3458,8 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
*/
while (keyuse->table == table)
{
- if (!(keyuse->val->used_tables() & ~join->const_table_map) &&
+ if (!keyuse->is_for_hash_join() &&
+ !(keyuse->val->used_tables() & ~join->const_table_map) &&
keyuse->val->is_null() && keyuse->null_rejecting)
{
s->type= JT_CONST;
@@ -2941,7 +3480,9 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
continue;
if (table->file->stats.records <= 1L &&
(table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
- !table->pos_in_table_list->embedding)
+ !table->pos_in_table_list->embedding &&
+ !((outer_join & table->map) &&
+ (*s->on_expr_ref)->is_expensive()))
{ // system table
int tmp= 0;
s->type=JT_SYSTEM;
@@ -2963,19 +3504,29 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
s->type= JT_REF;
while (keyuse->table == table)
{
+ if (keyuse->is_for_hash_join())
+ {
+ keyuse++;
+ continue;
+ }
start_keyuse=keyuse;
key=keyuse->key;
- s->keys.set_bit(key); // QQ: remove this ?
+ s->keys.set_bit(key); // TODO: remove this ?
refs=0;
const_ref.clear_all();
eq_part.clear_all();
+ has_expensive_keyparts= false;
do
{
if (keyuse->val->type() != Item::NULL_ITEM && !keyuse->optimize)
{
if (!((~found_const_table_map) & keyuse->used_tables))
+ {
const_ref.set_bit(keyuse->keypart);
+ if (keyuse->val->is_expensive())
+ has_expensive_keyparts= true;
+ }
else
refs|=keyuse->used_tables;
eq_part.set_bit(keyuse->keypart);
@@ -2983,20 +3534,39 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
keyuse++;
} while (keyuse->table == table && keyuse->key == key);
- if (eq_part.is_prefix(table->key_info[key].key_parts) &&
+ TABLE_LIST *embedding= table->pos_in_table_list->embedding;
+ /*
+ TODO (low priority): currently we ignore the const tables that
+ are within a semi-join nest which is within an outer join nest.
+ The effect of this is that we don't do const substitution for
+ such tables.
+ */
+ KEY *keyinfo= table->key_info + key;
+ uint key_parts= table->actual_n_key_parts(keyinfo);
+ if (eq_part.is_prefix(key_parts) &&
!table->fulltext_searched &&
- !table->pos_in_table_list->embedding)
+ (!embedding || (embedding->sj_on_expr && !embedding->embedding)))
{
- if (table->key_info[key].flags & HA_NOSAME)
+ key_map base_part, base_const_ref, base_eq_part;
+ base_part.set_prefix(keyinfo->key_parts);
+ base_const_ref= const_ref;
+ base_const_ref.intersect(base_part);
+ base_eq_part= eq_part;
+ base_eq_part.intersect(base_part);
+ if (table->actual_key_flags(keyinfo) & HA_NOSAME)
{
- if (const_ref == eq_part)
+
+ if (base_const_ref == base_eq_part &&
+ !has_expensive_keyparts &&
+ !((outer_join & table->map) &&
+ (*s->on_expr_ref)->is_expensive()))
{ // Found everything for ref.
int tmp;
ref_changed = 1;
s->type= JT_CONST;
join->const_table_map|=table->map;
set_position(join,const_count++,s,start_keyuse);
- if (create_ref_for_key(join, s, start_keyuse,
+ if (create_ref_for_key(join, s, start_keyuse, FALSE,
found_const_table_map))
goto error;
if ((tmp=join_read_const_table(s,
@@ -3012,14 +3582,17 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
else
found_ref|= refs; // Table is const if all refs are const
}
- else if (const_ref == eq_part)
+ else if (base_const_ref == base_eq_part)
s->const_keys.set_bit(key);
}
}
}
}
} while (join->const_table_map & found_ref && ref_changed);
-
+
+ join->sort_by_table= get_sort_by_table(join->order, join->group_list,
+ join->select_lex->leaf_tables,
+ join->const_table_map);
/*
Update info on indexes that can be used for search lookups as
reading const tables may has added new sargable predicates.
@@ -3040,19 +3613,78 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
}
}
+ join->impossible_where= false;
+ if (conds && const_count)
+ {
+ COND_EQUAL *orig_cond_equal = join->cond_equal;
+ conds->update_used_tables();
+ conds= remove_eq_conds(join->thd, conds, &join->cond_value);
+ if (conds && conds->type() == Item::COND_ITEM &&
+ ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
+ join->cond_equal= &((Item_cond_and*) conds)->cond_equal;
+ join->select_lex->where= conds;
+ if (join->cond_value == Item::COND_FALSE)
+ {
+ join->impossible_where= true;
+ conds=new Item_int((longlong) 0,1);
+ }
+ join->conds= conds;
+ join->cond_equal= NULL;
+ if (conds)
+ {
+ if (conds->type() == Item::COND_ITEM &&
+ ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
+ join->cond_equal= (&((Item_cond_and *) conds)->cond_equal);
+ else if (conds->type() == Item::FUNC_ITEM &&
+ ((Item_func*) conds)->functype() == Item_func::MULT_EQUAL_FUNC)
+ {
+ if (!join->cond_equal)
+ join->cond_equal= new COND_EQUAL;
+ join->cond_equal->current_level.empty();
+ join->cond_equal->current_level.push_back((Item_equal*) conds);
+ }
+ }
+
+ if (orig_cond_equal != join->cond_equal)
+ {
+ /*
+ If join->cond_equal has changed all references to it from COND_EQUAL
+ objects associated with ON expressions must be updated.
+ */
+ for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++)
+ {
+ if (*s->on_expr_ref && s->cond_equal &&
+ s->cond_equal->upper_levels == orig_cond_equal)
+ s->cond_equal->upper_levels= join->cond_equal;
+ }
+ }
+ }
+
/* Calc how many (possible) matched records in each table */
for (s=stat ; s < stat_end ; s++)
{
+ s->startup_cost= 0;
if (s->type == JT_SYSTEM || s->type == JT_CONST)
{
/* Only one matching row */
- s->found_records=s->records=s->read_time=1; s->worst_seeks=1.0;
+ s->found_records= s->records= 1;
+ s->read_time=1.0;
+ s->worst_seeks=1.0;
continue;
}
/* Approximate found rows and time to read them */
- s->found_records=s->records=s->table->file->stats.records;
- s->read_time=(ha_rows) s->table->file->scan_time();
+ if (s->table->is_filled_at_execution())
+ {
+ get_delayed_table_estimates(s->table, &s->records, &s->read_time,
+ &s->startup_cost);
+ s->found_records= s->records;
+ table->quick_condition_rows=s->records;
+ }
+ else
+ {
+ s->scan_time();
+ }
/*
Set a max range of how many seeks we can expect when using keys
@@ -3069,9 +3701,21 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
all select distinct fields participate in one index.
*/
add_group_and_distinct_keys(join, s);
-
- if (!s->const_keys.is_clear_all() &&
- !s->table->pos_in_table_list->embedding)
+
+ /*
+ Perform range analysis if there are keys it could use (1).
+ Don't do range analysis if we're on the inner side of an outer join (2).
+ Do range analysis if we're on the inner side of a semi-join (3).
+ Don't do range analysis for materialized subqueries (4).
+ Don't do range analysis for materialized derived tables (5)
+ */
+ if (!s->const_keys.is_clear_all() && // (1)
+ (!s->table->pos_in_table_list->embedding || // (2)
+ (s->table->pos_in_table_list->embedding && // (3)
+ s->table->pos_in_table_list->embedding->sj_on_expr)) && // (3)
+ !s->table->is_filled_at_execution() && // (4)
+ !(s->table->pos_in_table_list->derived && // (5)
+ s->table->pos_in_table_list->is_materialized_derived())) // (5)
{
ha_rows records;
SQL_SELECT *select;
@@ -3109,31 +3753,79 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
if (records != HA_POS_ERROR)
{
s->found_records=records;
- s->read_time= (ha_rows) (s->quick ? s->quick->read_time : 0.0);
+ s->read_time= s->quick ? s->quick->read_time : 0.0;
}
delete select;
}
}
+ if (pull_out_semijoin_tables(join))
+ DBUG_RETURN(TRUE);
+
join->join_tab=stat;
+ join->top_join_tab_count= table_count;
join->map2table=stat_ref;
- join->all_tables= table_vector;
+ join->table= table_vector;
join->const_tables=const_count;
join->found_const_table_map=found_const_table_map;
- /* Find an optimal join order of the non-constant tables. */
- if (join->const_tables != join->tables)
- {
+ if (join->const_tables != join->table_count)
optimize_keyuse(join, keyuse_array);
- if (choose_plan(join, all_table_map & ~join->const_table_map))
- goto error;
- }
- else
- {
- memcpy((uchar*) join->best_positions,(uchar*) join->positions,
- sizeof(POSITION)*join->const_tables);
- join->best_read=1.0;
+
+ DBUG_ASSERT(!join->conds || !join->cond_equal ||
+ !join->cond_equal->current_level.elements ||
+ (join->conds->type() == Item::COND_ITEM &&
+ ((Item_cond*) (join->conds))->functype() ==
+ Item_func::COND_AND_FUNC &&
+ join->cond_equal ==
+ &((Item_cond_and *) (join->conds))->cond_equal) ||
+ (join->conds->type() == Item::FUNC_ITEM &&
+ ((Item_func*) (join->conds))->functype() ==
+ Item_func::MULT_EQUAL_FUNC &&
+ join->cond_equal->current_level.elements == 1 &&
+ join->cond_equal->current_level.head() == join->conds));
+
+ if (optimize_semijoin_nests(join, all_table_map))
+ DBUG_RETURN(TRUE); /* purecov: inspected */
+
+ {
+ ha_rows records= 1;
+ SELECT_LEX_UNIT *unit= join->select_lex->master_unit();
+
+ /* Find an optimal join order of the non-constant tables. */
+ if (join->const_tables != join->table_count)
+ {
+ if (choose_plan(join, all_table_map & ~join->const_table_map))
+ goto error;
+ }
+ else
+ {
+ memcpy((uchar*) join->best_positions,(uchar*) join->positions,
+ sizeof(POSITION)*join->const_tables);
+ join->record_count= 1.0;
+ join->best_read=1.0;
+ }
+
+ if (!(join->select_options & SELECT_DESCRIBE) &&
+ unit->derived && unit->derived->is_materialized_derived())
+ {
+ /*
+ Calculate estimated number of rows for materialized derived
+ table/view.
+ */
+ for (i= 0; i < join->table_count ; i++)
+ records*= join->best_positions[i].records_read ?
+ (ha_rows)join->best_positions[i].records_read : 1;
+ set_if_smaller(records, unit->select_limit_cnt);
+ join->select_lex->increase_derived_records(records);
+ }
}
+
+ if (join->choose_subquery_plan(all_table_map & ~join->const_table_map))
+ goto error;
+
+ DEBUG_SYNC(join->thd, "inside_make_join_statistics");
+
/* Generate an execution plan from the found optimal join order. */
DBUG_RETURN(join->thd->killed || get_best_combination(join));
@@ -3144,8 +3836,12 @@ error:
may not be assigned yet by this function (which is building join_tab).
Dangling TABLE::reginfo.join_tab may cause part_of_refkey to choke.
*/
- for (tables= tables_arg; tables; tables= tables->next_leaf)
- tables->table->reginfo.join_tab= NULL;
+ {
+ TABLE_LIST *table;
+ List_iterator<TABLE_LIST> ti(tables_list);
+ while ((table= ti++))
+ table->table->reginfo.join_tab= NULL;
+ }
DBUG_RETURN (1);
}
@@ -3171,25 +3867,53 @@ typedef struct key_field_t {
*/
bool null_rejecting;
bool *cond_guard; /* See KEYUSE::cond_guard */
+ uint sj_pred_no; /* See KEYUSE::sj_pred_no */
} KEY_FIELD;
-/* Values in optimize */
-#define KEY_OPTIMIZE_EXISTS 1
-#define KEY_OPTIMIZE_REF_OR_NULL 2
-
/**
Merge new key definitions to old ones, remove those not used in both.
This is called for OR between different levels.
- To be able to do 'ref_or_null' we merge a comparison of a column
- and 'column IS NULL' to one test. This is useful for sub select queries
- that are internally transformed to something like:.
+ That is, the function operates on an array of KEY_FIELD elements which has
+ two parts:
+
+ $LEFT_PART $RIGHT_PART
+ +-----------------------+-----------------------+
+ start new_fields end
+
+ $LEFT_PART and $RIGHT_PART are arrays that have KEY_FIELD elements for two
+ parts of the OR condition. Our task is to produce an array of KEY_FIELD
+ elements that would correspond to "$LEFT_PART OR $RIGHT_PART".
+
+ The rules for combining elements are as follows:
+
+ (keyfieldA1 AND keyfieldA2 AND ...) OR (keyfieldB1 AND keyfieldB2 AND ...)=
+
+ = AND_ij (keyfieldA_i OR keyfieldB_j)
+
+ We discard all (keyfieldA_i OR keyfieldB_j) that refer to different
+ fields. For those referring to the same field, the logic is as follows:
+
+ t.keycol=expr1 OR t.keycol=expr2 -> (since expr1 and expr2 are different
+ we can't produce a single equality,
+ so produce nothing)
+
+ t.keycol=expr1 OR t.keycol=expr1 -> t.keycol=expr1
+
+ t.keycol=expr1 OR t.keycol IS NULL -> t.keycol=expr1, and also set
+ KEY_OPTIMIZE_REF_OR_NULL flag
+
+  The last one is for ref_or_null access. We have special handling for this
+ because it's needed for evaluating IN subqueries that are internally
+ transformed into
@code
- SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL
+ EXISTS(SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL)
@endcode
+  See add_key_fields() for a discussion of what and_level is.
+
KEY_FIELD::null_rejecting is processed as follows: @n
result has null_rejecting=true if it is set for both ORed references.
for example:
@@ -3263,20 +3987,26 @@ merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
new_fields->null_rejecting);
}
else if (old->eq_func && new_fields->eq_func &&
- ((old->val->const_item() && old->val->is_null()) ||
- new_fields->val->is_null()))
+ ((old->val->const_item() && !old->val->is_expensive() &&
+ old->val->is_null()) ||
+ (!new_fields->val->is_expensive() &&
+ new_fields->val->is_null())))
{
/* field = expression OR field IS NULL */
old->level= and_level;
- old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
+ if (old->field->maybe_null())
+ {
+ old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
+ /* The referred expression can be NULL: */
+ old->null_rejecting= 0;
+ }
/*
Remember the NOT NULL value unless the value does not depend
on other tables.
*/
- if (!old->val->used_tables() && old->val->is_null())
+ if (!old->val->used_tables() && !old->val->is_expensive() &&
+ old->val->is_null())
old->val= new_fields->val;
- /* The referred expression can be NULL: */
- old->null_rejecting= 0;
}
else
{
@@ -3309,6 +4039,52 @@ merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
}
+/*
+ Given a field, return its index in semi-join's select list, or UINT_MAX
+
+ DESCRIPTION
+ Given a field, we find its table; then see if the table is within a
+ semi-join nest and if the field was in select list of the subselect.
+    semi-join nest and if the field was in the select list of the subselect.
+    If it was, we return the field's index in the select list. The value is
+    used by the LooseScan strategy.
+
+static uint get_semi_join_select_list_index(Field *field)
+{
+ uint res= UINT_MAX;
+ TABLE_LIST *emb_sj_nest;
+ if ((emb_sj_nest= field->table->pos_in_table_list->embedding) &&
+ emb_sj_nest->sj_on_expr)
+ {
+ Item_in_subselect *subq_pred= emb_sj_nest->sj_subq_pred;
+ st_select_lex *subq_lex= subq_pred->unit->first_select();
+ if (subq_pred->left_expr->cols() == 1)
+ {
+ Item *sel_item= subq_lex->ref_pointer_array[0];
+ if (sel_item->type() == Item::FIELD_ITEM &&
+ ((Item_field*)sel_item)->field->eq(field))
+ {
+ res= 0;
+ }
+ }
+ else
+ {
+ for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
+ {
+ Item *sel_item= subq_lex->ref_pointer_array[i];
+ if (sel_item->type() == Item::FIELD_ITEM &&
+ ((Item_field*)sel_item)->field->eq(field))
+ {
+ res= i;
+ break;
+ }
+ }
+ }
+ }
+ return res;
+}
+
+
/**
Add a possible key to array of possible keys if it's usable as a key
@@ -3318,6 +4094,7 @@ merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
@param field Field used in comparision
@param eq_func True if we used =, <=> or IS NULL
@param value Value used for comparison with field
+ @param num_values Number of values[] that we are comparing against
@param usable_tables Tables which can be used for key optimization
@param sargables IN/OUT Array of found sargable candidates
@@ -3330,21 +4107,32 @@ merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
*/
static void
-add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
+add_key_field(JOIN *join,
+ KEY_FIELD **key_fields,uint and_level, Item_func *cond,
Field *field, bool eq_func, Item **value, uint num_values,
table_map usable_tables, SARGABLE_PARAM **sargables)
{
- uint exists_optimize= 0;
- if (!(field->flags & PART_KEY_FLAG))
+ uint optimize= 0;
+ if (eq_func &&
+ ((join->is_allowed_hash_join_access() &&
+ field->hash_join_is_possible() &&
+ !(field->table->pos_in_table_list->is_materialized_derived() &&
+ field->table->created)) ||
+ (field->table->pos_in_table_list->is_materialized_derived() &&
+ !field->table->created && !(field->flags & BLOB_FLAG))))
+ {
+ optimize= KEY_OPTIMIZE_EQ;
+ }
+ else if (!(field->flags & PART_KEY_FLAG))
{
// Don't remove column IS NULL on a LEFT JOIN table
if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
!field->table->maybe_null || field->null_ptr)
return; // Not a key. Skip it
- exists_optimize= KEY_OPTIMIZE_EXISTS;
+ optimize= KEY_OPTIMIZE_EXISTS;
DBUG_ASSERT(num_values == 1);
}
- else
+ if (optimize != KEY_OPTIMIZE_EXISTS)
{
table_map used_tables=0;
bool optimizable=0;
@@ -3361,12 +4149,12 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
!field->table->maybe_null || field->null_ptr)
return; // Can't use left join optimize
- exists_optimize= KEY_OPTIMIZE_EXISTS;
+ optimize= KEY_OPTIMIZE_EXISTS;
}
else
{
JOIN_TAB *stat=field->table->reginfo.join_tab;
- key_map possible_keys=field->key_start;
+ key_map possible_keys=field->get_possible_keys();
possible_keys.intersect(field->table->keys_in_use_for_query);
stat[0].keys.merge(possible_keys); // Add possible keys
@@ -3381,7 +4169,8 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
Field BETWEEN ...
Field IN ...
*/
- stat[0].key_dependent|=used_tables;
+ if (field->flags & PART_KEY_FLAG)
+ stat[0].key_dependent|=used_tables;
bool is_const=1;
for (uint i=0; i<num_values; i++)
@@ -3404,30 +4193,21 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
(*sargables)->arg_value= value;
(*sargables)->num_values= num_values;
}
+ if (!eq_func) // eq_func is NEVER true when num_values > 1
+ return;
+
/*
- We can't always use indexes when comparing a string index to a
- number. cmp_type() is checked to allow compare of dates to numbers.
- eq_func is NEVER true when num_values > 1
+      We can't use indexes when comparing a string index to a number,
+      or two strings if the effective collation of the operation
+      differs from the field collation.
*/
- if (!eq_func)
- return;
- if (field->result_type() == STRING_RESULT)
+
+ if (field->cmp_type() == STRING_RESULT)
{
- if ((*value)->result_type() != STRING_RESULT)
- {
- if (field->cmp_type() != (*value)->result_type())
- return;
- }
- else
- {
- /*
- We can't use indexes if the effective collation
- of the operation differ from the field collation.
- */
- if (field->cmp_type() == STRING_RESULT &&
- ((Field_str*)field)->charset() != cond->compare_collation())
+ if ((*value)->cmp_type() != STRING_RESULT)
return;
- }
+ if (((Field_str*)field)->charset() != cond->compare_collation())
+ return;
}
}
}
@@ -3440,8 +4220,8 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
(*key_fields)->field= field;
(*key_fields)->eq_func= eq_func;
(*key_fields)->val= *value;
- (*key_fields)->level= and_level;
- (*key_fields)->optimize= exists_optimize;
+ (*key_fields)->level= and_level;
+ (*key_fields)->optimize= optimize;
/*
If the condition has form "tbl.keypart = othertbl.field" and
othertbl.field can be NULL, there will be no matches if othertbl.field
@@ -3449,11 +4229,19 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
We use null_rejecting in add_not_null_conds() to add
'othertbl.field IS NOT NULL' to tab->select_cond.
*/
- (*key_fields)->null_rejecting= ((cond->functype() == Item_func::EQ_FUNC ||
- cond->functype() == Item_func::MULT_EQUAL_FUNC) &&
- ((*value)->type() == Item::FIELD_ITEM) &&
- ((Item_field*)*value)->field->maybe_null());
+ {
+ Item *real= (*value)->real_item();
+ if (((cond->functype() == Item_func::EQ_FUNC) ||
+ (cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
+ (real->type() == Item::FIELD_ITEM) &&
+ ((Item_field*)real)->field->maybe_null())
+ (*key_fields)->null_rejecting= true;
+ else
+ (*key_fields)->null_rejecting= false;
+ }
(*key_fields)->cond_guard= NULL;
+
+ (*key_fields)->sj_pred_no= get_semi_join_select_list_index(field);
(*key_fields)++;
}
@@ -3480,29 +4268,29 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
*/
static void
-add_key_equal_fields(KEY_FIELD **key_fields, uint and_level,
- Item_func *cond, Item_field *field_item,
+add_key_equal_fields(JOIN *join, KEY_FIELD **key_fields, uint and_level,
+ Item_func *cond, Item *field_item,
bool eq_func, Item **val,
uint num_values, table_map usable_tables,
SARGABLE_PARAM **sargables)
{
- Field *field= field_item->field;
- add_key_field(key_fields, and_level, cond, field,
+ Field *field= ((Item_field *) (field_item->real_item()))->field;
+ add_key_field(join, key_fields, and_level, cond, field,
eq_func, val, num_values, usable_tables, sargables);
- Item_equal *item_equal= field_item->item_equal;
+ Item_equal *item_equal= field_item->get_item_equal();
if (item_equal)
{
/*
Add to the set of possible key values every substitution of
the field for an equal field included into item_equal
*/
- Item_equal_iterator it(*item_equal);
- Item_field *item;
- while ((item= it++))
+ Item_equal_fields_iterator it(*item_equal);
+ while (it++)
{
- if (!field->eq(item->field))
+ Field *equal_field= it.get_curr_field();
+ if (!field->eq(equal_field))
{
- add_key_field(key_fields, and_level, cond, item->field,
+ add_key_field(join, key_fields, and_level, cond, equal_field,
eq_func, val, num_values, usable_tables,
sargables);
}
@@ -3527,11 +4315,29 @@ static bool
is_local_field (Item *field)
{
return field->real_item()->type() == Item::FIELD_ITEM
- && !(field->used_tables() & OUTER_REF_TABLE_BIT)
- && !((Item_field *)field->real_item())->depended_from;
+ && !(field->used_tables() & OUTER_REF_TABLE_BIT)
+ && !((Item_field *)field->real_item())->get_depended_from();
}
+/*
+ In this and other functions, and_level is a number that is ever-growing
+ and is different for the contents of every AND or OR clause. For example,
+  when processing the clause
+
+ (a AND b AND c) OR (x AND y)
+
+ we'll have
+ * KEY_FIELD elements for (a AND b AND c) are assigned and_level=1
+ * KEY_FIELD elements for (x AND y) are assigned and_level=2
+ * OR operation is performed, and whatever elements are left after it are
+ assigned and_level=3.
+
+  The primary reason for having the and_level attribute is the OR operation,
+  which uses and_level to mark the KEY_FIELDs that should get into the result
+  of the OR operation.
+*/
+
static void
add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
COND *cond, table_map usable_tables,
@@ -3638,10 +4444,10 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
condition is of the form::
'<field> BETWEEN value[1] AND value[2]'
*/
- if (is_local_field (values[0]))
+ if (is_local_field(values[0]))
{
field_item= (Item_field *) (values[0]->real_item());
- add_key_equal_fields(key_fields, *and_level, cond_func,
+ add_key_equal_fields(join, key_fields, *and_level, cond_func,
field_item, equal_func, &values[1],
num_values, usable_tables, sargables);
}
@@ -3652,10 +4458,10 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
*/
for (uint i= 1; i <= num_values; i++)
{
- if (is_local_field (values[i]))
+ if (is_local_field(values[i]))
{
field_item= (Item_field *) (values[i]->real_item());
- add_key_equal_fields(key_fields, *and_level, cond_func,
+ add_key_equal_fields(join, key_fields, *and_level, cond_func,
field_item, equal_func, values,
1, usable_tables, sargables);
}
@@ -3672,7 +4478,7 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
values--;
DBUG_ASSERT(cond_func->functype() != Item_func::IN_FUNC ||
cond_func->argument_count() != 2);
- add_key_equal_fields(key_fields, *and_level, cond_func,
+ add_key_equal_fields(join, key_fields, *and_level, cond_func,
(Item_field*) (cond_func->key_item()->real_item()),
0, values,
cond_func->argument_count()-1,
@@ -3687,8 +4493,9 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
if (is_local_field (cond_func->arguments()[0]))
{
- add_key_equal_fields(key_fields, *and_level, cond_func,
- (Item_field*) (cond_func->arguments()[0])->real_item(),
+ add_key_equal_fields(join, key_fields, *and_level, cond_func,
+ (Item_field*) (cond_func->arguments()[0])->
+ real_item(),
equal_func,
cond_func->arguments()+1, 1, usable_tables,
sargables);
@@ -3696,8 +4503,9 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
if (is_local_field (cond_func->arguments()[1]) &&
cond_func->functype() != Item_func::LIKE_FUNC)
{
- add_key_equal_fields(key_fields, *and_level, cond_func,
- (Item_field*) (cond_func->arguments()[1])->real_item(),
+ add_key_equal_fields(join, key_fields, *and_level, cond_func,
+ (Item_field*) (cond_func->arguments()[1])->
+ real_item(),
equal_func,
cond_func->arguments(),1,usable_tables,
sargables);
@@ -3712,17 +4520,17 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
Item *tmp=new Item_null;
if (unlikely(!tmp)) // Should never be true
return;
- add_key_equal_fields(key_fields, *and_level, cond_func,
- (Item_field*) (cond_func->arguments()[0])->real_item(),
- cond_func->functype() == Item_func::ISNULL_FUNC,
+ add_key_equal_fields(join, key_fields, *and_level, cond_func,
+ (Item_field*) (cond_func->arguments()[0])->
+ real_item(),
+ cond_func->functype() == Item_func::ISNULL_FUNC,
&tmp, 1, usable_tables, sargables);
}
break;
case Item_func::OPTIMIZE_EQUAL:
Item_equal *item_equal= (Item_equal *) cond;
Item *const_item= item_equal->get_const();
- Item_equal_iterator it(*item_equal);
- Item_field *item;
+ Item_equal_fields_iterator it(*item_equal);
if (const_item)
{
/*
@@ -3730,9 +4538,10 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
field1=const_item as a condition allowing an index access of the table
with field1 by the keys value of field1.
*/
- while ((item= it++))
+ while (it++)
{
- add_key_field(key_fields, *and_level, cond_func, item->field,
+ Field *equal_field= it.get_curr_field();
+ add_key_field(join, key_fields, *and_level, cond_func, equal_field,
TRUE, &const_item, 1, usable_tables, sargables);
}
}
@@ -3744,16 +4553,18 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
field1=field2 as a condition allowing an index access of the table
with field1 by the keys value of field2.
*/
- Item_equal_iterator fi(*item_equal);
- while ((item= fi++))
+ Item_equal_fields_iterator fi(*item_equal);
+ while (fi++)
{
- Field *field= item->field;
+ Field *field= fi.get_curr_field();
+ Item *item;
while ((item= it++))
{
- if (!field->eq(item->field))
+ Field *equal_field= it.get_curr_field();
+ if (!field->eq(equal_field))
{
- add_key_field(key_fields, *and_level, cond_func, field,
- TRUE, (Item **) &item, 1, usable_tables,
+ add_key_field(join, key_fields, *and_level, cond_func, field,
+ TRUE, &item, 1, usable_tables,
sargables);
}
}
@@ -3773,6 +4584,55 @@ max_part_bit(key_part_map bits)
return found;
}
+
+/**
+  Add a new keyuse to the specified array of KEYUSE objects
+
+ @param[in,out] keyuse_array array of keyuses to be extended
+ @param[in] key_field info on the key use occurrence
+  @param[in]      key           key number for the keyuse to be added
+ @param[in] part key part for the keyuse to be added
+
+ @note
+    The function builds a new KEYUSE object for a key use, using the info
+    on the left and right parts of the key use extracted from the structure
+    key_field, together with the given key number and key part.
+    The built object is added to the dynamic array keyuse_array.
+
+  @retval 0  the built object is successfully added
+ @retval 1 otherwise
+*/
+
+static bool
+add_keyuse(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field,
+ uint key, uint part)
+{
+ KEYUSE keyuse;
+ Field *field= key_field->field;
+
+ keyuse.table= field->table;
+ keyuse.val= key_field->val;
+ keyuse.key= key;
+ if (!is_hash_join_key_no(key))
+ {
+ keyuse.keypart=part;
+ keyuse.keypart_map= (key_part_map) 1 << part;
+ }
+ else
+ {
+ keyuse.keypart= field->field_index;
+ keyuse.keypart_map= (key_part_map) 0;
+ }
+ keyuse.used_tables= key_field->val->used_tables();
+ keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
+ keyuse.ref_table_rows= 0;
+ keyuse.null_rejecting= key_field->null_rejecting;
+ keyuse.cond_guard= key_field->cond_guard;
+ keyuse.sj_pred_no= key_field->sj_pred_no;
+ return (insert_dynamic(keyuse_array,(uchar*) &keyuse));
+}
+
+
/*
Add all keys with uses 'field' for some keypart
If field->and_level != and_level then only mark key_part as const_part
@@ -3783,11 +4643,10 @@ max_part_bit(key_part_map bits)
*/
static bool
-add_key_part(DYNAMIC_ARRAY *keyuse_array,KEY_FIELD *key_field)
+add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field)
{
Field *field=key_field->field;
TABLE *form= field->table;
- KEYUSE keyuse;
if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
{
@@ -3798,25 +4657,30 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array,KEY_FIELD *key_field)
if (form->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL))
continue; // ToDo: ft-keys in non-ft queries. SerG
- uint key_parts= (uint) form->key_info[key].key_parts;
+ KEY *keyinfo= form->key_info+key;
+ uint key_parts= form->actual_n_key_parts(keyinfo);
for (uint part=0 ; part < key_parts ; part++)
{
if (field->eq(form->key_info[key].key_part[part].field))
{
- keyuse.table= field->table;
- keyuse.val = key_field->val;
- keyuse.key = key;
- keyuse.keypart=part;
- keyuse.keypart_map= (key_part_map) 1 << part;
- keyuse.used_tables=key_field->val->used_tables();
- keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
- keyuse.null_rejecting= key_field->null_rejecting;
- keyuse.cond_guard= key_field->cond_guard;
- if (insert_dynamic(keyuse_array,(uchar*) &keyuse))
+ if (add_keyuse(keyuse_array, key_field, key, part))
return TRUE;
}
}
}
+ if (field->hash_join_is_possible() &&
+ (key_field->optimize & KEY_OPTIMIZE_EQ) &&
+ key_field->val->used_tables())
+ {
+ /*
+ If a key use is extracted from an equi-join predicate then it is
+ added not only as a key use for every index whose component can
+      be evaluated utilizing this key use, but also as a key use for
+ hash join. Such key uses are marked with a special key number.
+ */
+ if (add_keyuse(keyuse_array, key_field, get_hash_join_key_no(), 0))
+ return TRUE;
+ }
}
return FALSE;
}
@@ -3849,7 +4713,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
((functype == Item_func::GE_FUNC && arg1->val_real() > 0) ||
(functype == Item_func::GT_FUNC && arg1->val_real() >=0)))
cond_func= (Item_func_match *) arg0;
- else if (arg0->const_item() &&
+ else if (arg0->const_item() && arg0->cols() == 1 &&
arg1->type() == Item::FUNC_ITEM &&
((Item_func *) arg1)->functype() == Item_func::FT_FUNC &&
((functype == Item_func::LE_FUNC && arg0->val_real() > 0) ||
@@ -3884,6 +4748,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
keyuse.used_tables=cond_func->key_item()->used_tables();
keyuse.optimize= 0;
keyuse.keypart_map= 0;
+ keyuse.sj_pred_no= UINT_MAX;
return insert_dynamic(keyuse_array,(uchar*) &keyuse);
}
@@ -3896,6 +4761,9 @@ sort_keyuse(KEYUSE *a,KEYUSE *b)
return (int) (a->table->tablenr - b->table->tablenr);
if (a->key != b->key)
return (int) (a->key - b->key);
+ if (a->key == MAX_KEY && b->key == MAX_KEY &&
+ a->used_tables != b->used_tables)
+ return (int) ((ulong) a->used_tables - (ulong) b->used_tables);
if (a->keypart != b->keypart)
return (int) (a->keypart - b->keypart);
// Place const values before other ones
@@ -3948,23 +4816,64 @@ static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
SARGABLE_PARAM **sargables)
{
List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
+ List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
+ bool have_another = FALSE;
table_map tables= 0;
TABLE_LIST *table;
DBUG_ASSERT(nested_join_table->nested_join);
- while ((table= li++))
+ while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
+ (table= li++))))
{
if (table->nested_join)
- add_key_fields_for_nj(join, table, end, and_level, sargables);
+ {
+ if (!table->on_expr)
+ {
+ /* It's a semi-join nest. Walk into it as if it wasn't a nest */
+ have_another= TRUE;
+ li2= li;
+ li= List_iterator<TABLE_LIST>(table->nested_join->join_list);
+ }
+ else
+ add_key_fields_for_nj(join, table, end, and_level, sargables);
+ }
else
if (!table->on_expr)
tables |= table->table->map;
}
- add_key_fields(join, end, and_level, nested_join_table->on_expr, tables,
- sargables);
+ if (nested_join_table->on_expr)
+ add_key_fields(join, end, and_level, nested_join_table->on_expr, tables,
+ sargables);
}
+void count_cond_for_nj(SELECT_LEX *sel, TABLE_LIST *nested_join_table)
+{
+ List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
+ List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
+ bool have_another = FALSE;
+ TABLE_LIST *table;
+
+ while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
+ (table= li++))))
+ if (table->nested_join)
+ {
+ if (!table->on_expr)
+ {
+ /* It's a semi-join nest. Walk into it as if it wasn't a nest */
+ have_another= TRUE;
+ li2= li;
+ li= List_iterator<TABLE_LIST>(table->nested_join->join_list);
+ }
+ else
+ count_cond_for_nj(sel, table);
+ }
+ if (nested_join_table->on_expr)
+ nested_join_table->on_expr->walk(&Item::count_sargable_conds,
+ 0, (uchar*) sel);
+
+}
+
/**
Update keyuse array with all possible keys we can use to fetch rows.
@@ -3988,14 +4897,36 @@ static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
static bool
update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
- uint tables, COND *cond, COND_EQUAL *cond_equal,
- table_map normal_tables, SELECT_LEX *select_lex,
- SARGABLE_PARAM **sargables)
+ uint tables, COND *cond, table_map normal_tables,
+ SELECT_LEX *select_lex, SARGABLE_PARAM **sargables)
{
- uint and_level,i,found_eq_constant;
+ uint and_level,i;
KEY_FIELD *key_fields, *end, *field;
uint sz;
uint m= max(select_lex->max_equal_elems,1);
+ DBUG_ENTER("update_ref_and_keys");
+ DBUG_PRINT("enter", ("normal_tables: %llx", normal_tables));
+
+ SELECT_LEX *sel=thd->lex->current_select;
+ sel->cond_count= 0;
+ sel->between_count= 0;
+ if (cond)
+ cond->walk(&Item::count_sargable_conds, 0, (uchar*) sel);
+ for (i=0 ; i < tables ; i++)
+ {
+ if (*join_tab[i].on_expr_ref)
+ (*join_tab[i].on_expr_ref)->walk(&Item::count_sargable_conds,
+ 0, (uchar*) sel);
+ }
+ {
+ List_iterator<TABLE_LIST> li(*join_tab->join->join_list);
+ TABLE_LIST *table;
+ while ((table= li++))
+ {
+ if (table->nested_join)
+ count_cond_for_nj(sel, table);
+ }
+ }
/*
We use the same piece of memory to store both KEY_FIELD
@@ -4019,10 +4950,9 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
substitutions.
*/
sz= max(sizeof(KEY_FIELD),sizeof(SARGABLE_PARAM))*
- (((thd->lex->current_select->cond_count+1)*2 +
- thd->lex->current_select->between_count)*m+1);
+ ((sel->cond_count*2 + sel->between_count)*m+1);
if (!(key_fields=(KEY_FIELD*) thd->alloc(sz)))
- return TRUE; /* purecov: inspected */
+ DBUG_RETURN(TRUE); /* purecov: inspected */
and_level= 0;
field= end= key_fields;
*sargables= (SARGABLE_PARAM *) key_fields +
@@ -4031,20 +4961,22 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
(*sargables)[0].field= 0;
if (my_init_dynamic_array(keyuse,sizeof(KEYUSE),20,64))
- return TRUE;
+ DBUG_RETURN(TRUE);
+
if (cond)
{
+ KEY_FIELD *saved_field= field;
add_key_fields(join_tab->join, &end, &and_level, cond, normal_tables,
sargables);
for (; field != end ; field++)
{
- if (add_key_part(keyuse,field))
- return TRUE;
+
/* Mark that we can optimize LEFT JOIN */
if (field->val->type() == Item::NULL_ITEM &&
!field->field->real_maybe_null())
field->field->table->reginfo.not_exists_optimize=1;
}
+ field= saved_field;
}
for (i=0 ; i < tables ; i++)
{
@@ -4079,79 +5011,94 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
for ( ; field != end ; field++)
{
if (add_key_part(keyuse,field))
- return TRUE;
+ DBUG_RETURN(TRUE);
}
if (select_lex->ftfunc_list->elements)
{
if (add_ft_keys(keyuse,join_tab,cond,normal_tables))
- return TRUE;
+ DBUG_RETURN(TRUE);
}
- /*
- Sort the array of possible keys and remove the following key parts:
- - ref if there is a keypart which is a ref and a const.
- (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
- then we skip the key part corresponding to b=t2.d)
- - keyparts without previous keyparts
- (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
- used in the query, we drop the partial key parts from consideration).
- Special treatment for ft-keys.
- */
- if (keyuse->elements)
- {
- KEYUSE key_end,*prev,*save_pos,*use;
+ DBUG_RETURN(FALSE);
+}
- my_qsort(keyuse->buffer,keyuse->elements,sizeof(KEYUSE),
- (qsort_cmp) sort_keyuse);
- bzero((char*) &key_end,sizeof(key_end)); /* Add for easy testing */
- if (insert_dynamic(keyuse,(uchar*) &key_end))
- return TRUE;
+/**
+ Sort the array of possible keys and remove the following key parts:
+ - ref if there is a keypart which is a ref and a const.
+ (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
+ then we skip the key part corresponding to b=t2.d)
+ - keyparts without previous keyparts
+ (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
+ used in the query, we drop the partial key parts from consideration).
+ Special treatment for ft-keys.
+*/
+
+static bool sort_and_filter_keyuse(THD *thd, DYNAMIC_ARRAY *keyuse,
+ bool skip_unprefixed_keyparts)
+{
+ KEYUSE key_end, *prev, *save_pos, *use;
+ uint found_eq_constant, i;
+
+ DBUG_ASSERT(keyuse->elements);
- use=save_pos=dynamic_element(keyuse,0,KEYUSE*);
- prev= &key_end;
- found_eq_constant=0;
- for (i=0 ; i < keyuse->elements-1 ; i++,use++)
+ my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE),
+ (qsort_cmp) sort_keyuse);
+
+ bzero((char*) &key_end, sizeof(key_end)); /* Add for easy testing */
+ if (insert_dynamic(keyuse, (uchar*) &key_end))
+ return TRUE;
+
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS))
+ generate_derived_keys(keyuse);
+
+ use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
+ prev= &key_end;
+ found_eq_constant= 0;
+ for (i=0 ; i < keyuse->elements-1 ; i++,use++)
+ {
+ if (!use->is_for_hash_join())
{
if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
- use->table->const_key_parts[use->key]|= use->keypart_map;
+ use->table->const_key_parts[use->key]|= use->keypart_map;
if (use->keypart != FT_KEYPART)
{
- if (use->key == prev->key && use->table == prev->table)
- {
- if (prev->keypart+1 < use->keypart ||
- (prev->keypart == use->keypart && found_eq_constant))
- continue; /* remove */
- }
- else if (use->keypart != 0) // First found must be 0
- continue;
+ if (use->key == prev->key && use->table == prev->table)
+ {
+ if ((prev->keypart+1 < use->keypart && skip_unprefixed_keyparts) ||
+ (prev->keypart == use->keypart && found_eq_constant))
+ continue; /* remove */
+ }
+ else if (use->keypart != 0 && skip_unprefixed_keyparts)
+ continue; /* remove - first found must be 0 */
}
-#if defined(__GNUC__) && !MY_GNUC_PREREQ(4,4)
- /*
- Old gcc used a memcpy(), which is undefined if save_pos==use:
- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
- */
- if (save_pos != use)
-#endif
- *save_pos= *use;
- prev=use;
+ prev= use;
found_eq_constant= !use->used_tables;
- /* Save ptr to first use */
- if (!use->table->reginfo.join_tab->keyuse)
- use->table->reginfo.join_tab->keyuse=save_pos;
use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
- save_pos++;
}
- i=(uint) (save_pos-(KEYUSE*) keyuse->buffer);
- (void) set_dynamic(keyuse,(uchar*) &key_end,i);
- keyuse->elements=i;
+ /*
+ Old gcc used a memcpy(), which is undefined if save_pos==use:
+ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
+ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
+ This also disables a valgrind warning, so better to have the test.
+ */
+ if (save_pos != use)
+ *save_pos= *use;
+ /* Save ptr to first use */
+ if (!use->table->reginfo.join_tab->keyuse)
+ use->table->reginfo.join_tab->keyuse= save_pos;
+ save_pos++;
}
+ i= (uint) (save_pos-(KEYUSE*) keyuse->buffer);
+ (void) set_dynamic(keyuse,(uchar*) &key_end,i);
+ keyuse->elements= i;
+
return FALSE;
}
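
The pruning rules listed in the comment above can be shown on a much-simplified model. The sketch below (hypothetical SketchKeyuse type, not the server's KEYUSE) sorts candidate key uses by (table, key, keypart) with const values first, then drops any keypart that does not extend a contiguous prefix, mirroring the skip_unprefixed_keyparts branch; the handling of duplicates, hash-join key uses and FT keys is intentionally omitted.

#include <algorithm>
#include <cstdio>
#include <vector>

/* Simplified stand-in for KEYUSE: which index keypart an equality can drive */
struct SketchKeyuse { int table, key, keypart; bool is_const; };

/* Keep only key uses forming a contiguous prefix of some index (const first) */
static std::vector<SketchKeyuse> prune(std::vector<SketchKeyuse> v)
{
  std::sort(v.begin(), v.end(), [](const SketchKeyuse &a, const SketchKeyuse &b)
  {
    if (a.table != b.table)     return a.table < b.table;
    if (a.key != b.key)         return a.key < b.key;
    if (a.keypart != b.keypart) return a.keypart < b.keypart;
    return a.is_const && !b.is_const;        /* const values before other ones */
  });
  std::vector<SketchKeyuse> kept;
  for (const SketchKeyuse &u : v)
  {
    bool same_key= !kept.empty() && kept.back().table == u.table &&
                   kept.back().key == u.key;
    int expected= same_key ? kept.back().keypart + 1 : 0;
    if (u.keypart == expected)
      kept.push_back(u);                     /* extends the prefix: keep */
    /* otherwise: keypart without its prefix, dropped */
  }
  return kept;
}

int main()
{
  /* key(a,b,c) but only "b < 5" is usable: keypart 1 without keypart 0 */
  std::vector<SketchKeyuse> uses= { {0, 0, 1, false} };
  printf("%zu key use(s) survive\n", prune(uses).size());  /* prints 0 */
  return 0;
}
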
+
/**
Update some values in keyuse for faster choose_plan() loop.
*/
@@ -4176,12 +5123,15 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
(map= (keyuse->used_tables & ~join->const_table_map &
~OUTER_REF_TABLE_BIT)))
{
- uint tablenr;
- for (tablenr=0 ; ! (map & 1) ; map>>=1, tablenr++) ;
- if (map == 1) // Only one table
+ uint n_tables= my_count_bits(map);
+ if (n_tables == 1) // Only one table
{
- TABLE *tmp_table=join->all_tables[tablenr];
- keyuse->ref_table_rows= max(tmp_table->file->stats.records, 100);
+ Table_map_iterator it(map);
+ int tablenr= it.next_bit();
+ DBUG_ASSERT(tablenr != Table_map_iterator::BITMAP_END);
+ TABLE *tmp_table=join->table[tablenr];
+ if (tmp_table) // already created
+ keyuse->ref_table_rows= max(tmp_table->file->stats.records, 100);
}
}
/*
@@ -4194,6 +5144,7 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
}
+
/**
Check for the presence of AGGFN(DISTINCT a) queries that may be subject
to loose index scan.
@@ -4233,7 +5184,7 @@ is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
bool result= false;
Field_map first_aggdistinct_fields;
- if (join->tables != 1 || /* reference more than 1 table */
+ if (join->table_count != 1 || /* reference more than 1 table */
join->select_distinct || /* or a DISTINCT */
join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
return false;
@@ -4367,14 +5318,17 @@ add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
/** Save const tables first as used tables. */
-static void
-set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
+void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
{
join->positions[idx].table= table;
join->positions[idx].key=key;
join->positions[idx].records_read=1.0; /* This is a const table */
join->positions[idx].ref_depend_map= 0;
+// join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
+ join->positions[idx].sj_strategy= SJ_OPT_NONE;
+ join->positions[idx].use_join_buffer= FALSE;
+
/* Move the const table as down as possible in best_ref */
JOIN_TAB **pos=join->best_ref+idx+1;
JOIN_TAB *next=join->best_ref[idx];
@@ -4388,6 +5342,35 @@ set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
}
+/* Estimate of the number of matching candidates in the joined table */
+
+inline
+ha_rows matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint)
+{
+ ha_rows records= s->found_records;
+ /*
+ If there is a filtering condition on the table (i.e. ref analyzer found
+ at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
+ preceding this table in the join order we're now considering), then
+ assume that 25% of the rows will be filtered out by this condition.
+
+ This heuristic is supposed to force tables used in exprZ to be before
+ this table in join order.
+ */
+ if (with_found_constraint)
+ records-= records/4;
+
+ /*
+ If applicable, get a more accurate estimate. Don't use the two
+ heuristics at once.
+ */
+ if (s->table->quick_condition_rows != s->found_records)
+ records= s->table->quick_condition_rows;
+
+ return records;
+}
+
+
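
As a worked example of the heuristic above (numbers are hypothetical): a table with found_records = 1000 and at least one usable ref constraint is costed as 1000 - 1000/4 = 750 candidate rows; if the range optimizer produced its own estimate, say quick_condition_rows = 120, that estimate is used instead of, not combined with, the 25% reduction.
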
/**
Find the best access path for an extension of a partial execution
plan and add this path to the plan.
@@ -4405,23 +5388,28 @@ set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
@param thd thread for the connection that submitted the query
@param remaining_tables set of tables not included into the partial plan yet
@param idx the length of the partial plan
+ @param disable_jbuf TRUE<=> Don't use join buffering
@param record_count estimate for the number of records returned by the
partial plan
- @param read_time the cost of the partial plan
+ @param pos OUT Table access plan
+ @param loose_scan_pos OUT Table plan that uses loosescan, or set cost to
+ DBL_MAX if not possible.
@return
None
*/
-static void
+void
best_access_path(JOIN *join,
JOIN_TAB *s,
- THD *thd,
table_map remaining_tables,
uint idx,
+ bool disable_jbuf,
double record_count,
- double read_time)
+ POSITION *pos,
+ POSITION *loose_scan_pos)
{
+ THD *thd= join->thd;
KEYUSE *best_key= 0;
uint best_max_key_part= 0;
my_bool found_constraint= 0;
@@ -4431,12 +5419,24 @@ best_access_path(JOIN *join,
table_map best_ref_depends_map= 0;
double tmp;
ha_rows rec;
+ bool best_uses_jbuf= FALSE;
+ MY_BITMAP *eq_join_set= &s->table->eq_join_set;
+ KEYUSE *hj_start_key= 0;
+
+ disable_jbuf= disable_jbuf || idx == join->const_tables;
+
+ Loose_scan_opt loose_scan_opt;
DBUG_ENTER("best_access_path");
+
+ bitmap_clear_all(eq_join_set);
+ loose_scan_opt.init(join, s, remaining_tables);
+
if (s->keyuse)
{ /* Use key if possible */
+ KEYUSE *keyuse;
+ KEYUSE *start_key=0;
TABLE *table= s->table;
- KEYUSE *keyuse,*start_key=0;
double best_records= DBL_MAX;
uint max_key_part=0;
@@ -4444,19 +5444,45 @@ best_access_path(JOIN *join,
rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key
for (keyuse=s->keyuse ; keyuse->table == table ;)
{
+ KEY *keyinfo;
+ ulong key_flags;
+ uint key_parts;
key_part_map found_part= 0;
table_map found_ref= 0;
uint key= keyuse->key;
- KEY *keyinfo= table->key_info+key;
bool ft_key= (keyuse->keypart == FT_KEYPART);
/* Bitmap of keyparts where the ref access is over 'keypart=const': */
key_part_map const_part= 0;
/* The or-null keypart in ref-or-null access: */
key_part_map ref_or_null_part= 0;
+ if (is_hash_join_key_no(key))
+ {
+ /*
+ Hash join, like any join employing the join buffer, can be used to join
+ only those tables that come after the first non-const table
+ */
+ if (!(remaining_tables & keyuse->used_tables) &&
+ idx > join->const_tables)
+ {
+ if (!hj_start_key)
+ hj_start_key= keyuse;
+ bitmap_set_bit(eq_join_set, keyuse->keypart);
+ }
+ keyuse++;
+ continue;
+ }
+
+ keyinfo= table->key_info+key;
+ key_parts= table->actual_n_key_parts(keyinfo);
+ key_flags= table->actual_key_flags(keyinfo);
/* Calculate how many key segments of the current key we can use */
start_key= keyuse;
+ loose_scan_opt.next_ref_key();
+ DBUG_PRINT("info", ("Considering ref access on key %s",
+ keyuse->table->key_info[keyuse->key].name));
+
do /* For each keypart */
{
uint keypart= keyuse->keypart;
@@ -4465,12 +5491,13 @@ best_access_path(JOIN *join,
do /* For each way to access the keypart */
{
-
/*
if 1. expression doesn't refer to forward tables
2. we won't get two ref-or-null's
*/
if (!(remaining_tables & keyuse->used_tables) &&
+ s->access_from_tables_is_allowed(keyuse->used_tables,
+ join->sjm_lookup_tables) &&
!(ref_or_null_part && (keyuse->optimize &
KEY_OPTIMIZE_REF_OR_NULL)))
{
@@ -4478,8 +5505,8 @@ best_access_path(JOIN *join,
if (!(keyuse->used_tables & ~join->const_table_map))
const_part|= keyuse->keypart_map;
- double tmp2= prev_record_reads(join, idx, (found_ref |
- keyuse->used_tables));
+ double tmp2= prev_record_reads(join->positions, idx,
+ (found_ref | keyuse->used_tables));
if (tmp2 < best_prev_record_reads)
{
best_part_found_ref= keyuse->used_tables & ~join->const_table_map;
@@ -4494,6 +5521,7 @@ best_access_path(JOIN *join,
if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
ref_or_null_part |= keyuse->keypart_map;
}
+ loose_scan_opt.add_keyuse(remaining_tables, keyuse);
keyuse++;
} while (keyuse->table == table && keyuse->key == key &&
keyuse->keypart == keypart);
@@ -4503,7 +5531,7 @@ best_access_path(JOIN *join,
/*
 Assume that each key matches a proportional part of the table.
*/
- if (!found_part && !ft_key)
+ if (!found_part && !ft_key && !loose_scan_opt.have_a_case())
continue; // Nothing usable found
if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
@@ -4518,22 +5546,23 @@ best_access_path(JOIN *join,
Really, there should be records=0.0 (yes!)
but 1.0 would be probably safer
*/
- tmp= prev_record_reads(join, idx, found_ref);
+ tmp= prev_record_reads(join->positions, idx, found_ref);
records= 1.0;
}
else
{
- found_constraint= 1;
- /*
- Check if we found full key
- */
- if (found_part == PREV_BITS(uint,keyinfo->key_parts) &&
+ found_constraint= test(found_part);
+ loose_scan_opt.check_ref_access_part1(s, key, start_key, found_part);
+
+ /* Check if we found full key */
+ if (found_part == PREV_BITS(uint, key_parts) &&
!ref_or_null_part)
{ /* use eq key */
max_key_part= (uint) ~0;
- if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
+ if ((key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME ||
+ test(key_flags & HA_EXT_NOSAME))
{
- tmp = prev_record_reads(join, idx, found_ref);
+ tmp = prev_record_reads(join->positions, idx, found_ref);
records=1.0;
}
else
@@ -4567,7 +5596,8 @@ best_access_path(JOIN *join,
}
else
{
- if (!(records=keyinfo->rec_per_key[keyinfo->key_parts-1]))
+ uint key_parts= table->actual_n_key_parts(keyinfo);
+ if (!(records=keyinfo->rec_per_key[key_parts-1]))
{ /* Prefer longer keys */
records=
((double) s->records / (double) rec *
@@ -4601,14 +5631,11 @@ best_access_path(JOIN *join,
tmp= records;
set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
if (table->covering_keys.is_set(key))
- {
- /* we can use only index tree */
- uint keys_per_block= table->file->stats.block_size/2/
- (keyinfo->key_length+table->file->ref_length)+1;
- tmp= record_count*(tmp+keys_per_block-1)/keys_per_block;
- }
+ tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
else
- tmp= record_count*min(tmp,s->worst_seeks);
+ tmp= table->file->read_time(key, 1,
+ (ha_rows) min(tmp,s->worst_seeks));
+ tmp*= record_count;
}
}
else
@@ -4769,20 +5796,20 @@ best_access_path(JOIN *join,
/* Limit the number of matched rows */
set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
if (table->covering_keys.is_set(key))
- {
- /* we can use only index tree */
- uint keys_per_block= table->file->stats.block_size/2/
- (keyinfo->key_length+table->file->ref_length)+1;
- tmp= record_count*(tmp+keys_per_block-1)/keys_per_block;
- }
+ tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
else
- tmp= record_count*min(tmp,s->worst_seeks);
+ tmp= table->file->read_time(key, 1,
+ (ha_rows) min(tmp,s->worst_seeks));
+ tmp*= record_count;
}
else
tmp= best_time; // Do nothing
}
+
+ tmp += s->startup_cost;
+ loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp);
} /* not ft_key */
- if (tmp < best_time - records/(double) TIME_FOR_COMPARE)
+ if (tmp + 0.0001 < best_time - records/(double) TIME_FOR_COMPARE)
{
best_time= tmp + records/(double) TIME_FOR_COMPARE;
best= tmp;
@@ -4791,10 +5818,47 @@ best_access_path(JOIN *join,
best_max_key_part= max_key_part;
best_ref_depends_map= found_ref;
}
- }
+ } /* for each key */
records= best_records;
}
+ /*
+ If there is no key to access the table, but there is an equi-join
+ predicate connecting the table with the privious tables then we
+ consider the possibility of using hash join.
+ We need also to check that:
+ (1) s is inner table of semi-join -> join cache is allowed for semijoins
+ (2) s is inner table of outer join -> join cache is allowed for outer joins
+ */
+ if (idx > join->const_tables && best_key == 0 &&
+ (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
+ join->max_allowed_join_cache_level > 2 &&
+ !bitmap_is_clear_all(eq_join_set) && !disable_jbuf &&
+ (!s->emb_sj_nest ||
+ join->allowed_semijoin_with_cache) && // (1)
+ (!(s->table->map & join->outer_join) ||
+ join->allowed_outer_join_with_cache)) // (2)
+ {
+ double join_sel= 0.1;
+ /* Estimate the cost of the hash join access to the table */
+ ha_rows rnd_records= matching_candidates_in_table(s, found_constraint);
+
+ tmp= s->quick ? s->quick->read_time : s->scan_time();
+ tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
+
+ /* We read the table as many times as join buffer becomes full. */
+ tmp*= (1.0 + floor((double) cache_record_length(join,idx) *
+ record_count /
+ (double) thd->variables.join_buff_size));
+ best_time= tmp +
+ (record_count*join_sel) / TIME_FOR_COMPARE * rnd_records;
+ best= tmp;
+ records= rows2double(rnd_records);
+ best_key= hj_start_key;
+ best_ref_depends_map= 0;
+ best_uses_jbuf= TRUE;
+ }
+
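
For reference, the hash-join cost computed in the block above, restated as a standalone sketch. The value 5.0 standing in for TIME_FOR_COMPARE and all sample inputs are assumptions for illustration only.

#include <cmath>
#include <cstdio>

static const double SK_TIME_FOR_COMPARE= 5.0;     // assumed, mirrors TIME_FOR_COMPARE

// Sketch of the hash-join cost estimate used above (join_sel=0.1 as in the patch)
static double sketch_hash_join_cost(double scan_time, double table_records,
                                    double rnd_records, double record_count,
                                    double rec_length, double join_buff_size)
{
  double join_sel= 0.1;
  double tmp= scan_time;
  tmp+= (table_records - rnd_records) / SK_TIME_FOR_COMPARE;
  /* The table is re-scanned once per filling of the join buffer */
  tmp*= 1.0 + floor(rec_length * record_count / join_buff_size);
  return tmp + (record_count * join_sel) / SK_TIME_FOR_COMPARE * rnd_records;
}

int main()
{
  /* 10000-row scan joined to a 1000-row prefix with a 256KB join buffer */
  printf("estimated cost: %.1f\n",
         sketch_hash_join_cost(120.0, 10000, 7500, 1000, 64, 262144));
  return 0;
}
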
/*
Don't test table scan if it can't be better.
Prefer key lookup if we would use the same key for scanning.
@@ -4804,7 +5868,7 @@ best_access_path(JOIN *join,
This is because table scans uses index and we would not win
anything by using a table scan.
- A word for word translation of the below if-statement in psergey's
+ A word for word translation of the below if-statement in sergefp's
understanding: we check if we should use table scan if:
(1) The found 'ref' access produces more records than a table scan
(or index scan, or quick select), or 'ref' is more expensive than
@@ -4822,39 +5886,28 @@ best_access_path(JOIN *join,
Since we have a 'ref' access path, and FORCE INDEX instructs us to
choose it over ALL/index, there is no need to consider a full table
scan.
+ (5) Non-flattenable semi-joins: don't consider doing a scan of temporary
+ table if we had an option to make lookups into it. In real-world cases,
+ lookups are cheaper than full scans, but when the table is small, they
+ can be [considered to be] more expensive, which causes lookups not to
+ be used for cases with small datasets, which is annoying.
*/
if ((records >= s->found_records || best > s->read_time) && // (1)
!(s->quick && best_key && s->quick->index == best_key->key && // (2)
best_max_key_part >= s->table->quick_key_parts[best_key->key]) &&// (2)
!((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3)
! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3)
- !(s->table->force_index && best_key && !s->quick)) // (4)
+ !(s->table->force_index && best_key && !s->quick) && // (4)
+ !(best_key && s->table->pos_in_table_list->jtbm_subselect)) // (5)
{ // Check full join
- ha_rows rnd_records= s->found_records;
- /*
- If there is a filtering condition on the table (i.e. ref analyzer found
- at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
- preceding this table in the join order we're now considering), then
- assume that 25% of the rows will be filtered out by this condition.
-
- This heuristic is supposed to force tables used in exprZ to be before
- this table in join order.
- */
- if (found_constraint)
- rnd_records-= rnd_records/4;
-
- /*
- If applicable, get a more accurate estimate. Don't use the two
- heuristics at once.
- */
- if (s->table->quick_condition_rows != s->found_records)
- rnd_records= s->table->quick_condition_rows;
+ ha_rows rnd_records= matching_candidates_in_table(s, found_constraint);
/*
Range optimizer never proposes a RANGE if it isn't better
than FULL: so if RANGE is present, it's always preferred to FULL.
Here we estimate its cost.
*/
+
if (s->quick)
{
/*
@@ -4869,12 +5922,14 @@ best_access_path(JOIN *join,
tmp= record_count *
(s->quick->read_time +
(s->found_records - rnd_records)/(double) TIME_FOR_COMPARE);
+
+ loose_scan_opt.check_range_access(join, idx, s->quick);
}
else
{
/* Estimate cost of reading table. */
- tmp= s->table->file->scan_time();
- if (s->table->map & join->outer_join) // Can't use join cache
+ tmp= s->scan_time();
+ if ((s->table->map & join->outer_join) || disable_jbuf) // Can't use join cache
{
/*
For each record we have to:
@@ -4902,6 +5957,7 @@ best_access_path(JOIN *join,
}
}
+ tmp += s->startup_cost;
/*
We estimate the cost of evaluating WHERE clause for found records
as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
@@ -4909,7 +5965,8 @@ best_access_path(JOIN *join,
*/
if (best == DBL_MAX ||
(tmp + record_count/(double) TIME_FOR_COMPARE*rnd_records <
- best + record_count/(double) TIME_FOR_COMPARE*records))
+ (best_key->is_for_hash_join() ? best_time :
+ best + record_count/(double) TIME_FOR_COMPARE*records)))
{
/*
If the table has a range (s->quick is set) make_join_select()
@@ -4920,15 +5977,21 @@ best_access_path(JOIN *join,
best_key= 0;
/* range/index_merge/ALL/index access method are "independent", so: */
best_ref_depends_map= 0;
+ best_uses_jbuf= test(!disable_jbuf && !((s->table->map &
+ join->outer_join)));
}
}
/* Update the cost information for the current partial plan */
- join->positions[idx].records_read= records;
- join->positions[idx].read_time= best;
- join->positions[idx].key= best_key;
- join->positions[idx].table= s;
- join->positions[idx].ref_depend_map= best_ref_depends_map;
+ pos->records_read= records;
+ pos->read_time= best;
+ pos->key= best_key;
+ pos->table= s;
+ pos->ref_depend_map= best_ref_depends_map;
+ pos->loosescan_picker.loosescan_key= MAX_KEY;
+ pos->use_join_buffer= best_uses_jbuf;
+
+ loose_scan_opt.save_to_position(s, loose_scan_pos);
if (!best_key &&
idx == join->const_tables &&
@@ -4940,6 +6003,114 @@ best_access_path(JOIN *join,
}
+/*
+ Find JOIN_TAB's embedding (i.e, parent) subquery.
+ - For merged semi-joins, tables inside the semi-join nest have their
+ semi-join nest as parent. We intentionally ignore results of table
+ pullout action here.
+ - For non-merged semi-joins (JTBM tabs), the embedding subquery is the
+ JTBM join tab itself.
+*/
+
+static TABLE_LIST* get_emb_subq(JOIN_TAB *tab)
+{
+ TABLE_LIST *tlist= tab->table->pos_in_table_list;
+ if (tlist->jtbm_subselect)
+ return tlist;
+ TABLE_LIST *embedding= tlist->embedding;
+ if (!embedding || !embedding->sj_subq_pred)
+ return NULL;
+ return embedding;
+}
+
+
+/*
+ Choose initial table order that "helps" semi-join optimizations.
+
+ The idea is that we should start with the order that is the same as the one
+ we would have had if we had semijoin=off:
+ - Top-level tables go first
+ - subquery tables are grouped together by the subquery they are in,
+ - subquery tables are attached where the subquery predicate would have been
+ attached if we had semi-join off.
+
+ This function relies on join_tab_cmp()/join_tab_cmp_straight() to produce
+ a certain preliminary ordering; see compare_embedding_subqueries() for its
+ description.
+*/
+
+static void choose_initial_table_order(JOIN *join)
+{
+ TABLE_LIST *emb_subq;
+ JOIN_TAB **tab= join->best_ref + join->const_tables;
+ JOIN_TAB **tabs_end= tab + join->table_count - join->const_tables;
+ /* Find where the top-level JOIN_TABs end and subquery JOIN_TABs start */
+ for (; tab != tabs_end; tab++)
+ {
+ if ((emb_subq= get_emb_subq(*tab)))
+ break;
+ }
+ uint n_subquery_tabs= tabs_end - tab;
+
+ if (!n_subquery_tabs)
+ return;
+
+ /* Copy the subquery JOIN_TABs to a separate array */
+ JOIN_TAB *subquery_tabs[MAX_TABLES];
+ memcpy(subquery_tabs, tab, sizeof(JOIN_TAB*) * n_subquery_tabs);
+
+ JOIN_TAB **last_top_level_tab= tab;
+ JOIN_TAB **subq_tab= subquery_tabs;
+ JOIN_TAB **subq_tabs_end= subquery_tabs + n_subquery_tabs;
+ TABLE_LIST *cur_subq_nest= NULL;
+ for (; subq_tab < subq_tabs_end; subq_tab++)
+ {
+ if (get_emb_subq(*subq_tab)!= cur_subq_nest)
+ {
+ /*
+ Reached the part of subquery_tabs that covers tables in some subquery.
+ */
+ cur_subq_nest= get_emb_subq(*subq_tab);
+
+ /* Determine how many tables the subquery has */
+ JOIN_TAB **last_tab_for_subq;
+ for (last_tab_for_subq= subq_tab;
+ last_tab_for_subq < subq_tabs_end &&
+ get_emb_subq(*last_tab_for_subq) == cur_subq_nest;
+ last_tab_for_subq++) {}
+ uint n_subquery_tables= last_tab_for_subq - subq_tab;
+
+ /*
+ Walk the original array and find where this subquery would have been
+ attached to
+ */
+ table_map need_tables= cur_subq_nest->original_subq_pred_used_tables;
+ need_tables &= ~(join->const_table_map | PSEUDO_TABLE_BITS);
+ for (JOIN_TAB **top_level_tab= join->best_ref + join->const_tables;
+ top_level_tab < last_top_level_tab;
+ //top_level_tab < join->best_ref + join->table_count;
+ top_level_tab++)
+ {
+ need_tables &= ~(*top_level_tab)->table->map;
+ /* Check if this is the place where subquery should be attached */
+ if (!need_tables)
+ {
+ /* Move away the top-level tables that are after top_level_tab */
+ uint top_tail_len= last_top_level_tab - top_level_tab - 1;
+ memmove(top_level_tab + 1 + n_subquery_tables, top_level_tab + 1,
+ sizeof(JOIN_TAB*)*top_tail_len);
+ last_top_level_tab += n_subquery_tables;
+ memcpy(top_level_tab + 1, subq_tab, sizeof(JOIN_TAB*)*n_subquery_tables);
+ break;
+ }
+ }
+ DBUG_ASSERT(!need_tables);
+ subq_tab += n_subquery_tables - 1;
+ }
+ }
+}
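
For illustration (hypothetical tables): given top-level order ot1, ot2, ot3 and a subquery nest over it1, it2 whose original predicate refers only to ot1, the loop above moves the nest right after ot1, producing ot1, it1, it2, ot2, ot3, which is where WHERE-clause pushdown would have placed the subquery with semijoin=off.
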
+
+
/**
Selects and invokes a search strategy for an optimal query plan.
@@ -4963,7 +6134,7 @@ best_access_path(JOIN *join,
TRUE Fatal error
*/
-static bool
+bool
choose_plan(JOIN *join, table_map join_tables)
{
uint search_depth= join->thd->variables.optimizer_search_depth;
@@ -4972,19 +6143,45 @@ choose_plan(JOIN *join, table_map join_tables)
DBUG_ENTER("choose_plan");
join->cur_embedding_map= 0;
- reset_nj_counters(join->join_list);
+ reset_nj_counters(join, join->join_list);
+ qsort2_cmp jtab_sort_func;
+
+ if (join->emb_sjm_nest)
+ {
+ /* We're optimizing semi-join materialization nest, so put the
+ tables from this semi-join as first
+ */
+ jtab_sort_func= join_tab_cmp_embedded_first;
+ }
+ else
+ {
+ /*
+ if (SELECT_STRAIGHT_JOIN option is set)
+ reorder tables so dependent tables come after tables they depend
+ on, otherwise keep tables in the order they were specified in the query
+ else
+ Apply heuristic: pre-sort all access plans with respect to the number of
+ records accessed.
+ */
+ jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp;
+ }
+
/*
- if (SELECT_STRAIGHT_JOIN option is set)
- reorder tables so dependent tables come after tables they depend
- on, otherwise keep tables in the order they were specified in the query
- else
- Apply heuristic: pre-sort all access plans with respect to the number of
- records accessed.
+ psergey-todo: if we're not optimizing an SJM nest,
+ - sort that outer tables are first, and each sjm nest follows
+ - then, put each [sjm_table1, ... sjm_tableN] sub-array right where
+ WHERE clause pushdown would have put it.
*/
- my_qsort(join->best_ref + join->const_tables,
- join->tables - join->const_tables, sizeof(JOIN_TAB*),
- straight_join ? join_tab_cmp_straight : join_tab_cmp);
-
+ my_qsort2(join->best_ref + join->const_tables,
+ join->table_count - join->const_tables, sizeof(JOIN_TAB*),
+ jtab_sort_func, (void*)join->emb_sjm_nest);
+
+ if (!join->emb_sjm_nest)
+ {
+ choose_initial_table_order(join);
+ }
+ join->cur_sj_inner_tables= 0;
+
if (straight_join)
{
optimize_straight_join(join, join_tables);
@@ -5022,6 +6219,64 @@ choose_plan(JOIN *join, table_map join_tables)
}
+/*
+ Compare two join tabs based on the subqueries they are from.
+ - top-level join tabs go first
+ - then subqueries are ordered by their select_id (we're using this
+ criteria because we need a cross-platform, deterministic ordering)
+
+ @return
+ 0 - equal
+ -1 - jt1 < jt2
+ 1 - jt1 > jt2
+*/
+
+static int compare_embedding_subqueries(JOIN_TAB *jt1, JOIN_TAB *jt2)
+{
+ /* Determine if the first table is originally from a subquery */
+ TABLE_LIST *tbl1= jt1->table->pos_in_table_list;
+ uint tbl1_select_no;
+ if (tbl1->jtbm_subselect)
+ {
+ tbl1_select_no=
+ tbl1->jtbm_subselect->unit->first_select()->select_number;
+ }
+ else if (tbl1->embedding && tbl1->embedding->sj_subq_pred)
+ {
+ tbl1_select_no=
+ tbl1->embedding->sj_subq_pred->unit->first_select()->select_number;
+ }
+ else
+ tbl1_select_no= 1; /* Top-level */
+
+ /* Same for the second table */
+ TABLE_LIST *tbl2= jt2->table->pos_in_table_list;
+ uint tbl2_select_no;
+ if (tbl2->jtbm_subselect)
+ {
+ tbl2_select_no=
+ tbl2->jtbm_subselect->unit->first_select()->select_number;
+ }
+ else if (tbl2->embedding && tbl2->embedding->sj_subq_pred)
+ {
+ tbl2_select_no=
+ tbl2->embedding->sj_subq_pred->unit->first_select()->select_number;
+ }
+ else
+ tbl2_select_no= 1; /* Top-level */
+
+ /*
+ Put top-level tables in front. Tables from within subqueries must follow,
+ grouped by their owner subquery. We don't care about the order that
+ subquery groups are in, because choose_initial_table_order() will re-order
+ the groups.
+ */
+ if (tbl1_select_no != tbl2_select_no)
+ return tbl1_select_no > tbl2_select_no ? 1 : -1;
+ return 0;
+}
+
+
/**
Compare two JOIN_TAB objects based on the number of accessed records.
@@ -5038,6 +6293,9 @@ choose_plan(JOIN *join, table_map join_tables)
a: dependent = 0x0 table->map = 0x1 found_records = 3 ptr = 0x907e6b0
b: dependent = 0x0 table->map = 0x2 found_records = 3 ptr = 0x907e838
c: dependent = 0x6 table->map = 0x10 found_records = 2 ptr = 0x907ecd0
+
+ As for subqueries, this function must produce an order that can be fed to
+ choose_initial_table_order().
@retval
1 if first is bigger
@@ -5048,11 +6306,19 @@ choose_plan(JOIN *join, table_map join_tables)
*/
static int
-join_tab_cmp(const void* ptr1, const void* ptr2)
+join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2)
{
JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
+ int cmp;
+ if ((cmp= compare_embedding_subqueries(jt1, jt2)) != 0)
+ return cmp;
+ /*
+ After that,
+ take care about ordering imposed by LEFT JOIN constraints,
+ possible [eq]ref accesses, and numbers of matching records in the table.
+ */
if (jt1->dependent & jt2->table->map)
return 1;
if (jt2->dependent & jt1->table->map)
@@ -5070,11 +6336,23 @@ join_tab_cmp(const void* ptr1, const void* ptr2)
*/
static int
-join_tab_cmp_straight(const void* ptr1, const void* ptr2)
+join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2)
{
JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
+ /*
+ We don't do subquery flattening if the parent or child select has
+ STRAIGHT_JOIN modifier. It is complicated to implement and the semantics
+ is hardly useful.
+ */
+ DBUG_ASSERT(!jt1->emb_sj_nest);
+ DBUG_ASSERT(!jt2->emb_sj_nest);
+
+ int cmp;
+ if ((cmp= compare_embedding_subqueries(jt1, jt2)) != 0)
+ return cmp;
+
if (jt1->dependent & jt2->table->map)
return 1;
if (jt2->dependent & jt1->table->map)
@@ -5082,6 +6360,38 @@ join_tab_cmp_straight(const void* ptr1, const void* ptr2)
return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
}
+
+/*
+ Same as join_tab_cmp but tables from within the given semi-join nest go
+ first. Used when optimizing semi-join materialization nests.
+*/
+
+static int
+join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void* ptr2)
+{
+ const TABLE_LIST *emb_nest= (TABLE_LIST*) emb;
+ JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
+ JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
+
+ if (jt1->emb_sj_nest == emb_nest && jt2->emb_sj_nest != emb_nest)
+ return -1;
+ if (jt1->emb_sj_nest != emb_nest && jt2->emb_sj_nest == emb_nest)
+ return 1;
+
+ if (jt1->dependent & jt2->table->map)
+ return 1;
+ if (jt2->dependent & jt1->table->map)
+ return -1;
+
+ if (jt1->found_records > jt2->found_records)
+ return 1;
+ if (jt1->found_records < jt2->found_records)
+ return -1;
+
+ return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
+}
+
+
/**
Heuristic procedure to automatically guess a reasonable degree of
exhaustiveness for the greedy search procedure.
@@ -5117,7 +6427,7 @@ join_tab_cmp_straight(const void* ptr1, const void* ptr2)
static uint
determine_search_depth(JOIN *join)
{
- uint table_count= join->tables - join->const_tables;
+ uint table_count= join->table_count - join->const_tables;
uint search_depth;
/* TODO: this value should be determined dynamically, based on statistics: */
uint max_tables_for_exhaustive_opt= 7;
@@ -5163,28 +6473,35 @@ optimize_straight_join(JOIN *join, table_map join_tables)
{
JOIN_TAB *s;
uint idx= join->const_tables;
+ bool disable_jbuf= join->thd->variables.join_cache_level == 0;
double record_count= 1.0;
double read_time= 0.0;
-
+ POSITION loose_scan_pos;
+
for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
{
/* Find the best access method from 's' to the current partial plan */
- best_access_path(join, s, join->thd, join_tables, idx,
- record_count, read_time);
+ best_access_path(join, s, join_tables, idx, disable_jbuf, record_count,
+ join->positions + idx, &loose_scan_pos);
+
/* compute the cost of the new plan extended with 's' */
record_count*= join->positions[idx].records_read;
- read_time+= join->positions[idx].read_time;
+ read_time+= join->positions[idx].read_time +
+ record_count / (double) TIME_FOR_COMPARE;
+ advance_sj_state(join, join_tables, idx, &record_count, &read_time,
+ &loose_scan_pos);
+
join_tables&= ~(s->table->map);
++idx;
}
- read_time+= record_count / (double) TIME_FOR_COMPARE;
if (join->sort_by_table &&
join->sort_by_table != join->positions[join->const_tables].table->table)
read_time+= record_count; // We have to make a temp table
memcpy((uchar*) join->best_positions, (uchar*) join->positions,
sizeof(POSITION)*idx);
- join->best_read= read_time;
+ join->record_count= record_count;
+ join->best_read= read_time - 0.001;
}
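
The cost accumulation used here (and again in best_extension_by_limited_search below) can be restated as a compact standalone sketch; the constant 5.0 standing in for TIME_FOR_COMPARE and the two-table sample plan are assumptions.

#include <cstdio>

struct SketchPos { double records_read, read_time; };

/* Prefix fanout times records_read, plus read_time and a compare cost per row */
static double sketch_plan_cost(const SketchPos *pos, int n, double *fanout_out)
{
  const double TIME_FOR_COMPARE_SK= 5.0;
  double record_count= 1.0, read_time= 0.0;
  for (int i= 0; i < n; i++)
  {
    record_count*= pos[i].records_read;                 /* fanout of the prefix */
    read_time+= pos[i].read_time +
                record_count / TIME_FOR_COMPARE_SK;     /* WHERE evaluation cost */
  }
  *fanout_out= record_count;
  return read_time;
}

int main()
{
  SketchPos plan[]= { {100, 25.0}, {10, 300.0} };       /* hypothetical 2-table plan */
  double fanout;
  double cost= sketch_plan_cost(plan, 2, &fanout);
  printf("fanout=%.0f cost=%.0f\n", fanout, cost);      /* fanout=1000 cost=545 */
  return 0;
}
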
@@ -5208,7 +6525,7 @@ optimize_straight_join(JOIN *join, table_map join_tables)
All other cases are in-between these two extremes. Thus the parameter
 'search_depth' controls the exhaustiveness of the search. The higher the
- value, the longer the optimizaton time and possibly the better the
+ value, the longer the optimization time and possibly the better the
resulting plan. The lower the value, the fewer alternative plans are
estimated, but the more likely to get a bad QEP.
@@ -5282,11 +6599,18 @@ greedy_search(JOIN *join,
uint size_remain; // cardinality of remaining_tables
POSITION best_pos;
JOIN_TAB *best_table; // the next plan node to be added to the curr QEP
+ // ==join->tables or # tables in the sj-mat nest we're optimizing
+ uint n_tables __attribute__((unused));
DBUG_ENTER("greedy_search");
/* number of tables that remain to be optimized */
- size_remain= my_count_bits(remaining_tables);
+ n_tables= size_remain= my_count_bits(remaining_tables &
+ (join->emb_sjm_nest?
+ (join->emb_sjm_nest->sj_inner_tables &
+ ~join->const_table_map)
+ :
+ ~(table_map)0));
do {
/* Find the extension of the current QEP with the lowest cost */
@@ -5306,7 +6630,7 @@ greedy_search(JOIN *join,
'join->best_positions' contains a complete optimal extension of the
current partial QEP.
*/
- DBUG_EXECUTE("opt", print_plan(join, join->tables,
+ DBUG_EXECUTE("opt", print_plan(join, n_tables,
record_count, read_time, read_time,
"optimal"););
DBUG_RETURN(FALSE);
@@ -5334,6 +6658,7 @@ greedy_search(JOIN *join,
/* This has been already checked by best_extension_by_limited_search */
DBUG_ASSERT(!is_interleave_error);
+
/* find the position of 'best_table' in 'join->best_ref' */
best_idx= idx;
JOIN_TAB *pos= join->best_ref[best_idx];
@@ -5345,7 +6670,8 @@ greedy_search(JOIN *join,
/* compute the cost of the new plan extended with 'best_table' */
record_count*= join->positions[idx].records_read;
- read_time+= join->positions[idx].read_time;
+ read_time+= join->positions[idx].read_time +
+ record_count / (double) TIME_FOR_COMPARE;
remaining_tables&= ~(best_table->table->map);
--size_remain;
@@ -5359,6 +6685,177 @@ greedy_search(JOIN *join,
/**
+ Get cost of execution and fanout produced by selected tables in the join
+ prefix (where prefix is defined as prefix in depth-first traversal)
+
+ @param end_tab_idx The number of last tab to be taken into
+ account (in depth-first traversal prefix)
+ @param filter_map Bitmap of tables whose cost/fanout are to
+ be taken into account.
+ @param read_time_arg [out] store read time here
+ @param record_count_arg [out] store record count here
+
+ @note
+
+ @returns
+ read_time_arg and record_count_arg contain the computed cost and fanout
+*/
+
+void JOIN::get_partial_cost_and_fanout(int end_tab_idx,
+ table_map filter_map,
+ double *read_time_arg,
+ double *record_count_arg)
+{
+ double record_count= 1;
+ double read_time= 0.0;
+ double sj_inner_fanout= 1.0;
+ JOIN_TAB *end_tab= NULL;
+ JOIN_TAB *tab;
+ int i;
+ int last_sj_table= MAX_TABLES;
+
+ /*
+ Handle a special case where the join is degenerate, and produces no
+ records
+ */
+ if (table_count == const_tables)
+ {
+ *read_time_arg= 0.0;
+ /*
+ We return 1, because
+ - it is the pessimistic estimate (there might be grouping)
+ - it's safer, as we're less likely to hit the edge cases in
+ calculations.
+ */
+ *record_count_arg=1.0;
+ return;
+ }
+
+ for (tab= first_depth_first_tab(this), i= const_tables;
+ tab;
+ tab= next_depth_first_tab(this, tab), i++)
+ {
+ end_tab= tab;
+ if (i == end_tab_idx)
+ break;
+ }
+
+ for (tab= first_depth_first_tab(this), i= const_tables;
+ ;
+ tab= next_depth_first_tab(this, tab), i++)
+ {
+ if (end_tab->bush_root_tab && end_tab->bush_root_tab == tab)
+ {
+ /*
+ We've entered the SJM nest that contains the end_tab. The caller is
+ - interested in fanout inside the nest (because that's how many times
+ we'll invoke the attached WHERE conditions)
+ - not interested in cost
+ */
+ record_count= 1.0;
+ read_time= 0.0;
+ }
+
+ /*
+ Ignore fanout (but not cost) from sj-inner tables, as long as
+ the range that processes them finishes before the end_tab
+ */
+ if (tab->sj_strategy != SJ_OPT_NONE)
+ {
+ sj_inner_fanout= 1.0;
+ last_sj_table= i + tab->n_sj_tables;
+ }
+
+ table_map cur_table_map;
+ if (tab->table)
+ cur_table_map= tab->table->map;
+ else
+ {
+ /* This is a SJ-Materialization nest. Check all of its tables */
+ TABLE *first_child= tab->bush_children->start->table;
+ TABLE_LIST *sjm_nest= first_child->pos_in_table_list->embedding;
+ cur_table_map= sjm_nest->nested_join->used_tables;
+ }
+ if (tab->records_read && (cur_table_map & filter_map))
+ {
+ record_count *= tab->records_read;
+ read_time += tab->read_time + record_count / (double) TIME_FOR_COMPARE;
+ if (tab->emb_sj_nest)
+ sj_inner_fanout *= tab->records_read;
+ }
+
+ if (i == last_sj_table)
+ {
+ record_count /= sj_inner_fanout;
+ sj_inner_fanout= 1.0;
+ last_sj_table= MAX_TABLES;
+ }
+
+ if (tab == end_tab)
+ break;
+ }
+ *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE;
+ *record_count_arg= record_count;
+}
+
+
+/*
+ Get prefix cost and fanout. This function is different from
+ get_partial_cost_and_fanout:
+ - it operates on a JOIN that hasn't yet finished its optimization phase (in
+ particular, fix_semijoin_strategies_for_picked_join_order() and
+ get_best_combination() haven't been called)
+ - it assumes that the join prefix doesn't have any semi-join plans
+
+ These assumptions are met by the caller of the function.
+*/
+
+void JOIN::get_prefix_cost_and_fanout(uint n_tables,
+ double *read_time_arg,
+ double *record_count_arg)
+{
+ double record_count= 1;
+ double read_time= 0.0;
+ for (uint i= const_tables; i < n_tables + const_tables ; i++)
+ {
+ if (best_positions[i].records_read)
+ {
+ record_count *= best_positions[i].records_read;
+ read_time += best_positions[i].read_time;
+ }
+ }
+ *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE;
+ *record_count_arg= record_count;
+}
+
+
+/**
+ Estimate the number of rows that query execution will read.
+
+ @todo This is a very pessimistic upper bound. Use join selectivity
+ when available to produce a more realistic number.
+*/
+
+double JOIN::get_examined_rows()
+{
+ ha_rows examined_rows;
+ double prev_fanout= 1;
+ JOIN_TAB *tab= first_breadth_first_tab(this, WALK_OPTIMIZATION_TABS);
+ JOIN_TAB *prev_tab= tab;
+
+ examined_rows= tab->get_examined_rows();
+
+ while ((tab= next_breadth_first_tab(this, WALK_OPTIMIZATION_TABS, tab)))
+ {
+ prev_fanout *= prev_tab->records_read;
+ examined_rows+= (ha_rows) (tab->get_examined_rows() * prev_fanout);
+ prev_tab= tab;
+ }
+ return examined_rows;
+}
+
+
+/**
Find a good, possibly optimal, query execution plan (QEP) by a possibly
exhaustive search.
@@ -5500,38 +6997,54 @@ best_extension_by_limited_search(JOIN *join,
JOIN_TAB *s;
double best_record_count= DBL_MAX;
double best_read_time= DBL_MAX;
+ bool disable_jbuf= join->thd->variables.join_cache_level == 0;
DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
"part_plan"););
+ /*
+ If we are searching for the execution plan of a materialized semi-join nest
+ then allowed_tables contains bits only for the tables from this nest.
+ */
+ table_map allowed_tables= ~(table_map)0;
+ if (join->emb_sjm_nest)
+ allowed_tables= join->emb_sjm_nest->sj_inner_tables & ~join->const_table_map;
+
for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
{
table_map real_table_bit= s->table->map;
if ((remaining_tables & real_table_bit) &&
+ (allowed_tables & real_table_bit) &&
!(remaining_tables & s->dependent) &&
(!idx || !check_interleaving_with_nj(s)))
{
double current_record_count, current_read_time;
+ POSITION *position= join->positions + idx;
/* Find the best access method from 's' to the current partial plan */
- best_access_path(join, s, thd, remaining_tables, idx,
- record_count, read_time);
+ POSITION loose_scan_pos;
+ best_access_path(join, s, remaining_tables, idx, disable_jbuf,
+ record_count, join->positions + idx, &loose_scan_pos);
+
/* Compute the cost of extending the plan with 's' */
- current_record_count= record_count * join->positions[idx].records_read;
- current_read_time= read_time + join->positions[idx].read_time;
+
+ current_record_count= record_count * position->records_read;
+ current_read_time=read_time + position->read_time +
+ current_record_count / (double) TIME_FOR_COMPARE;
+
+ advance_sj_state(join, remaining_tables, idx, &current_record_count,
+ &current_read_time, &loose_scan_pos);
/* Expand only partial plans with lower cost than the best QEP so far */
- if ((current_read_time +
- current_record_count / (double) TIME_FOR_COMPARE) >= join->best_read)
+ if (current_read_time >= join->best_read)
{
DBUG_EXECUTE("opt", print_plan(join, idx+1,
current_record_count,
read_time,
- (current_read_time +
- current_record_count /
- (double) TIME_FOR_COMPARE),
+ current_read_time,
"prune_by_cost"););
restore_prev_nj_state(s);
+ restore_prev_sj_state(remaining_tables, s, idx);
continue;
}
@@ -5549,7 +7062,7 @@ best_extension_by_limited_search(JOIN *join,
if (best_record_count >= current_record_count &&
best_read_time >= current_read_time &&
/* TODO: What is the reasoning behind this condition? */
- (!(s->key_dependent & remaining_tables) ||
+ (!(s->key_dependent & allowed_tables & remaining_tables) ||
join->positions[idx].records_read < 2.0))
{
best_record_count= current_record_count;
@@ -5564,11 +7077,12 @@ best_extension_by_limited_search(JOIN *join,
current_read_time,
"pruned_by_heuristic"););
restore_prev_nj_state(s);
+ restore_prev_sj_state(remaining_tables, s, idx);
continue;
}
}
- if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) )
+ if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables )
{ /* Recursively expand the current partial plan */
swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
if (best_extension_by_limited_search(join,
@@ -5586,16 +7100,16 @@ best_extension_by_limited_search(JOIN *join,
'join' is either the best partial QEP with 'search_depth' relations,
or the best complete QEP so far, whichever is smaller.
*/
- current_read_time+= current_record_count / (double) TIME_FOR_COMPARE;
if (join->sort_by_table &&
join->sort_by_table !=
join->positions[join->const_tables].table->table)
/* We have to make a temp table */
current_read_time+= current_record_count;
- if ((search_depth == 1) || (current_read_time < join->best_read))
+ if (current_read_time < join->best_read)
{
memcpy((uchar*) join->best_positions, (uchar*) join->positions,
sizeof(POSITION) * (idx + 1));
+ join->record_count= current_record_count;
join->best_read= current_read_time - 0.001;
}
DBUG_EXECUTE("opt", print_plan(join, idx+1,
@@ -5605,6 +7119,7 @@ best_extension_by_limited_search(JOIN *join,
"full_plan"););
}
restore_prev_nj_state(s);
+ restore_prev_sj_state(remaining_tables, s, idx);
}
}
DBUG_RETURN(FALSE);
@@ -5651,6 +7166,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
JOIN_TAB *s;
double best_record_count=DBL_MAX,best_read_time=DBL_MAX;
+ bool disable_jbuf= join->thd->variables.join_cache_level == 0;
for (JOIN_TAB **pos=join->best_ref+idx ; (s=*pos) ; pos++)
{
table_map real_table_bit=s->table->map;
@@ -5658,8 +7174,9 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
(!idx|| !check_interleaving_with_nj(s)))
{
double records, best;
- best_access_path(join, s, thd, rest_tables, idx, record_count,
- read_time);
+ POSITION loose_scan_pos;
+ best_access_path(join, s, rest_tables, idx, disable_jbuf, record_count,
+ join->positions + idx, &loose_scan_pos);
records= join->positions[idx].records_read;
best= join->positions[idx].read_time;
/*
@@ -5668,6 +7185,9 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
*/
double current_record_count=record_count*records;
double current_read_time=read_time+best;
+ advance_sj_state(join, rest_tables, idx, &current_record_count,
+ &current_read_time, &loose_scan_pos);
+
if (best_record_count > current_record_count ||
best_read_time > current_read_time ||
(idx == join->const_tables && s->table == join->sort_by_table))
@@ -5686,6 +7206,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
}
restore_prev_nj_state(s);
+ restore_prev_sj_state(rest_tables, s, idx);
if (join->select_options & SELECT_STRAIGHT_JOIN)
break; // Don't test all combinations
}
@@ -5698,14 +7219,16 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
 Find how much space the previously read non-const tables take in the cache.
*/
-static void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
+void JOIN_TAB::calc_used_field_length(bool max_fl)
{
- uint null_fields,blobs,fields,rec_length;
+ uint null_fields,blobs,fields;
+ ulong rec_length;
Field **f_ptr,*field;
- MY_BITMAP *read_set= join_tab->table->read_set;;
+ uint uneven_bit_fields;
+ MY_BITMAP *read_set= table->read_set;
- null_fields= blobs= fields= rec_length=0;
- for (f_ptr=join_tab->table->field ; (field= *f_ptr) ; f_ptr++)
+ uneven_bit_fields= null_fields= blobs= fields= rec_length=0;
+ for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
{
if (bitmap_is_set(read_set, field->field_index))
{
@@ -5716,21 +7239,118 @@ static void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
blobs++;
if (!(flags & NOT_NULL_FLAG))
null_fields++;
+ if (field->type() == MYSQL_TYPE_BIT &&
+ ((Field_bit*)field)->bit_len)
+ uneven_bit_fields++;
}
}
- if (null_fields)
- rec_length+=(join_tab->table->s->null_fields+7)/8;
- if (join_tab->table->maybe_null)
+ if (null_fields || uneven_bit_fields)
+ rec_length+=(table->s->null_fields+7)/8;
+ if (table->maybe_null)
rec_length+=sizeof(my_bool);
- if (blobs)
+
+ /* Take into account that DuplicateElimination may need to store rowid */
+ uint rowid_add_size= 0;
+ if (keep_current_rowid)
+ {
+ rowid_add_size= table->file->ref_length;
+ rec_length += rowid_add_size;
+ fields++;
+ }
+
+ if (max_fl)
+ {
+ // TODO: to improve this estimate for max expected length
+ if (blobs)
+ {
+ ulong blob_length= table->file->stats.mean_rec_length;
+ if (ULONG_MAX - rec_length > blob_length)
+ rec_length+= blob_length;
+ else
+ rec_length= ULONG_MAX;
+ }
+ max_used_fieldlength= rec_length;
+ }
+ else if (table->file->stats.mean_rec_length)
+ set_if_smaller(rec_length, table->file->stats.mean_rec_length + rowid_add_size);
+
+ used_fields=fields;
+ used_fieldlength=rec_length;
+ used_blobs=blobs;
+ used_null_fields= null_fields;
+ used_uneven_bit_fields= uneven_bit_fields;
+}
+
+
+/*
+ @brief
+ Extract pushdown conditions for a table scan
+
+ @details
+ This function extracts pushdown conditions usable when this table is scanned.
+ The conditions are extracted either from WHERE or from ON expressions.
+ The conditions are attached to the field cache_select of this table.
+
+ @note
+ Currently the extracted conditions are used only by BNL and BNLH join
+ algorithms.
+
+ @retval 0 on success
+ 1 otherwise
+*/
+
+int JOIN_TAB::make_scan_filter()
+{
+ COND *tmp;
+ DBUG_ENTER("make_scan_filter");
+
+ Item *cond= is_inner_table_of_outer_join() ?
+ *get_first_inner_table()->on_expr_ref : join->conds;
+
+ if (cond &&
+ (tmp= make_cond_for_table(join->thd, cond,
+ join->const_table_map | table->map,
+ table->map, -1, FALSE, TRUE)))
+ {
+ DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
+ if (!(cache_select=
+ (SQL_SELECT*) join->thd->memdup((uchar*) select, sizeof(SQL_SELECT))))
+ DBUG_RETURN(1);
+ cache_select->cond= tmp;
+ cache_select->read_tables=join->const_table_map;
+ }
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief
+ Check whether hash join algorithm can be used to join this table
+
+ @details
+ This function finds out whether the ref items that have been chosen
+ by the planner to access this table can be used for hash join algorithms.
+ The answer depends on a certain property of the fields of the
+ joined tables on which the hash join key is built.
+
+ @note
+ At present the function is supposed to be called only after the function
+ get_best_combination has been called.
+
+ @retval TRUE it's possible to use hash join to join this table
+ @retval FALSE otherwise
+*/
+
+bool JOIN_TAB::hash_join_is_possible()
+{
+ if (type != JT_REF && type != JT_EQ_REF)
+ return FALSE;
+ if (!is_ref_for_hash_join())
{
- uint blob_length=(uint) (join_tab->table->file->stats.mean_rec_length-
- (join_tab->table->s->reclength- rec_length));
- rec_length+=(uint) max(4,blob_length);
+ KEY *keyinfo= table->key_info + ref.key;
+ return keyinfo->key_part[0].field->hash_join_is_possible();
}
- join_tab->used_fields=fields;
- join_tab->used_fieldlength=rec_length;
- join_tab->used_blobs=blobs;
+ return TRUE;
}
@@ -5739,16 +7359,13 @@ cache_record_length(JOIN *join,uint idx)
{
uint length=0;
JOIN_TAB **pos,**end;
- THD *thd=join->thd;
for (pos=join->best_ref+join->const_tables,end=join->best_ref+idx ;
pos != end ;
pos++)
{
JOIN_TAB *join_tab= *pos;
- if (!join_tab->used_fieldlength) /* Not calced yet */
- calc_used_field_length(thd, join_tab);
- length+=join_tab->used_fieldlength;
+ length+= join_tab->get_used_fieldlength();
}
return length;
}
@@ -5805,12 +7422,12 @@ cache_record_length(JOIN *join,uint idx)
Expected number of row combinations
*/
-static double
-prev_record_reads(JOIN *join, uint idx, table_map found_ref)
+double
+prev_record_reads(POSITION *positions, uint idx, table_map found_ref)
{
double found=1.0;
- POSITION *pos_end= join->positions - 1;
- for (POSITION *pos= join->positions + idx - 1; pos != pos_end; pos--)
+ POSITION *pos_end= positions - 1;
+ for (POSITION *pos= positions + idx - 1; pos != pos_end; pos--)
{
if (pos->table->table->map & found_ref)
{
@@ -5839,14 +7456,256 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref)
}
-/**
- Set up join struct according to best position.
+/*
+ Enumerate join tabs in breadth-first fashion, including const tables.
*/
-static bool
+JOIN_TAB *first_breadth_first_tab(JOIN *join, enum enum_exec_or_opt tabs_kind)
+{
+ /* There's always one (i.e. first) table */
+ return (tabs_kind == WALK_EXECUTION_TABS)? join->join_tab:
+ join->table_access_tabs;
+}
+
+
+JOIN_TAB *next_breadth_first_tab(JOIN *join, enum enum_exec_or_opt tabs_kind,
+ JOIN_TAB *tab)
+{
+ JOIN_TAB* const first_top_tab= first_breadth_first_tab(join, tabs_kind);
+ const uint n_top_tabs_count= (tabs_kind == WALK_EXECUTION_TABS)?
+ join->top_join_tab_count:
+ join->top_table_access_tabs_count;
+ if (!tab->bush_root_tab)
+ {
+ /* We're at top level. Get the next top-level tab */
+ tab++;
+ if (tab < first_top_tab + n_top_tabs_count)
+ return tab;
+
+ /* No more top-level tabs. Switch to enumerating SJM nest children */
+ tab= first_top_tab;
+ }
+ else
+ {
+ /* We're inside of an SJM nest */
+ if (!tab->last_leaf_in_bush)
+ {
+ /* There's one more table in the nest, return it. */
+ return ++tab;
+ }
+ else
+ {
+ /*
+ There are no more tables in this nest. Get out of it and then we'll
+ proceed to the next nest.
+ */
+ tab= tab->bush_root_tab + 1;
+ }
+ }
+
+ /*
+ Ok, "tab" points to a top-level table, and we need to find the next SJM
+ nest and enter it.
+ */
+ for (; tab < first_top_tab + n_top_tabs_count; tab++)
+ {
+ if (tab->bush_children)
+ return tab->bush_children->start;
+ }
+ return NULL;
+}
+
+
+JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables const_tbls)
+{
+ JOIN_TAB *tab= join->join_tab;
+ if (const_tbls == WITHOUT_CONST_TABLES)
+ {
+ if (join->const_tables == join->table_count)
+ return NULL;
+ tab += join->const_tables;
+ }
+ return tab;
+}
+
+
+JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab)
+{
+ tab= next_breadth_first_tab(join, WALK_EXECUTION_TABS, tab);
+ if (tab && tab->bush_root_tab)
+ tab= NULL;
+ return tab;
+}
+
+
+JOIN_TAB *first_linear_tab(JOIN *join,
+ enum enum_with_bush_roots include_bush_roots,
+ enum enum_with_const_tables const_tbls)
+{
+ JOIN_TAB *first= join->join_tab;
+ if (const_tbls == WITHOUT_CONST_TABLES)
+ first+= join->const_tables;
+
+ if (first >= join->join_tab + join->top_join_tab_count)
+ return NULL; /* All are const tables */
+
+ if (first->bush_children && include_bush_roots == WITHOUT_BUSH_ROOTS)
+ {
+ /* This JOIN_TAB is a SJM nest; Start from first table in nest */
+ return first->bush_children->start;
+ }
+
+ return first;
+}
+
+
+/*
+  A helper function to loop over all of a join's JOIN_TABs in sequential fashion
+
+  DESCRIPTION
+    Depending on the include_bush_roots parameter, JOIN_TABs that represent
+    SJM-scan/lookups are either returned or omitted.
+
+ SJM-Bush children are returned right after (or in place of) their container
+ join tab (TODO: does anybody depend on this? A: make_join_readinfo() seems
+ to)
+
+ For example, if we have this structure:
+
+ ot1--ot2--sjm1----------------ot3-...
+ |
+ +--it1--it2--it3
+
+  calls to next_linear_tab(include_bush_roots=TRUE) will return:
+
+ ot1 ot2 sjm1 it1 it2 it3 ot3 ...
+
+  while calls to next_linear_tab(include_bush_roots=FALSE) will return:
+
+ ot1 ot2 it1 it2 it3 ot3 ...
+
+ (note that sjm1 won't be returned).
+*/
+
+JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab,
+ enum enum_with_bush_roots include_bush_roots)
+{
+ if (include_bush_roots == WITH_BUSH_ROOTS && tab->bush_children)
+ {
+ /* This JOIN_TAB is a SJM nest; Start from first table in nest */
+ return tab->bush_children->start;
+ }
+
+ DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab);
+
+ if (tab->bush_root_tab) /* Are we inside an SJM nest */
+ {
+ /* Inside SJM nest */
+ if (!tab->last_leaf_in_bush)
+ return tab+1; /* Return next in nest */
+ /* Continue from the sjm on the top level */
+ tab= tab->bush_root_tab;
+ }
+
+ /* If no more JOIN_TAB's on the top level */
+ if (++tab == join->join_tab + join->top_join_tab_count)
+ return NULL;
+
+ if (include_bush_roots == WITHOUT_BUSH_ROOTS && tab->bush_children)
+ {
+ /* This JOIN_TAB is a SJM nest; Start from first table in nest */
+ tab= tab->bush_children->start;
+ }
+ return tab;
+}
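
  A sketch of the typical caller loop (this is how add_not_null_conds() and
  make_outerjoin_info() below use these helpers; shown only as illustration):

    for (JOIN_TAB *tab= first_linear_tab(join, WITH_BUSH_ROOTS,
                                         WITHOUT_CONST_TABLES);
         tab;
         tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
    {
      /* Process tab; SJM nest children are returned right after their
         bush root. */
    }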
+
+
+/*
+ Start to iterate over all join tables in bush-children-first order, excluding
+ the const tables (see next_depth_first_tab() comment for details)
+*/
+
+JOIN_TAB *first_depth_first_tab(JOIN* join)
+{
+ JOIN_TAB* tab;
+ /* This means we're starting the enumeration */
+ if (join->const_tables == join->top_join_tab_count)
+ return NULL;
+
+ tab= join->join_tab + join->const_tables;
+
+ return (tab->bush_children) ? tab->bush_children->start : tab;
+}
+
+
+/*
+ A helper function to iterate over all join tables in bush-children-first order
+
+ DESCRIPTION
+
+ For example, for this join plan
+
+ ot1--ot2--sjm1------------ot3-...
+ |
+ |
+ it1--it2--it3
+
+ call to first_depth_first_tab() will return ot1, and subsequent calls to
+ next_depth_first_tab() will return:
+
+    ot2 it1 it2 it3 sjm1 ot3 ...
+*/
+
+JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab)
+{
+ /* If we're inside SJM nest and have reached its end, get out */
+ if (tab->last_leaf_in_bush)
+ return tab->bush_root_tab;
+
+ /* Move to next tab in the array we're traversing */
+ tab++;
+
+ if (tab == join->join_tab +join->top_join_tab_count)
+ return NULL; /* Outside SJM nest and reached EOF */
+
+ if (tab->bush_children)
+ return tab->bush_children->start;
+
+ return tab;
+}
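
  And the matching caller loop for the depth-first order (it mirrors the walk
  done in make_join_select() later in this patch; illustration only):

    for (JOIN_TAB *tab= first_depth_first_tab(join);
         tab;
         tab= next_depth_first_tab(join, tab))
    {
      /* Attach per-table work here; SJM nest children are visited before
         their bush root. */
    }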
+
+
+static Item * const null_ptr= NULL;
+
+/*
+  Set up the join struct according to the picked join order
+
+ SYNOPSIS
+ get_best_combination()
+ join The join to process (the picked join order is mainly in
+ join->best_positions)
+
+ DESCRIPTION
+    Set up join structures according to the picked join order
+ - finalize semi-join strategy choices (see
+ fix_semijoin_strategies_for_picked_join_order)
+ - create join->join_tab array and put there the JOIN_TABs in the join order
+ - create data structures describing ref access methods.
+
+ NOTE
+ In this function we switch from pre-join-optimization JOIN_TABs to
+ post-join-optimization JOIN_TABs. This is achieved by copying the entire
+ JOIN_TAB objects.
+
+ RETURN
+ FALSE OK
+ TRUE Out of memory
+*/
+
+bool
get_best_combination(JOIN *join)
{
- uint i,tablenr;
+ uint tablenr;
table_map used_tables;
JOIN_TAB *join_tab,*j;
KEYUSE *keyuse;
@@ -5854,64 +7713,318 @@ get_best_combination(JOIN *join)
THD *thd=join->thd;
DBUG_ENTER("get_best_combination");
- table_count=join->tables;
+ table_count=join->table_count;
if (!(join->join_tab=join_tab=
(JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*table_count)))
DBUG_RETURN(TRUE);
join->full_join=0;
+ join->hash_join= FALSE;
used_tables= OUTER_REF_TABLE_BIT; // Outer row is already read
+
+ fix_semijoin_strategies_for_picked_join_order(join);
+
+ JOIN_TAB_RANGE *root_range;
+ if (!(root_range= new JOIN_TAB_RANGE))
+ DBUG_RETURN(TRUE);
+ root_range->start= join->join_tab;
+ /* root_range->end will be set later */
+ join->join_tab_ranges.empty();
+
+ if (join->join_tab_ranges.push_back(root_range))
+ DBUG_RETURN(TRUE);
+
+ JOIN_TAB *sjm_nest_end= NULL;
+ JOIN_TAB *sjm_nest_root= NULL;
+
for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
{
TABLE *form;
+ POSITION *cur_pos= &join->best_positions[tablenr];
+ if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE ||
+ cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
+ {
+ /*
+ Ok, we've entered an SJ-Materialization semi-join (note that this can't
+ be done recursively, semi-joins are not allowed to be nested).
+ 1. Put into main join order a JOIN_TAB that represents a lookup or scan
+ in the temptable.
+ */
+ bzero(j, sizeof(JOIN_TAB));
+ j->join= join;
+ j->table= NULL; //temporary way to tell SJM tables from others.
+ j->ref.key = -1;
+ j->on_expr_ref= (Item**) &null_ptr;
+ j->keys= key_map(1); /* The unique index is always in 'possible keys' in EXPLAIN */
+
+ /*
+ 2. Proceed with processing SJM nest's join tabs, putting them into the
+ sub-order
+ */
+ SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info;
+ j->records= j->records_read= (ha_rows)(sjm->is_sj_scan? sjm->rows : 1);
+ JOIN_TAB *jt;
+ JOIN_TAB_RANGE *jt_range;
+ if (!(jt= (JOIN_TAB*)join->thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) ||
+ !(jt_range= new JOIN_TAB_RANGE))
+ DBUG_RETURN(TRUE);
+ jt_range->start= jt;
+ jt_range->end= jt + sjm->tables;
+ join->join_tab_ranges.push_back(jt_range);
+ j->bush_children= jt_range;
+ sjm_nest_end= jt + sjm->tables;
+ sjm_nest_root= j;
+
+ j= jt;
+ }
+
*j= *join->best_positions[tablenr].table;
- form=join->all_tables[tablenr]=j->table;
+
+ j->bush_root_tab= sjm_nest_root;
+
+ form=join->table[tablenr]=j->table;
used_tables|= form->map;
form->reginfo.join_tab=j;
if (!*j->on_expr_ref)
form->reginfo.not_exists_optimize=0; // Only with LEFT JOIN
DBUG_PRINT("info",("type: %d", j->type));
if (j->type == JT_CONST)
- continue; // Handled in make_join_stat..
+ goto loop_end; // Handled in make_join_stat..
+ j->loosescan_match_tab= NULL; //non-nulls will be set later
+ j->inside_loosescan_range= FALSE;
j->ref.key = -1;
j->ref.key_parts=0;
if (j->type == JT_SYSTEM)
- continue;
- if (j->keys.is_clear_all() || !(keyuse= join->best_positions[tablenr].key))
+ goto loop_end;
+ if ( !(keyuse= join->best_positions[tablenr].key))
{
j->type=JT_ALL;
- if (tablenr != join->const_tables)
- join->full_join=1;
+ if (join->best_positions[tablenr].use_join_buffer &&
+ tablenr != join->const_tables)
+ join->full_join= 1;
}
- else if (create_ref_for_key(join, j, keyuse, used_tables))
+
+ /*if (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN)
+ {
+ DBUG_ASSERT(!keyuse || keyuse->key ==
+ join->best_positions[tablenr].loosescan_picker.loosescan_key);
+ j->index= join->best_positions[tablenr].loosescan_picker.loosescan_key;
+ }*/
+
+ if (keyuse && create_ref_for_key(join, j, keyuse, TRUE, used_tables))
DBUG_RETURN(TRUE); // Something went wrong
+
+ if ((j->type == JT_REF || j->type == JT_EQ_REF) &&
+ is_hash_join_key_no(j->ref.key))
+ join->hash_join= TRUE;
+
+ loop_end:
+ /*
+ Save records_read in JOIN_TAB so that select_describe()/etc don't have
+ to access join->best_positions[].
+ */
+ j->records_read= (ha_rows)join->best_positions[tablenr].records_read;
+ join->map2table[j->table->tablenr]= j;
+
+ /* If we've reached the end of sjm nest, switch back to main sequence */
+ if (j + 1 == sjm_nest_end)
+ {
+ j->last_leaf_in_bush= TRUE;
+ j= sjm_nest_root;
+ sjm_nest_root= NULL;
+ sjm_nest_end= NULL;
+ }
}
+ root_range->end= j;
+
+ join->top_join_tab_count= join->join_tab_ranges.head()->end -
+ join->join_tab_ranges.head()->start;
+ /*
+ Save pointers to select join tabs for SHOW EXPLAIN
+ */
+ join->table_access_tabs= join->join_tab;
+ join->top_table_access_tabs_count= join->top_join_tab_count;
- for (i=0 ; i < table_count ; i++)
- join->map2table[join->join_tab[i].table->tablenr]=join->join_tab+i;
update_depend_map(join);
DBUG_RETURN(0);
}
+/**
+ Create a descriptor of hash join key to access a given join table
+
+ @param join join which the join table belongs to
+ @param join_tab the join table to access
+ @param org_keyuse beginning of the key uses to join this table
+ @param used_tables bitmap of the previous tables
-static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
- table_map used_tables)
+ @details
+ This function first finds key uses that can be utilized by the hash join
+ algorithm to join join_tab to the previous tables marked in the bitmap
+ used_tables. The tested key uses are taken from the array of all key uses
+ for 'join' starting from the position org_keyuse. After all interesting key
+ uses have been found the function builds a descriptor of the corresponding
+    key that the hash join algorithm would use, should it be chosen to join
+    the table join_tab.
+
+ @retval FALSE the descriptor for a hash join key is successfully created
+ @retval TRUE otherwise
+*/
+
+static bool create_hj_key_for_table(JOIN *join, JOIN_TAB *join_tab,
+ KEYUSE *org_keyuse, table_map used_tables)
{
- KEYUSE *keyuse=org_keyuse;
- bool ftkey=(keyuse->keypart == FT_KEYPART);
+ KEY *keyinfo;
+ KEY_PART_INFO *key_part_info;
+ KEYUSE *keyuse= org_keyuse;
+ uint key_parts= 0;
THD *thd= join->thd;
- uint keyparts,length,key;
+ TABLE *table= join_tab->table;
+ bool first_keyuse= TRUE;
+ DBUG_ENTER("create_hj_key_for_table");
+
+ do
+ {
+ if (!(~used_tables & keyuse->used_tables) &&
+ are_tables_local(join_tab, keyuse->used_tables))
+ {
+ if (first_keyuse)
+ {
+ key_parts++;
+ first_keyuse= FALSE;
+ }
+ else
+ {
+ KEYUSE *curr= org_keyuse;
+ for( ; curr < keyuse; curr++)
+ {
+ if (curr->keypart == keyuse->keypart &&
+ !(~used_tables & curr->used_tables) &&
+ are_tables_local(join_tab, curr->used_tables))
+ break;
+ }
+ if (curr == keyuse)
+ key_parts++;
+ }
+ }
+ keyuse++;
+ } while (keyuse->table == table && keyuse->is_for_hash_join());
+ if (!key_parts)
+ DBUG_RETURN(TRUE);
+ /* This memory is allocated only once for the joined table join_tab */
+ if (!(keyinfo= (KEY *) thd->alloc(sizeof(KEY))) ||
+ !(key_part_info = (KEY_PART_INFO *) thd->alloc(sizeof(KEY_PART_INFO)*
+ key_parts)))
+ DBUG_RETURN(TRUE);
+ keyinfo->usable_key_parts= keyinfo->key_parts = key_parts;
+ keyinfo->ext_key_parts= keyinfo->key_parts;
+ keyinfo->key_part= key_part_info;
+ keyinfo->key_length=0;
+ keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+ keyinfo->flags= HA_GENERATED_KEY;
+ keyinfo->name= (char *) "$hj";
+ keyinfo->rec_per_key= (ulong*) thd->calloc(sizeof(ulong)*key_parts);
+ if (!keyinfo->rec_per_key)
+ DBUG_RETURN(TRUE);
+ keyinfo->key_part= key_part_info;
+
+ first_keyuse= TRUE;
+ keyuse= org_keyuse;
+ do
+ {
+ if (!(~used_tables & keyuse->used_tables) &&
+ are_tables_local(join_tab, keyuse->used_tables))
+ {
+ bool add_key_part= TRUE;
+ if (!first_keyuse)
+ {
+ for(KEYUSE *curr= org_keyuse; curr < keyuse; curr++)
+ {
+ if (curr->keypart == keyuse->keypart &&
+ !(~used_tables & curr->used_tables) &&
+ are_tables_local(join_tab, curr->used_tables))
+ {
+ keyuse->keypart= NO_KEYPART;
+ add_key_part= FALSE;
+ break;
+ }
+ }
+ }
+ if (add_key_part)
+ {
+ Field *field= table->field[keyuse->keypart];
+ uint fieldnr= keyuse->keypart+1;
+ table->create_key_part_by_field(key_part_info, field, fieldnr);
+ keyinfo->key_length += key_part_info->store_length;
+ key_part_info++;
+ }
+ }
+ first_keyuse= FALSE;
+ keyuse++;
+ } while (keyuse->table == table && keyuse->is_for_hash_join());
+
+ keyinfo->ext_key_parts= keyinfo->key_parts;
+ keyinfo->ext_key_flags= keyinfo->flags;
+ keyinfo->ext_key_part_map= 0;
+
+ join_tab->hj_key= keyinfo;
+
+ DBUG_RETURN(FALSE);
+}
+
+/*
+ Check if a set of tables specified by used_tables can be accessed when
+  we're doing a scan on join_tab 'jtab'.
+*/
+static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables)
+{
+ if (jtab->bush_root_tab)
+ {
+ /*
+      jtab is inside an execution join nest. We may not refer to outside
+      tables, except for the const tables.
+ */
+ table_map local_tables= jtab->emb_sj_nest->nested_join->used_tables |
+ jtab->join->const_table_map |
+ OUTER_REF_TABLE_BIT;
+ return !test(used_tables & ~local_tables);
+ }
+
+ /*
+ If we got here then jtab is at top level.
+ - all other tables at top level are accessible,
+ - tables in join nests are accessible too, because all their columns that
+ are needed at top level will be unpacked when scanning the
+ materialization table.
+ */
+ return TRUE;
+}
+
+static bool create_ref_for_key(JOIN *join, JOIN_TAB *j,
+ KEYUSE *org_keyuse, bool allow_full_scan,
+ table_map used_tables)
+{
+ uint keyparts, length, key;
TABLE *table;
KEY *keyinfo;
+ KEYUSE *keyuse= org_keyuse;
+ bool ftkey= (keyuse->keypart == FT_KEYPART);
+ THD *thd= join->thd;
DBUG_ENTER("create_ref_for_key");
/* Use best key from find_best */
- table=j->table;
- key=keyuse->key;
- keyinfo=table->key_info+key;
+ table= j->table;
+ key= keyuse->key;
+ if (!is_hash_join_key_no(key))
+ keyinfo= table->key_info+key;
+ else
+ {
+ if (create_hj_key_for_table(join, j, org_keyuse, used_tables))
+ DBUG_RETURN(TRUE);
+ keyinfo= j->hj_key;
+ }
if (ftkey)
{
@@ -5932,30 +8045,45 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
*/
do
{
- if (!(~used_tables & keyuse->used_tables))
+ if (!(~used_tables & keyuse->used_tables) &&
+ j->access_from_tables_is_allowed(keyuse->used_tables,
+ join->sjm_lookup_tables))
{
- if (keyparts == keyuse->keypart &&
- !(found_part_ref_or_null & keyuse->optimize))
- {
- keyparts++;
- length+= keyinfo->key_part[keyuse->keypart].store_length;
- found_part_ref_or_null|= keyuse->optimize;
- }
+ if (are_tables_local(j, keyuse->val->used_tables()))
+ {
+ if ((is_hash_join_key_no(key) && keyuse->keypart != NO_KEYPART) ||
+ (!is_hash_join_key_no(key) && keyparts == keyuse->keypart &&
+ !(found_part_ref_or_null & keyuse->optimize)))
+ {
+ length+= keyinfo->key_part[keyparts].store_length;
+ keyparts++;
+ found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
+ }
+ }
}
keyuse++;
} while (keyuse->table == table && keyuse->key == key);
- DBUG_ASSERT(length > 0 && keyparts != 0);
- } /* not ftkey */
+ if (!keyparts && allow_full_scan)
+ {
+ /* It's a LooseIndexScan strategy scanning whole index */
+ j->type= JT_ALL;
+ j->index= key;
+ DBUG_RETURN(FALSE);
+ }
+
+ DBUG_ASSERT(length > 0);
+ DBUG_ASSERT(keyparts != 0);
+ } /* not ftkey */
+
/* set up fieldref */
- keyinfo=table->key_info+key;
- j->ref.key_parts=keyparts;
- j->ref.key_length=length;
- j->ref.key=(int) key;
+ j->ref.key_parts= keyparts;
+ j->ref.key_length= length;
+ j->ref.key= (int) key;
if (!(j->ref.key_buff= (uchar*) thd->calloc(ALIGN_SIZE(length)*2)) ||
!(j->ref.key_copy= (store_key**) thd->alloc((sizeof(store_key*) *
- (keyparts+1)))) ||
- !(j->ref.items= (Item**) thd->alloc(sizeof(Item*)*keyparts)) ||
+ (keyparts+1)))) ||
+ !(j->ref.items=(Item**) thd->alloc(sizeof(Item*)*keyparts)) ||
!(j->ref.cond_guards= (bool**) thd->alloc(sizeof(uint*)*keyparts)))
{
DBUG_RETURN(TRUE);
@@ -5964,11 +8092,13 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
j->ref.key_err=1;
j->ref.has_record= FALSE;
j->ref.null_rejecting= 0;
- j->ref.use_count= 0;
+ j->ref.disable_cache= FALSE;
+ j->ref.null_ref_part= NO_REF_PART;
keyuse=org_keyuse;
store_key **ref_key= j->ref.key_copy;
uchar *key_buff=j->ref.key_buff, *null_ref_key= 0;
+ uint null_ref_part= NO_REF_PART;
bool keyuse_uses_no_tables= TRUE;
if (ftkey)
{
@@ -5985,9 +8115,15 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
uint i;
for (i=0 ; i < keyparts ; keyuse++,i++)
{
- while (keyuse->keypart != i ||
- ((~used_tables) & keyuse->used_tables))
- keyuse++; /* Skip other parts */
+ while (((~used_tables) & keyuse->used_tables) ||
+ !j->access_from_tables_is_allowed(keyuse->used_tables,
+ join->sjm_lookup_tables) ||
+ keyuse->keypart == NO_KEYPART ||
+ (keyuse->keypart !=
+ (is_hash_join_key_no(key) ?
+ keyinfo->key_part[i].field->field_index : i)) ||
+ !are_tables_local(j, keyuse->val->used_tables()))
+ keyuse++; /* Skip other parts */
uint maybe_null= test(keyinfo->key_part[i].null_bit);
j->ref.items[i]=keyuse->val; // Save for cond removal
@@ -5995,13 +8131,15 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
if (keyuse->null_rejecting)
j->ref.null_rejecting|= (key_part_map)1 << i;
keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;
- if (!keyuse->used_tables &&
- !(join->select_options & SELECT_DESCRIBE))
+ if (!keyuse->val->used_tables() && !thd->lex->describe)
{ // Compare against constant
- store_key_item tmp(thd, keyinfo->key_part[i].field,
+ store_key_item tmp(thd,
+ keyinfo->key_part[i].field,
key_buff + maybe_null,
maybe_null ? key_buff : 0,
- keyinfo->key_part[i].length, keyuse->val);
+ keyinfo->key_part[i].length,
+ keyuse->val,
+ FALSE);
if (thd->is_fatal_error)
DBUG_RETURN(TRUE);
tmp.copy();
@@ -6017,21 +8155,30 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
instead of JT_REF_OR_NULL in case if field can't be null
*/
if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && maybe_null)
+ {
null_ref_key= key_buff;
- key_buff+=keyinfo->key_part[i].store_length;
+ null_ref_part= i;
+ }
+ key_buff+= keyinfo->key_part[i].store_length;
}
} /* not ftkey */
*ref_key=0; // end_marker
if (j->type == JT_FT)
DBUG_RETURN(0);
+ ulong key_flags= j->table->actual_key_flags(keyinfo);
if (j->type == JT_CONST)
j->table->const_table= 1;
- else if (((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) != HA_NOSAME) ||
- keyparts != keyinfo->key_parts || null_ref_key)
+ else if (!((keyparts == keyinfo->key_parts &&
+ ((key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)) ||
+ (keyparts > keyinfo->key_parts && // true only for extended keys
+ test(key_flags & HA_EXT_NOSAME) &&
+ keyparts == keyinfo->ext_key_parts)) ||
+ null_ref_key)
{
/* Must read with repeat */
j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF;
j->ref.null_ref_key= null_ref_key;
+ j->ref.null_ref_part= null_ref_part;
}
else if (keyuse_uses_no_tables)
{
@@ -6046,6 +8193,9 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
}
else
j->type=JT_EQ_REF;
+
+ j->read_record.unlock_row= (j->type == JT_EQ_REF)?
+ join_read_key_unlock_row : rr_unlock_row;
DBUG_RETURN(0);
}
@@ -6064,73 +8214,30 @@ get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables,
key_part->length,
keyuse->val);
}
-
- Item_field *field_item= NULL;
- if (keyuse->val->type() == Item::FIELD_ITEM)
- field_item= static_cast<Item_field*>(keyuse->val->real_item());
- else if (keyuse->val->type() == Item::REF_ITEM)
- {
- Item_ref *item_ref= static_cast<Item_ref*>(keyuse->val);
- if (item_ref->ref_type() == Item_ref::OUTER_REF)
- {
- if ((*item_ref->ref)->type() == Item::FIELD_ITEM)
- field_item= static_cast<Item_field*>(item_ref->real_item());
- else if ((*(Item_ref**)(item_ref)->ref)->ref_type()
- == Item_ref::DIRECT_REF
- &&
- item_ref->real_item()->type() == Item::FIELD_ITEM)
- field_item= static_cast<Item_field*>(item_ref->real_item());
- }
- }
- if (field_item)
+ else if (keyuse->val->type() == Item::FIELD_ITEM ||
+ (keyuse->val->type() == Item::REF_ITEM &&
+ ((((Item_ref*)keyuse->val)->ref_type() == Item_ref::OUTER_REF &&
+ (*(Item_ref**)((Item_ref*)keyuse->val)->ref)->ref_type() ==
+ Item_ref::DIRECT_REF) ||
+ ((Item_ref*)keyuse->val)->ref_type() == Item_ref::VIEW_REF) &&
+ keyuse->val->real_item()->type() == Item::FIELD_ITEM))
return new store_key_field(thd,
- key_part->field,
- key_buff + maybe_null,
- maybe_null ? key_buff : 0,
- key_part->length,
- field_item->field,
- keyuse->val->full_name());
+ key_part->field,
+ key_buff + maybe_null,
+ maybe_null ? key_buff : 0,
+ key_part->length,
+ ((Item_field*) keyuse->val->real_item())->field,
+ keyuse->val->real_item()->full_name());
return new store_key_item(thd,
key_part->field,
key_buff + maybe_null,
maybe_null ? key_buff : 0,
key_part->length,
- keyuse->val);
+ keyuse->val, FALSE);
}
/**
- This function is only called for const items on fields which are keys.
-
- @return
- returns 1 if there was some conversion made when the field was stored.
-*/
-
-bool
-store_val_in_field(Field *field, Item *item, enum_check_fields check_flag)
-{
- bool error;
- TABLE *table= field->table;
- THD *thd= table->in_use;
- ha_rows cuted_fields=thd->cuted_fields;
- my_bitmap_map *old_map= dbug_tmp_use_all_columns(table,
- table->write_set);
-
- /*
- we should restore old value of count_cuted_fields because
- store_val_in_field can be called from mysql_insert
- with select_insert, which make count_cuted_fields= 1
- */
- enum_check_fields old_count_cuted_fields= thd->count_cuted_fields;
- thd->count_cuted_fields= check_flag;
- error= item->save_in_field(field, 1);
- thd->count_cuted_fields= old_count_cuted_fields;
- dbug_tmp_restore_column_map(table->write_set, old_map);
- return error || cuted_fields != thd->cuted_fields;
-}
-
-
-/**
@details Initialize a JOIN as a query execution plan
that accesses a single table via a table scan.
@@ -6154,10 +8261,12 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
DBUG_RETURN(TRUE); /* purecov: inspected */
join_tab= parent->join_tab_reexec;
- parent->table_reexec[0]= temp_table;
- tables= 1;
+ table= &parent->table_reexec[0]; parent->table_reexec[0]= temp_table;
+ table_count= top_join_tab_count= 1;
+
const_tables= 0;
const_table_map= 0;
+ eliminated_tables= 0;
tmp_table_param.field_count= tmp_table_param.sum_func_count=
tmp_table_param.func_count= 0;
/*
@@ -6169,29 +8278,45 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
first_record= sort_and_group=0;
send_records= (ha_rows) 0;
- group= 0;
+
+ if (group_optimized_away && !tmp_table_param.precomputed_group_by)
+ {
+ /*
+ If grouping has been optimized away, a temporary table is
+ normally not needed unless we're explicitly requested to create
+ one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT).
+
+ In this case (grouping was optimized away), temp_table was
+ created without a grouping expression and JOIN::exec() will not
+ perform the necessary grouping (by the use of end_send_group()
+ or end_write_group()) if JOIN::group is set to false.
+
+ There is one exception: if the loose index scan access method is
+ used to read into the temporary table, grouping and aggregate
+ functions are handled.
+ */
+ // the temporary table was explicitly requested
+ DBUG_ASSERT(test(select_options & OPTION_BUFFER_RESULT));
+ // the temporary table does not have a grouping expression
+ DBUG_ASSERT(!temp_table->group);
+ }
+ else
+ group= false;
+
row_limit= unit->select_limit_cnt;
do_send_rows= row_limit ? 1 : 0;
- join_tab->cache.buff=0; /* No caching */
+ bzero(join_tab, sizeof(JOIN_TAB));
join_tab->table=temp_table;
- join_tab->select=0;
- join_tab->select_cond=0;
- join_tab->quick=0;
+ join_tab->set_select_cond(NULL, __LINE__);
join_tab->type= JT_ALL; /* Map through all records */
join_tab->keys.init();
join_tab->keys.set_all(); /* test everything in quick */
- join_tab->info=0;
- join_tab->on_expr_ref=0;
- join_tab->last_inner= 0;
- join_tab->first_unmatched= 0;
join_tab->ref.key = -1;
- join_tab->not_used_in_distinct=0;
+ join_tab->shortcut_for_distinct= false;
join_tab->read_first_record= join_init_read_record;
join_tab->join= this;
join_tab->ref.key_parts= 0;
- join_tab->filesort_used_loose_index_scan= false;
- join_tab->filesort_used_loose_index_scan_agg_distinct= false;
bzero((char*) &join_tab->read_record,sizeof(join_tab->read_record));
temp_table->status=0;
temp_table->null_row=0;
@@ -6199,16 +8324,18 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
}
-inline void add_cond_and_fix(Item **e1, Item *e2)
+inline void add_cond_and_fix(THD *thd, Item **e1, Item *e2)
{
if (*e1)
{
+ if (!e2)
+ return;
Item *res;
if ((res= new Item_cond_and(*e1, e2)))
{
- *e1= res;
- res->quick_fix_field();
+ res->fix_fields(thd, 0);
res->update_used_tables();
+ *e1= res;
}
}
else
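
  A sketch of how this helper is used a few hunks below (add_not_null_conds()
  AND-ing an IS NOT NULL guard into an existing condition); the names are
  taken from that code, the snippet itself is only illustrative:

    COND *new_cond= referred_tab->select_cond;         /* may be NULL */
    add_cond_and_fix(join->thd, &new_cond, notnull);   /* AND in the guard */
    referred_tab->set_select_cond(new_cond, __LINE__);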
@@ -6269,13 +8396,15 @@ inline void add_cond_and_fix(Item **e1, Item *e2)
static void add_not_null_conds(JOIN *join)
{
+ JOIN_TAB *tab;
DBUG_ENTER("add_not_null_conds");
- for (uint i=join->const_tables ; i < join->tables ; i++)
+
+ for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
+ tab;
+ tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
{
- JOIN_TAB *tab=join->join_tab+i;
- if ((tab->type == JT_REF || tab->type == JT_EQ_REF ||
- tab->type == JT_REF_OR_NULL) &&
- !tab->table->maybe_null)
+ if (tab->type == JT_REF || tab->type == JT_EQ_REF ||
+ tab->type == JT_REF_OR_NULL)
{
for (uint keypart= 0; keypart < tab->ref.key_parts; keypart++)
{
@@ -6283,15 +8412,25 @@ static void add_not_null_conds(JOIN *join)
{
Item *item= tab->ref.items[keypart];
Item *notnull;
- DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
- Item_field *not_null_item= (Item_field*)item;
+ Item *real= item->real_item();
+ if (real->const_item() && real->type() != Item::FIELD_ITEM &&
+ !real->is_expensive())
+ {
+ /*
+ It could be constant instead of field after constant
+ propagation.
+ */
+ continue;
+ }
+ DBUG_ASSERT(real->type() == Item::FIELD_ITEM);
+ Item_field *not_null_item= (Item_field*)real;
JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
/*
For UPDATE queries such as:
UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1);
not_null_item is the t1.f1, but its referred_tab is 0.
*/
- if (!referred_tab || referred_tab->join != join)
+ if (!referred_tab)
continue;
if (!(notnull= new Item_func_isnotnull(not_null_item)))
DBUG_VOID_RETURN;
@@ -6304,9 +8443,21 @@ static void add_not_null_conds(JOIN *join)
if (notnull->fix_fields(join->thd, &notnull))
DBUG_VOID_RETURN;
DBUG_EXECUTE("where",print_where(notnull,
- referred_tab->table->alias,
+ referred_tab->table->alias.c_ptr(),
QT_ORDINARY););
- add_cond_and_fix(&referred_tab->select_cond, notnull);
+ if (!tab->first_inner)
+ {
+ COND *new_cond= referred_tab->join == join ?
+ referred_tab->select_cond :
+ join->outer_ref_cond;
+ add_cond_and_fix(join->thd, &new_cond, notnull);
+ if (referred_tab->join == join)
+ referred_tab->set_select_cond(new_cond, __LINE__);
+ else
+ join->outer_ref_cond= new_cond;
+ }
+ else
+ add_cond_and_fix(join->thd, tab->first_inner->on_expr_ref, notnull);
}
}
}
@@ -6321,6 +8472,12 @@ static void add_not_null_conds(JOIN *join)
nested outer join and so on until it reaches root_tab
(root_tab can be 0).
+ In other words:
+ add_found_match_trig_cond(tab->first_inner_tab, y, 0) is the way one should
+  wrap parts of WHERE. The idea is that such a part of WHERE must only be
+  evaluated after we have figured out whether the embedding outer joins
+  produced a match or a NULL-complemented row.
+
@param tab the first inner table for most nested outer join
@param cond the predicate to be guarded (must be set)
@param root_tab the first inner table to stop
@@ -6348,6 +8505,12 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
}
+bool TABLE_LIST::is_active_sjm()
+{
+ return sj_mat_info && sj_mat_info->is_used;
+}
+
+
/**
Fill in outer join related info for the execution plan structure.
@@ -6365,6 +8528,12 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
corresponding first inner table through the field t0->on_expr_ref.
Here ti are structures of the JOIN_TAB type.
+ In other words, for each join tab, set
+ - first_inner
+ - last_inner
+ - first_upper
+ - on_expr_ref, cond_equal
+
EXAMPLE. For the query:
@code
SELECT * FROM t1
@@ -6390,18 +8559,37 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
has been chosen.
*/
-static void
+static bool
make_outerjoin_info(JOIN *join)
{
DBUG_ENTER("make_outerjoin_info");
- for (uint i=join->const_tables ; i < join->tables ; i++)
+
+ /*
+ Create temp. tables for merged SJ-Materialization nests. We need to do
+ this now, because further code relies on tab->table and
+ tab->table->pos_in_table_list being set.
+ */
+ JOIN_TAB *tab;
+ for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
+ tab;
+ tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
{
- JOIN_TAB *tab=join->join_tab+i;
- TABLE *table=tab->table;
+ if (tab->bush_children)
+ {
+ if (setup_sj_materialization_part1(tab))
+ DBUG_RETURN(TRUE);
+ tab->table->reginfo.join_tab= tab;
+ }
+ }
+
+ for (JOIN_TAB *tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab;
+ tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+ {
+ TABLE *table= tab->table;
TABLE_LIST *tbl= table->pos_in_table_list;
TABLE_LIST *embedding= tbl->embedding;
- if (tbl->outer_join)
+ if (tbl->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT))
{
/*
Table tab is the only one inner table for outer join.
@@ -6411,11 +8599,19 @@ make_outerjoin_info(JOIN *join)
tab->last_inner= tab->first_inner= tab;
tab->on_expr_ref= &tbl->on_expr;
tab->cond_equal= tbl->cond_equal;
- if (embedding)
+ if (embedding && !embedding->is_active_sjm())
tab->first_upper= embedding->nested_join->first_nested;
}
for ( ; embedding ; embedding= embedding->embedding)
{
+ if (embedding->is_active_sjm())
+ {
+ /* We're trying to walk out of an SJ-Materialization nest. Don't do this. */
+ break;
+ }
+ /* Ignore sj-nests: */
+ if (!(embedding->on_expr && embedding->outer_join))
+ continue;
NESTED_JOIN *nested_join= embedding->nested_join;
if (!nested_join->counter)
{
@@ -6431,13 +8627,20 @@ make_outerjoin_info(JOIN *join)
}
if (!tab->first_inner)
tab->first_inner= nested_join->first_nested;
- if (++nested_join->counter < nested_join->join_list.elements)
+ if (tab->table->reginfo.not_exists_optimize)
+ tab->first_inner->table->reginfo.not_exists_optimize= 1;
+ if (++nested_join->counter < nested_join->n_tables)
break;
/* Table tab is the last inner table for nested join. */
nested_join->first_nested->last_inner= tab;
+ if (tab->first_inner->table->reginfo.not_exists_optimize)
+ {
+ for (JOIN_TAB *join_tab= tab->first_inner; join_tab <= tab; join_tab++)
+ join_tab->table->reginfo.not_exists_optimize= 1;
+ }
}
}
- DBUG_VOID_RETURN;
+ DBUG_RETURN(FALSE);
}
@@ -6450,75 +8653,133 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
{
add_not_null_conds(join);
table_map used_tables;
+ /*
+ Step #1: Extract constant condition
+ - Extract and check the constant part of the WHERE
+ - Extract constant parts of ON expressions from outer
+ joins and attach them appropriately.
+ */
if (cond) /* Because of QUICK_GROUP_MIN_MAX_SELECT */
{ /* there may be a select without a cond. */
- if (join->tables > 1)
+ if (join->table_count > 1)
cond->update_used_tables(); // Tablenr may have changed
- if (join->const_tables == join->tables &&
+ if (join->const_tables == join->table_count &&
thd->lex->current_select->master_unit() ==
&thd->lex->unit) // not upper level SELECT
join->const_table_map|=RAND_TABLE_BIT;
+
+ /*
+ Extract expressions that depend on constant tables
+      1. The const part of the join's WHERE clause can be checked immediately;
+         if it is not satisfied, the join has an empty result.
+      2. Constant parts of outer joins' ON expressions must be attached to
+         the corresponding inner tables, wrapped in triggers.
+ */
{ // Check const tables
- COND *const_cond=
- make_cond_for_table(cond,
+ join->exec_const_cond=
+ make_cond_for_table(thd, cond,
join->const_table_map,
- (table_map) 0);
- DBUG_EXECUTE("where",print_where(const_cond,"constants", QT_ORDINARY););
- for (JOIN_TAB *tab= join->join_tab+join->const_tables;
- tab < join->join_tab+join->tables ; tab++)
+ (table_map) 0, -1, FALSE, FALSE);
+ /* Add conditions added by add_not_null_conds(). */
+ for (uint i= 0 ; i < join->const_tables ; i++)
+ add_cond_and_fix(thd, &join->exec_const_cond,
+ join->join_tab[i].select_cond);
+
+ DBUG_EXECUTE("where",print_where(join->exec_const_cond,"constants",
+ QT_ORDINARY););
+ if (join->exec_const_cond && !join->exec_const_cond->is_expensive() &&
+ !join->exec_const_cond->val_int())
{
- if (*tab->on_expr_ref)
+ DBUG_PRINT("info",("Found impossible WHERE condition"));
+ join->exec_const_cond= NULL;
+ DBUG_RETURN(1); // Impossible const condition
+ }
+
+ if (join->table_count != join->const_tables)
+ {
+ COND *outer_ref_cond= make_cond_for_table(thd, cond,
+ join->const_table_map |
+ OUTER_REF_TABLE_BIT,
+ OUTER_REF_TABLE_BIT,
+ -1, FALSE, FALSE);
+ if (outer_ref_cond)
{
- JOIN_TAB *cond_tab= tab->first_inner;
- COND *tmp= make_cond_for_table(*tab->on_expr_ref,
- join->const_table_map,
- ( table_map) 0);
- if (!tmp)
- continue;
- tmp= new Item_func_trig_cond(tmp, &cond_tab->not_null_compl);
- if (!tmp)
- DBUG_RETURN(1);
- tmp->quick_fix_field();
- cond_tab->select_cond= !cond_tab->select_cond ? tmp :
- new Item_cond_and(cond_tab->select_cond,
- tmp);
- if (!cond_tab->select_cond)
- DBUG_RETURN(1);
- cond_tab->select_cond->quick_fix_field();
- }
+ add_cond_and_fix(thd, &outer_ref_cond, join->outer_ref_cond);
+ join->outer_ref_cond= outer_ref_cond;
+ }
}
- if (const_cond && !const_cond->val_int())
+ else
{
- DBUG_PRINT("info",("Found impossible WHERE condition"));
- DBUG_RETURN(1); // Impossible const condition
+ COND *pseudo_bits_cond=
+ make_cond_for_table(thd, cond,
+ join->const_table_map |
+ PSEUDO_TABLE_BITS,
+ PSEUDO_TABLE_BITS,
+ -1, FALSE, FALSE);
+ if (pseudo_bits_cond)
+ {
+ add_cond_and_fix(thd, &pseudo_bits_cond,
+ join->pseudo_bits_cond);
+ join->pseudo_bits_cond= pseudo_bits_cond;
+ }
}
}
}
+
+ /*
+ Step #2: Extract WHERE/ON parts
+ */
+ table_map save_used_tables= 0;
used_tables=((select->const_tables=join->const_table_map) |
OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
- for (uint i=join->const_tables ; i < join->tables ; i++)
+ JOIN_TAB *tab;
+ table_map current_map;
+ uint i= join->const_tables;
+ for (tab= first_depth_first_tab(join); tab;
+ tab= next_depth_first_tab(join, tab), i++)
{
- JOIN_TAB *tab=join->join_tab+i;
+ bool is_hj;
/*
first_inner is the X in queries like:
SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
*/
- JOIN_TAB *first_inner_tab= tab->first_inner;
- table_map current_map= tab->table->map;
+ JOIN_TAB *first_inner_tab= tab->first_inner;
+
+ if (!tab->bush_children)
+ current_map= tab->table->map;
+ else
+ current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables;
+
bool use_quick_range=0;
COND *tmp;
+ /*
+ Tables that are within SJ-Materialization nests cannot have their
+ conditions referring to preceding non-const tables.
+ - If we're looking at the first SJM table, reset used_tables
+ to refer to only allowed tables
+ */
+ if (tab->emb_sj_nest && tab->emb_sj_nest->sj_mat_info &&
+ tab->emb_sj_nest->sj_mat_info->is_used &&
+ !(used_tables & tab->emb_sj_nest->sj_inner_tables))
+ {
+ save_used_tables= used_tables;
+ used_tables= join->const_table_map | OUTER_REF_TABLE_BIT |
+ RAND_TABLE_BIT;
+ }
+
/*
Following force including random expression in last table condition.
It solve problem with select like SELECT * FROM t1 WHERE rand() > 0.5
*/
- if (i == join->tables-1)
+ if (tab == join->join_tab + join->top_join_tab_count - 1)
current_map|= OUTER_REF_TABLE_BIT | RAND_TABLE_BIT;
used_tables|=current_map;
if (tab->type == JT_REF && tab->quick &&
- (uint) tab->ref.key == tab->quick->index &&
- tab->ref.key_length < tab->quick->max_used_key_length)
+ (((uint) tab->ref.key == tab->quick->index &&
+ tab->ref.key_length < tab->quick->max_used_key_length) ||
+ tab->table->intersect_keys.is_set(tab->ref.key)))
{
/* Range uses longer key; Use this instead of ref on key */
tab->type=JT_ALL;
@@ -6531,16 +8792,47 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
We will use join cache here : prevent sorting of the first
table only and sort at the end.
*/
- if (i != join->const_tables && join->tables > join->const_tables + 1)
+ if (i != join->const_tables &&
+ join->table_count > join->const_tables + 1 &&
+ join->best_positions[i].use_join_buffer)
join->full_join= 1;
}
tmp= NULL;
+
if (cond)
- tmp= make_cond_for_table(cond,used_tables,current_map);
+ {
+ if (tab->bush_children)
+ {
+ // Reached the materialization tab
+ tmp= make_cond_after_sjm(cond, cond, save_used_tables, used_tables,
+ /*inside_or_clause=*/FALSE);
+ used_tables= save_used_tables | used_tables;
+ save_used_tables= 0;
+ }
+ else
+ {
+ tmp= make_cond_for_table(thd, cond, used_tables, current_map, i,
+ FALSE, FALSE);
+ }
+ /* Add conditions added by add_not_null_conds(). */
+ if (tab->select_cond)
+ add_cond_and_fix(thd, &tmp, tab->select_cond);
+ }
+
+ is_hj= (tab->type == JT_REF || tab->type == JT_EQ_REF) &&
+ (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
+ ((join->max_allowed_join_cache_level+1)/2 == 2 ||
+ ((join->max_allowed_join_cache_level+1)/2 > 2 &&
+ is_hash_join_key_no(tab->ref.key))) &&
+ (!tab->emb_sj_nest ||
+ join->allowed_semijoin_with_cache) &&
+ (!(tab->table->map & join->outer_join) ||
+ join->allowed_outer_join_with_cache);
+
if (cond && !tmp && tab->quick)
{ // Outer join
- if (tab->type != JT_ALL)
+ if (tab->type != JT_ALL && !is_hj)
{
/*
Don't use the quick method
@@ -6562,9 +8854,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
}
}
- if (tmp || !cond || tab->type == JT_REF)
+ if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL ||
+ tab->type == JT_EQ_REF || first_inner_tab)
{
- DBUG_EXECUTE("where",print_where(tmp,tab->table->alias, QT_ORDINARY););
+ DBUG_EXECUTE("where",print_where(tmp,
+ tab->table? tab->table->alias.c_ptr() :"sjm-nest",
+ QT_ORDINARY););
SQL_SELECT *sel= tab->select= ((SQL_SELECT*)
thd->memdup((uchar*) select,
sizeof(*select)));
@@ -6584,35 +8879,51 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
*/
if (!(tmp= add_found_match_trig_cond(first_inner_tab, tmp, 0)))
DBUG_RETURN(1);
- tab->select_cond=sel->cond=tmp;
+ sel->cond= tmp;
+ tab->set_select_cond(tmp, __LINE__);
/* Push condition to storage engine if this is enabled
and the condition is not guarded */
- if (thd->variables.optimizer_switch &
- OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN)
+ if (tab->table)
{
- COND *push_cond=
- make_cond_for_table(tmp, tab->table->map, tab->table->map);
- if (push_cond)
+ tab->table->file->pushed_cond= NULL;
+ if (((thd->variables.optimizer_switch &
+ OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) ||
+ (tab->table->file->ha_table_flags() &
+ HA_MUST_USE_TABLE_CONDITION_PUSHDOWN)) &&
+ !first_inner_tab)
{
- /* Push condition to handler */
- if (!tab->table->file->cond_push(push_cond))
- tab->table->file->pushed_cond= push_cond;
+ COND *push_cond=
+ make_cond_for_table(thd, tmp, current_map, current_map,
+ -1, FALSE, FALSE);
+ if (push_cond)
+ {
+ /* Push condition to handler */
+ if (!tab->table->file->cond_push(push_cond))
+ tab->table->file->pushed_cond= push_cond;
+ }
}
}
}
else
- tab->select_cond= sel->cond= NULL;
+ {
+ sel->cond= NULL;
+ tab->set_select_cond(NULL, __LINE__);
+ }
sel->head=tab->table;
- DBUG_EXECUTE("where",print_where(tmp,tab->table->alias, QT_ORDINARY););
+ DBUG_EXECUTE("where",
+ print_where(tmp,
+ tab->table ? tab->table->alias.c_ptr() :
+ "(sjm-nest)",
+ QT_ORDINARY););
if (tab->quick)
{
/* Use quick key read if it's a constant and it's not used
with key reading */
- if (tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF &&
+ if ((tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF &&
tab->type != JT_FT &&
((tab->type != JT_CONST && tab->type != JT_REF) ||
- (uint)tab->ref.key == tab->quick->index))
+ (uint) tab->ref.key == tab->quick->index)) || is_hj)
{
DBUG_ASSERT(tab->quick->is_valid());
sel->quick=tab->quick; // Use value from get_quick_...
@@ -6625,7 +8936,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
}
tab->quick=0;
}
- uint ref_key=(uint) sel->head->reginfo.join_tab->ref.key+1;
+ uint ref_key= sel->head? (uint) sel->head->reginfo.join_tab->ref.key+1 : 0;
if (i == join->const_tables && ref_key)
{
if (!tab->const_keys.is_clear_all() &&
@@ -6644,12 +8955,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
the index if we are using limit and this is the first table
*/
- if ((cond &&
- !tab->keys.is_subset(tab->const_keys) && i > 0) ||
- (!tab->const_keys.is_clear_all() && i == join->const_tables &&
- join->unit->select_limit_cnt <
- join->best_positions[i].records_read &&
- !(join->select_options & OPTION_FOUND_ROWS)))
+ if (!tab->table->is_filled_at_execution() &&
+ ((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) ||
+ (!tab->const_keys.is_clear_all() && i == join->const_tables &&
+ join->unit->select_limit_cnt <
+ join->best_positions[i].records_read &&
+ !(join->select_options & OPTION_FOUND_ROWS))))
{
/* Join with outer join condition */
COND *orig_cond=sel->cond;
@@ -6666,11 +8977,13 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
sel->cond->quick_fix_field();
if (sel->test_quick_select(thd, tab->keys,
- used_tables & ~ current_map,
+ ((used_tables & ~ current_map) |
+ OUTER_REF_TABLE_BIT),
(join->select_options &
OPTION_FOUND_ROWS ?
HA_POS_ERROR :
- join->unit->select_limit_cnt), 0) < 0)
+ join->unit->select_limit_cnt), 0,
+ FALSE) < 0)
{
/*
Before reporting "Impossible WHERE" for the whole query
@@ -6683,7 +8996,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
(join->select_options &
OPTION_FOUND_ROWS ?
HA_POS_ERROR :
- join->unit->select_limit_cnt),0) < 0)
+ join->unit->select_limit_cnt),0,
+ FALSE) < 0)
DBUG_RETURN(1); // Impossible WHERE
}
else
@@ -6696,40 +9010,44 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
else
{
sel->needed_reg=tab->needed_reg;
- sel->quick_keys.clear_all();
}
+ sel->quick_keys= tab->table->quick_keys;
if (!sel->quick_keys.is_subset(tab->checked_keys) ||
!sel->needed_reg.is_subset(tab->checked_keys))
{
- tab->keys=sel->quick_keys;
- tab->keys.merge(sel->needed_reg);
+ /*
+ "Range checked for each record" is a "last resort" access method
+ that should only be used when the other option is a cross-product
+ join.
+
+ We use the following condition (it's approximate):
+ 1. There are potential keys for (sel->needed_reg)
+ 2. There were no possible ways to construct a quick select, or
+ the quick select would be more expensive than the full table
+ scan.
+ */
tab->use_quick= (!sel->needed_reg.is_clear_all() &&
- (select->quick_keys.is_clear_all() ||
- (select->quick &&
- (select->quick->records >= 100L)))) ?
+ (sel->quick_keys.is_clear_all() ||
+ (sel->quick &&
+ sel->quick->read_time >
+ tab->table->file->scan_time() +
+ tab->table->file->stats.records/TIME_FOR_COMPARE
+ ))) ?
2 : 1;
sel->read_tables= used_tables & ~current_map;
+ sel->quick_keys.clear_all();
}
- if (i != join->const_tables && tab->use_quick != 2)
+ if (i != join->const_tables && tab->use_quick != 2 &&
+ !tab->first_inner)
{ /* Read with cache */
- if (cond &&
- (tmp=make_cond_for_table(cond,
- join->const_table_map |
- current_map,
- current_map)))
- {
- DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
- tab->cache.select=(SQL_SELECT*)
- thd->memdup((uchar*) sel, sizeof(SQL_SELECT));
- tab->cache.select->cond=tmp;
- tab->cache.select->read_tables=join->const_table_map;
- }
- }
+ if (tab->make_scan_filter())
+ DBUG_RETURN(1);
+ }
}
}
/*
- Push down conditions from all on expressions.
+ Push down conditions from all ON expressions.
Each of these conditions are guarded by a variable
that turns if off just before null complemented row for
outer joins is formed. Thus, the condition from an
@@ -6737,16 +9055,32 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
the null complemented row.
*/
- /* First push down constant conditions from on expressions */
- for (JOIN_TAB *join_tab= join->join_tab+join->const_tables;
- join_tab < join->join_tab+join->tables ; join_tab++)
+ /*
+ First push down constant conditions from ON expressions.
+      - Each pushed-down condition is wrapped into a trigger which is
+        enabled only for non-NULL-complemented records
+ - The condition is attached to the first_inner_table.
+
+ With regards to join nests:
+ - if we start at top level, don't walk into nests
+ - if we start inside a nest, stay within that nest.
+ */
+ JOIN_TAB *start_from= tab->bush_root_tab?
+ tab->bush_root_tab->bush_children->start :
+ join->join_tab + join->const_tables;
+ JOIN_TAB *end_with= tab->bush_root_tab?
+ tab->bush_root_tab->bush_children->end :
+ join->join_tab + join->top_join_tab_count;
+ for (JOIN_TAB *join_tab= start_from;
+ join_tab != end_with;
+ join_tab++)
{
if (*join_tab->on_expr_ref)
{
JOIN_TAB *cond_tab= join_tab->first_inner;
- COND *tmp= make_cond_for_table(*join_tab->on_expr_ref,
+ COND *tmp= make_cond_for_table(thd, *join_tab->on_expr_ref,
join->const_table_map,
- (table_map) 0);
+ (table_map) 0, -1, FALSE, FALSE);
if (!tmp)
continue;
tmp= new Item_func_trig_cond(tmp, &cond_tab->not_null_compl);
@@ -6758,13 +9092,22 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
if (!cond_tab->select_cond)
DBUG_RETURN(1);
cond_tab->select_cond->quick_fix_field();
+ cond_tab->select_cond->update_used_tables();
+ if (cond_tab->select)
+ cond_tab->select->cond= cond_tab->select_cond;
}
}
- /* Push down non-constant conditions from on expressions */
+
+ /* Push down non-constant conditions from ON expressions */
JOIN_TAB *last_tab= tab;
+
+ /*
+ while we're inside of an outer join and last_tab is
+ the last of its tables ...
+ */
while (first_inner_tab && first_inner_tab->last_inner == last_tab)
- {
+ {
/*
Table tab is the last inner table of an outer join.
An on expression is always attached to it.
@@ -6773,15 +9116,59 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
table_map used_tables2= (join->const_table_map |
OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
- for (tab= join->join_tab+join->const_tables; tab <= last_tab ; tab++)
+
+ start_from= tab->bush_root_tab?
+ tab->bush_root_tab->bush_children->start :
+ join->join_tab + join->const_tables;
+ for (JOIN_TAB *tab= start_from; tab <= last_tab; tab++)
{
+ DBUG_ASSERT(tab->table);
current_map= tab->table->map;
used_tables2|= current_map;
- COND *tmp_cond= make_cond_for_table(on_expr, used_tables2,
- current_map);
- if (tmp_cond)
+ /*
+ psergey: have put the -1 below. It's bad, will need to fix it.
+ */
+ COND *tmp_cond= make_cond_for_table(thd, on_expr, used_tables2,
+ current_map, /*(tab - first_tab)*/ -1,
+ FALSE, FALSE);
+ bool is_sjm_lookup_tab= FALSE;
+ if (tab->bush_children)
+ {
+ /*
+ 'tab' is an SJ-Materialization tab, i.e. we have a join order
+ like this:
+
+ ot1 sjm_tab LEFT JOIN ot2 ot3
+ ^ ^
+ 'tab'-+ +--- left join we're adding triggers for
+
+ LEFT JOIN's ON expression may not have references to subquery
+ columns. The subquery was in the WHERE clause, so IN-equality
+ is in the WHERE clause, also.
+ However, equality propagation code may have propagated the
+ IN-equality into ON expression, and we may get things like
+
+ subquery_inner_table=const
+
+ in the ON expression. We must not check such conditions during
+ SJM-lookup, because 1) subquery_inner_table has no valid current
+ row (materialization temp.table has it instead), and 2) they
+ would be true anyway.
+ */
+ SJ_MATERIALIZATION_INFO *sjm=
+ tab->bush_children->start->emb_sj_nest->sj_mat_info;
+ if (sjm->is_used && !sjm->is_sj_scan)
+ is_sjm_lookup_tab= TRUE;
+ }
+
+ if (tab == first_inner_tab && tab->on_precond && !is_sjm_lookup_tab)
+ add_cond_and_fix(thd, &tmp_cond, tab->on_precond);
+ if (tmp_cond && !is_sjm_lookup_tab)
{
JOIN_TAB *cond_tab= tab < first_inner_tab ? first_inner_tab : tab;
+ Item **sel_cond_ref= tab < first_inner_tab ?
+ &first_inner_tab->on_precond :
+ &tab->select_cond;
/*
First add the guards for match variables of
all embedding outer join operations.
@@ -6804,15 +9191,18 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
tmp_cond->quick_fix_field();
/* Add the predicate to other pushed down predicates */
DBUG_PRINT("info", ("Item_cond_and"));
- cond_tab->select_cond= !cond_tab->select_cond ? tmp_cond :
- new Item_cond_and(cond_tab->select_cond,
- tmp_cond);
+ *sel_cond_ref= !(*sel_cond_ref) ?
+ tmp_cond :
+ new Item_cond_and(*sel_cond_ref, tmp_cond);
DBUG_PRINT("info", ("Item_cond_and 0x%lx",
- (ulong)cond_tab->select_cond));
- if (!cond_tab->select_cond)
- DBUG_RETURN(1);
- cond_tab->select_cond->quick_fix_field();
- }
+ (ulong)(*sel_cond_ref)));
+ if (!(*sel_cond_ref))
+ DBUG_RETURN(1);
+ (*sel_cond_ref)->quick_fix_field();
+ (*sel_cond_ref)->update_used_tables();
+ if (cond_tab->select)
+ cond_tab->select->cond= cond_tab->select_cond;
+ }
}
first_inner_tab= first_inner_tab->first_upper;
}
@@ -6822,6 +9212,325 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
}
+static
+uint get_next_field_for_derived_key(uchar *arg)
+{
+ KEYUSE *keyuse= *(KEYUSE **) arg;
+ if (!keyuse)
+ return (uint) (-1);
+ TABLE *table= keyuse->table;
+ uint key= keyuse->key;
+ uint fldno= keyuse->keypart;
+ uint keypart= keyuse->keypart_map == (key_part_map) 1 ?
+ 0 : (keyuse-1)->keypart+1;
+ for ( ;
+ keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno;
+ keyuse++)
+ keyuse->keypart= keypart;
+ if (keyuse->key != key)
+ keyuse= 0;
+ *((KEYUSE **) arg)= keyuse;
+ return fldno;
+}
+
+
+static
+uint get_next_field_for_derived_key_simple(uchar *arg)
+{
+ KEYUSE *keyuse= *(KEYUSE **) arg;
+ if (!keyuse)
+ return (uint) (-1);
+ TABLE *table= keyuse->table;
+ uint key= keyuse->key;
+ uint fldno= keyuse->keypart;
+ for ( ;
+ keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno;
+ keyuse++)
+ ;
+ if (keyuse->key != key)
+ keyuse= 0;
+ *((KEYUSE **) arg)= keyuse;
+ return fldno;
+}
+
+static
+bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys)
+{
+ TABLE *table= keyuse->table;
+ if (table->alloc_keys(keys))
+ return TRUE;
+ uint key_count= 0;
+ KEYUSE *first_keyuse= keyuse;
+ uint prev_part= keyuse->keypart;
+ uint parts= 0;
+ uint i= 0;
+
+ for ( ; i < count && key_count < keys; )
+ {
+ do
+ {
+ keyuse->key= table->s->keys;
+ keyuse->keypart_map= (key_part_map) (1 << parts);
+ keyuse++;
+ i++;
+ }
+ while (i < count && keyuse->used_tables == first_keyuse->used_tables &&
+ keyuse->keypart == prev_part);
+ parts++;
+ if (i < count && keyuse->used_tables == first_keyuse->used_tables)
+ {
+ prev_part= keyuse->keypart;
+ }
+ else
+ {
+ KEYUSE *save_first_keyuse= first_keyuse;
+ if (table->check_tmp_key(table->s->keys, parts,
+ get_next_field_for_derived_key_simple,
+ (uchar *) &first_keyuse))
+
+ {
+ first_keyuse= save_first_keyuse;
+ if (table->add_tmp_key(table->s->keys, parts,
+ get_next_field_for_derived_key,
+ (uchar *) &first_keyuse,
+ FALSE))
+ return TRUE;
+ table->reginfo.join_tab->keys.set_bit(table->s->keys);
+ }
+ else
+ {
+ /* Mark keyuses for this key to be excluded */
+ for (KEYUSE *curr=save_first_keyuse; curr < keyuse; curr++)
+ {
+ curr->key= MAX_KEY;
+ }
+ }
+ first_keyuse= keyuse;
+ key_count++;
+ parts= 0;
+ prev_part= keyuse->keypart;
+ }
+ }
+
+ return FALSE;
+}
+
+
+static
+bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array)
+{
+ KEYUSE *keyuse= dynamic_element(keyuse_array, 0, KEYUSE*);
+ uint elements= keyuse_array->elements;
+ TABLE *prev_table= 0;
+ for (uint i= 0; i < elements; i++, keyuse++)
+ {
+ if (!keyuse->table)
+ break;
+ KEYUSE *first_table_keyuse= NULL;
+ table_map last_used_tables= 0;
+ uint count= 0;
+ uint keys= 0;
+ TABLE_LIST *derived= NULL;
+ if (keyuse->table != prev_table)
+ derived= keyuse->table->pos_in_table_list;
+ while (derived && derived->is_materialized_derived())
+ {
+ if (keyuse->table != prev_table)
+ {
+ prev_table= keyuse->table;
+ while (keyuse->table == prev_table && keyuse->key != MAX_KEY)
+ {
+ keyuse++;
+ i++;
+ }
+ if (keyuse->table != prev_table)
+ {
+ keyuse--;
+ i--;
+ derived= NULL;
+ continue;
+ }
+ first_table_keyuse= keyuse;
+ last_used_tables= keyuse->used_tables;
+ count= 0;
+ keys= 0;
+ }
+ else if (keyuse->used_tables != last_used_tables)
+ {
+ keys++;
+ last_used_tables= keyuse->used_tables;
+ }
+ count++;
+ keyuse++;
+ i++;
+ if (keyuse->table != prev_table)
+ {
+ if (generate_derived_keys_for_table(first_table_keyuse, count, ++keys))
+ return TRUE;
+ keyuse--;
+ i--;
+ derived= NULL;
+ }
+ }
+ }
+ return FALSE;
+}
+
+
+/*
+ @brief
+ Drops unused keys for each materialized derived table/view
+
+ @details
+  For materialized derived tables only ref access can be used, and it employs
+  only one index, so we don't need the rest. For each materialized derived
+  table/view, call TABLE::use_index to save the one index chosen by the
+  optimizer and free the others. If no key was chosen, all keys will be dropped.
+*/
+
+void JOIN::drop_unused_derived_keys()
+{
+ JOIN_TAB *tab;
+ for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
+ tab;
+ tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+ {
+
+ TABLE *table=tab->table;
+ if (!table)
+ continue;
+ if (!table->pos_in_table_list->is_materialized_derived())
+ continue;
+ if (table->max_keys > 1)
+ table->use_index(tab->ref.key);
+ if (table->s->keys)
+ {
+ if (tab->ref.key >= 0)
+ tab->ref.key= 0;
+ else
+ table->s->keys= 0;
+ }
+ tab->keys= (key_map) (table->s->keys ? 1 : 0);
+ }
+}
+
+
+/*
+ Evaluate the bitmap of used tables for items from the select list
+*/
+
+inline void JOIN::eval_select_list_used_tables()
+{
+ select_list_used_tables= 0;
+ Item *item;
+ List_iterator_fast<Item> it(fields_list);
+ while ((item= it++))
+ {
+ select_list_used_tables|= item->used_tables();
+ }
+ Item_outer_ref *ref;
+ List_iterator_fast<Item_outer_ref> ref_it(select_lex->inner_refs_list);
+ while ((ref= ref_it++))
+ {
+ item= ref->outer_ref;
+ select_list_used_tables|= item->used_tables();
+ }
+}
+
+
+/*
+  Determine after which table we'll produce an ordered set
+
+ SYNOPSIS
+ make_join_orderinfo()
+ join
+
+
+ DESCRIPTION
+    Determine whether the result set is already ordered for ORDER BY, so that
+    we can disable the join cache (which would otherwise change the ordering
+    of the results).
+    The code handles a sort table at any location (not only first after
+    the const tables), even though that is currently prohibited.
+    We must disable the join cache if the first non-const table alone provides
+    the ordering. If there is a temp table, the ordering is done as the last
+    operation and doesn't prevent join cache usage.
+
+ RETURN
+    The number of the table after which the set will be ordered
+    join->table_count if we don't need an ordered set
+*/
+
+static uint make_join_orderinfo(JOIN *join)
+{
+ /*
+ This function needs to be fixed to take into account that we now have SJM
+ nests.
+ */
+ DBUG_ASSERT(0);
+
+ JOIN_TAB *tab;
+ if (join->need_tmp)
+ return join->table_count;
+ tab= join->get_sort_by_join_tab();
+ return tab ? tab-join->join_tab : join->table_count;
+}
+
+/*
+ Deny usage of join buffer for the specified table
+
+ SYNOPSIS
+ set_join_cache_denial()
+ tab join table for which join buffer usage is to be denied
+
+ DESCRIPTION
+ The function denies usage of join buffer when joining the table 'tab'.
+    The table is marked as not employing any join buffer. If a join cache
+    object has already been allocated for the table, this object is destroyed.
+
+ RETURN
+ none
+*/
+
+static
+void set_join_cache_denial(JOIN_TAB *join_tab)
+{
+ if (join_tab->cache)
+ {
+ /*
+ If there is a previous cache linked to this cache through the
+ next_cache pointer: remove the link.
+ */
+ if (join_tab->cache->prev_cache)
+ join_tab->cache->prev_cache->next_cache= 0;
+ /*
+ No need to do the same for next_cache since cache denial is done
+ backwards starting from the latest cache in the linked list (see
+ revise_cache_usage()).
+ */
+ DBUG_ASSERT(!join_tab->cache->next_cache);
+
+ join_tab->cache->free();
+ join_tab->cache= 0;
+ }
+ if (join_tab->use_join_cache)
+ {
+ join_tab->use_join_cache= FALSE;
+ join_tab->used_join_cache_level= 0;
+ /*
+      It can only be sub_select(). It cannot be sub_select_sjm() because we
+      don't do join buffering for the first table in an sjm nest.
+ */
+ join_tab[-1].next_select= sub_select;
+ if (join_tab->type == JT_REF && join_tab->is_ref_for_hash_join())
+ {
+ join_tab->type= JT_ALL;
+ join_tab->ref.key_parts= 0;
+ }
+ join_tab->join->return_tab= join_tab;
+ }
+}
+
+
/**
The default implementation of unlock-row method of READ_RECORD,
used in all access methods.
@@ -6830,11 +9539,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
void rr_unlock_row(st_join_table *tab)
{
READ_RECORD *info= &tab->read_record;
- info->file->unlock_row();
+ info->table->file->unlock_row();
}
-
/**
Pick the appropriate access method functions
@@ -6885,47 +9593,700 @@ pick_table_access_method(JOIN_TAB *tab)
}
-static void
-make_join_readinfo(JOIN *join, ulonglong options)
+/*
+ Revise usage of join buffer for the specified table and the whole nest
+
+ SYNOPSIS
+ revise_cache_usage()
+ tab join table for which join buffer usage is to be revised
+
+ DESCRIPTION
+ The function revises the decision to use a join buffer for the table 'tab'.
+ If this table happens to be among the inner tables of a nested outer join/
+ semi-join, the function denies usage of join buffers for all of them.
+
+ RETURN
+ none
+*/
+
+static
+void revise_cache_usage(JOIN_TAB *join_tab)
+{
+ JOIN_TAB *tab;
+ JOIN_TAB *first_inner;
+
+ if (join_tab->first_inner)
+ {
+ JOIN_TAB *end_tab= join_tab;
+ for (first_inner= join_tab->first_inner;
+ first_inner;
+ first_inner= first_inner->first_upper)
+ {
+ for (tab= end_tab; tab >= first_inner; tab--)
+ set_join_cache_denial(tab);
+ end_tab= first_inner;
+ }
+ }
+ else if (join_tab->first_sj_inner_tab)
+ {
+ first_inner= join_tab->first_sj_inner_tab;
+ for (tab= join_tab; tab >= first_inner; tab--)
+ {
+ set_join_cache_denial(tab);
+ }
+ }
+ else set_join_cache_denial(join_tab);
+}
+
+
+/*
+ end_select-compatible function that writes the record into a sjm temptable
+
+ SYNOPSIS
+ end_sj_materialize()
+ join The join
+ join_tab Points right after the last join_tab in the materialization bush
+ end_of_records FALSE <=> This call is made to pass another record
+ combination
+ TRUE <=> EOF (no action)
+
+ DESCRIPTION
+ This function is used by semi-join materialization to capture the subquery's
+ result set and write it into the temptable (that is, materialize it).
+
+ NOTE
+ This function is used only for semi-join materialization. Non-semijoin
+ materialization uses a different mechanism.
+
+ RETURN
+ NESTED_LOOP_OK
+ NESTED_LOOP_ERROR
+*/
+
+enum_nested_loop_state
+end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
+{
+ int error;
+ THD *thd= join->thd;
+ SJ_MATERIALIZATION_INFO *sjm= join_tab[-1].emb_sj_nest->sj_mat_info;
+ DBUG_ENTER("end_sj_materialize");
+ if (!end_of_records)
+ {
+ TABLE *table= sjm->table;
+
+ List_iterator<Item> it(sjm->sjm_table_cols);
+ Item *item;
+ while ((item= it++))
+ {
+ if (item->is_null())
+ DBUG_RETURN(NESTED_LOOP_OK);
+ }
+ fill_record(thd, table->field, sjm->sjm_table_cols, TRUE, FALSE);
+ if (thd->is_error())
+ DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
+ if ((error= table->file->ha_write_tmp_row(table->record[0])))
+ {
+ /* create_myisam_from_heap will generate error if needed */
+ if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
+ create_internal_tmp_table_from_heap(thd, table,
+ sjm->sjm_table_param.start_recinfo,
+ &sjm->sjm_table_param.recinfo, error, 1, NULL))
+ DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
+ }
+ }
+ DBUG_RETURN(NESTED_LOOP_OK);
+}
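+
+
+/*
+  Illustration only (not part of the patch; table and column names are
+  hypothetical). For a query such as
+
+    SELECT * FROM t1 WHERE t1.a IN (SELECT t2.b FROM t2 WHERE t2.c < 100);
+
+  the optimizer may pick semi-join materialization; the rows produced by the
+  subquery's join are then funneled through end_sj_materialize() into the
+  materialization temptable. Whether this strategy is chosen depends on the
+  materialization-related optimizer_switch flags and on cost estimates.
+*/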
+
+
+/*
+ Check whether a join buffer can be used to join the specified table
+
+ SYNOPSIS
+ check_join_cache_usage()
+ tab joined table to check join buffer usage for
+ options options of the join
+ no_jbuf_after don't use join buffering after table with this number
+ prev_tab previous join table
+
+ DESCRIPTION
+ The function finds out whether the table 'tab' can be joined using a join
+ buffer. This check is performed after the best execution plan for 'join'
+ has been chosen. If the function decides that a join buffer can be employed
+ then it selects the most appropriate join cache object that contains this
+ join buffer.
+ The result of the check and the type of the join buffer to be used
+ depend on:
+ - the access method to access rows of the joined table
+ - whether the join table is an inner table of an outer join or semi-join
+ - whether the optimizer switches
+ outer_join_with_cache, semijoin_with_cache, join_cache_incremental,
+ join_cache_hashed, join_cache_bka,
+ are set on or off
+ - the join cache level set for the query
+ - the join 'options'.
+
+ In any case a join buffer is not used if the number of the joined table is
+ greater than 'no_jbuf_after'. It is also never used if the value of
+ join_cache_level is equal to 0.
+ If the optimizer switch outer_join_with_cache is off no join buffer is
+ used for outer join operations.
+ If the optimizer switch semijoin_with_cache is off no join buffer is used
+ for semi-join operations.
+ If the optimizer switch join_cache_incremental is off no incremental join
+ buffers are used.
+ If the optimizer switch join_cache_hashed is off then the optimizer uses
+ neither the BNLH algorithm nor the BKAH algorithm to perform join operations.
+
+ If the optimizer switch join_cache_bka is off then the optimizer uses
+ neither the BKA algorithm nor the BKAH algorithm to perform join operations.
+ The valid settings for join_cache_level lie in the interval 0..8.
+ If it is set to 0, no join buffers are used to perform join operations.
+ Currently we differentiate between join caches of 8 levels:
+ 1 : non-incremental join cache used for BNL join algorithm
+ 2 : incremental join cache used for BNL join algorithm
+ 3 : non-incremental join cache used for BNLH join algorithm
+ 4 : incremental join cache used for BNLH join algorithm
+ 5 : non-incremental join cache used for BKA join algorithm
+ 6 : incremental join cache used for BKA join algorithm
+ 7 : non-incremental join cache used for BKAH join algorithm
+ 8 : incremental join cache used for BKAH join algorithm
+ If the value of join_cache_level is set to n then no join caches of
+ levels higher than n can be employed.
+
+ If the optimizer switches outer_join_with_cache, semijoin_with_cache,
+ join_cache_incremental, join_cache_hashed, join_cache_bka are all on
+ the following rules are applied.
+ If join_cache_level==1|2 then a join buffer is used for inner joins, outer
+ joins and semi-joins with the 'JT_ALL' access method. In this case a
+ JOIN_CACHE_BNL object is employed.
+ If join_cache_level==3|4 and a join buffer is used for a join operation
+ (inner join, outer join, semi-join) with the 'JT_REF'/'JT_EQREF' access
+ method, then a JOIN_CACHE_BNLH object is employed.
+ If an index is used to access rows of the joined table and the value of
+ join_cache_level==5|6 then a JOIN_CACHE_BKA object is employed.
+ If an index is used to access rows of the joined table and the value of
+ join_cache_level==7|8 then a JOIN_CACHE_BKAH object is employed.
+ If the value of join_cache_level is odd then creation of a non-linked
+ join cache is forced.
+
+ Currently, for any join operation, a join cache of the highest allowed
+ and applicable level is used.
+ For example, if join_cache_level is set to 6 and the optimizer switch
+ join_cache_bka is off, while the optimizer switch join_cache_hashed is
+ on, then for any inner join operation with the JT_REF/JT_EQREF access
+ method to the joined table the BNLH join algorithm will be used, while for
+ tables accessed by the JT_ALL method the BNL algorithm will be used
+ (see also the illustrative session settings sketched after this comment).
+
+ If the function decides that a join buffer can be used to join the table
+ 'tab' then it sets tab->use_join_cache to TRUE and assigns the selected
+ join cache object to the field 'cache' of this join table.
+ If the function creates a join cache object it tries to initialize it. The
+ failure to do this results in an invocation of the function that destructs
+ the created object.
+ If the function decides that for some reason no join buffer can be used
+ for a table it calls the function revise_cache_usage that checks
+ whether the join cache should be denied for some previous tables. In this
+ case a pointer to the first table for which join cache usage has been
+ denied is passed in join->return_tab (see the function
+ set_join_cache_denial).
+
+ The function changes the values of the fields tab->icp_other_tables_ok and
+ tab->idx_cond_fact_out to FALSE if the chosen join cache algorithm
+ requires it.
+
+ NOTES
+ An inner table of a nested outer join or a nested semi-join can currently
+ be joined only when a linked cache object is employed. In these cases
+ setting join_cache_incremental to 'off' results in denial of usage of any
+ join buffer when joining the table.
+ For a nested outer join/semi-join, currently, we either use join buffers for
+ all inner tables or for none of them.
+ Some engines (e.g. Falcon) currently allow using only a join cache of the
+ type JOIN_CACHE_BKAH when the joined table is accessed through an index.
+ For these engines, setting the value of join_cache_level to 5 or 6 results
+ in no join buffer being used to join the table.
+
+ RETURN VALUE
+ cache level if cache is used, otherwise returns 0
+
+ TODO
+ Support BKA inside SJ-Materialization nests. When doing this, we'll need
+ to only store sj-inner tables in the join buffer.
+#if 0
+ JOIN_TAB *first_tab= join->join_tab+join->const_tables;
+ uint n_tables= i-join->const_tables;
+ / *
+ We normally put all preceding tables into the join buffer, except
+ for the constant tables.
+ If we're inside a semi-join materialization nest, e.g.
+
+ outer_tbl1 outer_tbl2 ( inner_tbl1, inner_tbl2 ) ...
+ ^-- we're here
+
+ then we need to put into the join buffer only the tables from
+ within the nest.
+ * /
+ if (i >= first_sjm_table && i < last_sjm_table)
+ {
+ n_tables= i - first_sjm_table; // will be >0 if we got here
+ first_tab= join->join_tab + first_sjm_table;
+ }
+#endif
+*/
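+
+/*
+  A minimal illustration of the rules above (not part of the patch; it
+  assumes the usual MariaDB session variables join_cache_level and
+  optimizer_switch are available). With the settings
+
+    SET SESSION join_cache_level= 6;
+    SET SESSION optimizer_switch= 'join_cache_hashed=on,join_cache_bka=off';
+
+  BKA-based caches (levels 5..8) are disabled, so a table accessed by
+  JT_REF/JT_EQREF can get at most a BNLH cache (level 3 or 4), while a table
+  accessed by JT_ALL gets a BNL cache (level 1 or 2).
+*/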
+
+static
+uint check_join_cache_usage(JOIN_TAB *tab,
+ ulonglong options,
+ uint no_jbuf_after,
+ uint table_index,
+ JOIN_TAB *prev_tab)
+{
+ COST_VECT cost;
+ uint flags= 0;
+ ha_rows rows= 0;
+ uint bufsz= 4096;
+ JOIN_CACHE *prev_cache=0;
+ JOIN *join= tab->join;
+ uint cache_level= tab->used_join_cache_level;
+ bool force_unlinked_cache=
+ !(join->allowed_join_cache_types & JOIN_CACHE_INCREMENTAL_BIT);
+ bool no_hashed_cache=
+ !(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT);
+ bool no_bka_cache=
+ !(join->allowed_join_cache_types & JOIN_CACHE_BKA_BIT);
+
+ join->return_tab= 0;
+
+ /*
+ Don't use join cache if @@join_cache_level==0 or this table is the first
+ one in its join suborder (either at the top level or inside a bush)
+ */
+ if (cache_level == 0 || !prev_tab)
+ return 0;
+
+ if (force_unlinked_cache && (cache_level%2 == 0))
+ cache_level--;
+
+ if (options & SELECT_NO_JOIN_CACHE)
+ goto no_join_cache;
+
+ if (tab->use_quick == 2)
+ goto no_join_cache;
+
+ if (tab->table->map & join->complex_firstmatch_tables)
+ goto no_join_cache;
+
+ /*
+ Don't use join cache if we're inside a join tab range covered by LooseScan
+ strategy (TODO: LooseScan is very similar to FirstMatch so theoretically it
+ should be possible to use join buffering in the same way we're using it for
+ multi-table firstmatch ranges).
+ */
+ if (tab->inside_loosescan_range)
+ goto no_join_cache;
+
+ if (tab->is_inner_table_of_semijoin() &&
+ !join->allowed_semijoin_with_cache)
+ goto no_join_cache;
+ if (tab->is_inner_table_of_outer_join() &&
+ !join->allowed_outer_join_with_cache)
+ goto no_join_cache;
+
+ /*
+ Non-linked join buffers can't guarantee one match
+ */
+ if (tab->is_nested_inner())
+ {
+ if (force_unlinked_cache || cache_level == 1)
+ goto no_join_cache;
+ if (cache_level & 1)
+ cache_level--;
+ }
+
+ /*
+ Don't use BKA for materialized tables. We could actually have a
+ meaningful use of BKA when linked join buffers are used.
+
+ The problem is, the temp.table is not filled (actually not even opened
+ properly) yet, and this doesn't let us call
+ handler->multi_range_read_info(). It is possible to come up with
+ estimates, etc. without accessing the table, but it does not seem worth
+ the effort now.
+ */
+ if (tab->table->pos_in_table_list->is_materialized_derived())
+ no_bka_cache= true;
+
+ /*
+ Don't use join buffering if we're dictated not to by no_jbuf_after
+ (This is not meaningfully used currently)
+ */
+ if (table_index > no_jbuf_after)
+ goto no_join_cache;
+
+ /*
+ TODO: BNL join buffer should be perfectly ok with tab->bush_children.
+ */
+ if (tab->loosescan_match_tab || tab->bush_children)
+ goto no_join_cache;
+
+ for (JOIN_TAB *first_inner= tab->first_inner; first_inner;
+ first_inner= first_inner->first_upper)
+ {
+ if (first_inner != tab &&
+ (!first_inner->use_join_cache || !(tab-1)->use_join_cache))
+ goto no_join_cache;
+ }
+ if (tab->first_sj_inner_tab && tab->first_sj_inner_tab != tab &&
+ (!tab->first_sj_inner_tab->use_join_cache || !(tab-1)->use_join_cache))
+ goto no_join_cache;
+ if (!prev_tab->use_join_cache)
+ {
+ /*
+ Check whether table tab and the previous one belong to the same nest of
+ inner tables and if so do not use join buffer when joining table tab.
+ */
+ if (tab->first_inner && tab != tab->first_inner)
+ {
+ for (JOIN_TAB *first_inner= tab[-1].first_inner;
+ first_inner;
+ first_inner= first_inner->first_upper)
+ {
+ if (first_inner == tab->first_inner)
+ goto no_join_cache;
+ }
+ }
+ else if (tab->first_sj_inner_tab && tab != tab->first_sj_inner_tab &&
+ tab->first_sj_inner_tab == tab[-1].first_sj_inner_tab)
+ goto no_join_cache;
+ }
+
+ prev_cache= prev_tab->cache;
+
+ switch (tab->type) {
+ case JT_ALL:
+ if (cache_level == 1)
+ prev_cache= 0;
+ if ((tab->cache= new JOIN_CACHE_BNL(join, tab, prev_cache)) &&
+ ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+ {
+ tab->icp_other_tables_ok= FALSE;
+ return (2-test(!prev_cache));
+ }
+ goto no_join_cache;
+ case JT_SYSTEM:
+ case JT_CONST:
+ case JT_REF:
+ case JT_EQ_REF:
+ if (cache_level <=2 || (no_hashed_cache && no_bka_cache))
+ goto no_join_cache;
+ if (tab->ref.is_access_triggered())
+ goto no_join_cache;
+
+ if (!tab->is_ref_for_hash_join() && !no_bka_cache)
+ {
+ flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT;
+ if (tab->table->covering_keys.is_set(tab->ref.key))
+ flags|= HA_MRR_INDEX_ONLY;
+ rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
+ tab->ref.key_parts,
+ &bufsz, &flags, &cost);
+ }
+
+ if ((cache_level <=4 && !no_hashed_cache) || no_bka_cache ||
+ tab->is_ref_for_hash_join() ||
+ ((flags & HA_MRR_NO_ASSOCIATION) && cache_level <=6))
+ {
+ if (!tab->hash_join_is_possible() ||
+ tab->make_scan_filter())
+ goto no_join_cache;
+ if (cache_level == 3)
+ prev_cache= 0;
+ if ((tab->cache= new JOIN_CACHE_BNLH(join, tab, prev_cache)) &&
+ ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+ {
+ tab->icp_other_tables_ok= FALSE;
+ return (4-test(!prev_cache));
+ }
+ goto no_join_cache;
+ }
+ if (cache_level > 4 && no_bka_cache)
+ goto no_join_cache;
+
+ if ((flags & HA_MRR_NO_ASSOCIATION) &&
+ (cache_level <= 6 || no_hashed_cache))
+ goto no_join_cache;
+
+ if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL))
+ {
+ if (cache_level <= 6 || no_hashed_cache)
+ {
+ if (cache_level == 5)
+ prev_cache= 0;
+ if ((tab->cache= new JOIN_CACHE_BKA(join, tab, flags, prev_cache)) &&
+ ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+ return (6-test(!prev_cache));
+ goto no_join_cache;
+ }
+ else
+ {
+ if (cache_level == 7)
+ prev_cache= 0;
+ if ((tab->cache= new JOIN_CACHE_BKAH(join, tab, flags, prev_cache)) &&
+ ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+ {
+ tab->idx_cond_fact_out= FALSE;
+ return (8-test(!prev_cache));
+ }
+ goto no_join_cache;
+ }
+ }
+ goto no_join_cache;
+ default : ;
+ }
+
+no_join_cache:
+ if (tab->type != JT_ALL && tab->is_ref_for_hash_join())
+ {
+ tab->type= JT_ALL;
+ tab->ref.key_parts= 0;
+ }
+ revise_cache_usage(tab);
+ return 0;
+}
+
+
+/*
+ Check whether join buffers can be used to join tables of a join
+
+ SYNOPSIS
+ check_join_cache_usage()
+ join join whose tables are to be checked
+ options options of the join
+ no_jbuf_after don't use join buffering after table with this number
+ (The tables are assumed to be numbered in
+ first_linear_tab(join, WITHOUT_CONST_TABLES),
+ next_linear_tab(join, WITH_CONST_TABLES) order).
+
+ DESCRIPTION
+ For each table after the first non-constant table the function checks
+ whether the table can be joined using a join buffer. If the function decides
+ that a join buffer can be employed then it selects the most appropriate join
+ cache object containing this join buffer, with a level not greater than
+ the join_cache_level set for the join. To make this check the function
+ calls check_join_cache_usage() for every non-constant table.
+
+ NOTES
+ In some situations (e.g. for nested outer joins, for nested semi-joins) only
+ incremental buffers can be used. If it turns out that for some inner table
+ no join buffer can be used then any inner table of an outer/semi-join nest
+ cannot use join buffer. In the case when already chosen buffer must be
+ denied for a table the function recalls check_join_cache_usage()
+ starting from this table. The pointer to the table from which the check
+ has to be restarted is returned in join->return_val (see the description
+ of check_join_cache_usage).
+*/
+
+void check_join_cache_usage_for_tables(JOIN *join, ulonglong options,
+ uint no_jbuf_after)
+{
+ JOIN_TAB *tab;
+ JOIN_TAB *prev_tab;
+
+ for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
+ tab;
+ tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+ {
+ tab->used_join_cache_level= join->max_allowed_join_cache_level;
+ }
+
+ uint idx= join->const_tables;
+ for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
+ tab;
+ tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+ {
+restart:
+ tab->icp_other_tables_ok= TRUE;
+ tab->idx_cond_fact_out= TRUE;
+
+ /*
+ Check if we have a preceding join_tab, as something that will feed us
+ records that we could buffer. We don't have it, if
+ - this is the first non-const table in the join order,
+ - this is the first table inside an SJM nest.
+ */
+ prev_tab= tab - 1;
+ if (tab == join->join_tab + join->const_tables ||
+ (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab))
+ prev_tab= NULL;
+
+ switch (tab->type) {
+ case JT_SYSTEM:
+ case JT_CONST:
+ case JT_EQ_REF:
+ case JT_REF:
+ case JT_REF_OR_NULL:
+ case JT_ALL:
+ tab->used_join_cache_level= check_join_cache_usage(tab, options,
+ no_jbuf_after,
+ idx,
+ prev_tab);
+ tab->use_join_cache= test(tab->used_join_cache_level);
+ /*
+ psergey-merge: todo: raise the question that this is really stupid that
+ we can first allocate a join buffer, then decide not to use it and free
+ it.
+ */
+ if (join->return_tab)
+ {
+ tab= join->return_tab;
+ goto restart;
+ }
+ break;
+ default:
+ tab->used_join_cache_level= 0;
+ }
+ if (!tab->bush_children)
+ idx++;
+ }
+}
+
+
+/*
+ Plan refinement stage: do various setup things for the executor
+
+ SYNOPSIS
+ make_join_readinfo()
+ join Join being processed
+ options Join's options (checking for SELECT_DESCRIBE,
+ SELECT_NO_JOIN_CACHE)
+ no_jbuf_after Don't use join buffering after table with this number.
+
+ DESCRIPTION
+ Plan refinement stage: do various setup steps for the executor:
+ - set up use of join buffering
+ - push index conditions
+ - increment relevant counters
+ - etc
+
+ RETURN
+ FALSE - OK
+ TRUE - Out of memory
+*/
+
+static bool
+make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
{
+ JOIN_TAB *tab;
uint i;
+ DBUG_ENTER("make_join_readinfo");
+
bool statistics= test(!(join->select_options & SELECT_DESCRIBE));
- bool ordered_set= 0;
bool sorted= 1;
- DBUG_ENTER("make_join_readinfo");
- for (i=join->const_tables ; i < join->tables ; i++)
+ join->complex_firstmatch_tables= table_map(0);
+
+ if (!join->select_lex->sj_nests.is_empty() &&
+ setup_semijoin_dups_elimination(join, options, no_jbuf_after))
+ DBUG_RETURN(TRUE); /* purecov: inspected */
+
+ /* For const tables, set partial_join_cardinality to 1. */
+ for (tab= join->join_tab; tab != join->join_tab + join->const_tables; tab++)
+ tab->partial_join_cardinality= 1;
+
+ JOIN_TAB *prev_tab= NULL;
+ i= join->const_tables;
+ for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
+ tab;
+ prev_tab=tab, tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+ {
+ /*
+ The approximation below for partial join cardinality is not good because
+ - it does not take into account some pushdown predicates
+ - it does not differentiate between inner joins, outer joins and
+ semi-joins.
+ Later it should be improved.
+ */
+
+ if (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab)
+ prev_tab= NULL;
+ DBUG_ASSERT(tab->bush_children || tab->table == join->best_positions[i].table->table);
+
+ tab->partial_join_cardinality= join->best_positions[i].records_read *
+ (prev_tab? prev_tab->partial_join_cardinality : 1);
+ if (!tab->bush_children)
+ i++;
+ }
+
+ check_join_cache_usage_for_tables(join, options, no_jbuf_after);
+
+ JOIN_TAB *first_tab;
+ for (tab= first_tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
+ tab;
+ tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
{
- JOIN_TAB *tab=join->join_tab+i;
+ if (tab->bush_children)
+ {
+ if (setup_sj_materialization_part2(tab))
+ return TRUE;
+ }
+
TABLE *table=tab->table;
+ uint jcl= tab->used_join_cache_level;
tab->read_record.table= table;
- tab->read_record.file=table->file;
tab->read_record.unlock_row= rr_unlock_row;
- tab->next_select=sub_select; /* normal select */
+ tab->sorted= sorted;
+ sorted= 0; // only first must be sorted
+
/*
- Determine if the set is already ordered for ORDER BY, so it can
- disable join cache because it will change the ordering of the results.
- Code handles sort table that is at any location (not only first after
- the const tables) despite the fact that it's currently prohibited.
- We must disable join cache if the first non-const table alone is
- ordered. If there is a temp table the ordering is done as a last
- operation and doesn't prevent join cache usage.
+ We should not set tab->next_select for the last table in the
+ SJM nest, as setup_sj_materialization() has already set it to
+ end_sj_materialize.
*/
- if (!ordered_set && !join->need_tmp &&
- (table == join->sort_by_table ||
- (join->sort_by_table == (TABLE *) 1 && i != join->const_tables)))
- ordered_set= 1;
+ if (!(tab->bush_root_tab &&
+ tab->bush_root_tab->bush_children->end == tab + 1))
+ {
+ tab->next_select=sub_select; /* normal select */
+ }
- tab->sorted= sorted;
- sorted= 0; // only first must be sorted
+
+ if (tab->loosescan_match_tab)
+ {
+ if (!(tab->loosescan_buf= (uchar*)join->thd->alloc(tab->
+ loosescan_key_len)))
+ return TRUE; /* purecov: inspected */
+ tab->sorted= TRUE;
+ }
table->status=STATUS_NO_RECORD;
pick_table_access_method (tab);
+ if (jcl)
+ tab[-1].next_select=sub_select_cache;
+
+ if (tab->cache && tab->cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG)
+ tab->type= JT_HASH;
+
switch (tab->type) {
+ case JT_SYSTEM: // Only happens with left join
+ case JT_CONST: // Only happens with left join
+ /* Only happens with outer joins */
+ tab->read_first_record= tab->type == JT_SYSTEM ?
+ join_read_system :join_read_const;
+ if (table->covering_keys.is_set(tab->ref.key) &&
+ !table->no_keyread)
+ table->enable_keyread();
+ else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered())
+ push_index_cond(tab, tab->ref.key);
+ break;
case JT_EQ_REF:
tab->read_record.unlock_row= join_read_key_unlock_row;
/* fall through */
+ if (table->covering_keys.is_set(tab->ref.key) &&
+ !table->no_keyread)
+ table->enable_keyread();
+ else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered())
+ push_index_cond(tab, tab->ref.key);
+ break;
case JT_REF_OR_NULL:
case JT_REF:
if (tab->select)
@@ -6935,27 +10296,20 @@ make_join_readinfo(JOIN *join, ulonglong options)
}
delete tab->quick;
tab->quick=0;
- /* fall through */
- case JT_CONST: // Only happens with left join
if (table->covering_keys.is_set(tab->ref.key) &&
!table->no_keyread)
- table->set_keyread(TRUE);
+ table->enable_keyread();
+ else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered())
+ push_index_cond(tab, tab->ref.key);
break;
case JT_ALL:
+ case JT_HASH:
/*
If previous table use cache
If the incoming data set is already sorted don't use cache.
+ Also don't use cache if this is the first table in a semi-join
+ materialization nest.
*/
- if (i != join->const_tables && !(options & SELECT_NO_JOIN_CACHE) &&
- tab->use_quick != 2 && !tab->first_inner && !ordered_set)
- {
- if ((options & SELECT_DESCRIBE) ||
- !join_init_cache(join->thd,join->join_tab+join->const_tables,
- i-join->const_tables))
- {
- tab[-1].next_select=sub_select_cache; /* Patch previous */
- }
- }
/* These init changes read_record */
if (tab->use_quick == 2)
{
@@ -6966,8 +10320,9 @@ make_join_readinfo(JOIN *join, ulonglong options)
}
else
{
- tab->read_first_record= join_init_read_record;
- if (i == join->const_tables)
+ if (!tab->bush_children)
+ tab->read_first_record= join_init_read_record;
+ if (tab == first_tab)
{
if (tab->select && tab->select->quick)
{
@@ -6978,7 +10333,10 @@ make_join_readinfo(JOIN *join, ulonglong options)
{
join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
if (statistics)
+ {
status_var_increment(join->thd->status_var.select_scan_count);
+ join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
+ }
}
}
else
@@ -6986,13 +10344,18 @@ make_join_readinfo(JOIN *join, ulonglong options)
if (tab->select && tab->select->quick)
{
if (statistics)
- status_var_increment(join->thd->status_var.select_full_range_join_count);
+ status_var_increment(join->thd->status_var.
+ select_full_range_join_count);
}
else
{
join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
if (statistics)
- status_var_increment(join->thd->status_var.select_full_join_count);
+ {
+ status_var_increment(join->thd->status_var.
+ select_full_join_count);
+ join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
+ }
}
}
if (!table->no_keyread)
@@ -7000,41 +10363,107 @@ make_join_readinfo(JOIN *join, ulonglong options)
if (tab->select && tab->select->quick &&
tab->select->quick->index != MAX_KEY && //not index_merge
table->covering_keys.is_set(tab->select->quick->index))
- table->set_keyread(TRUE);
+ table->enable_keyread();
else if (!table->covering_keys.is_clear_all() &&
!(tab->select && tab->select->quick))
{ // Only read index tree
- /*
- It has turned out that the below change, while speeding things
- up for disk-bound loads, slows them down for cases when the data
- is in disk cache (see BUG#35850):
- // See bug #26447: "Using the clustered index for a table scan
- // is always faster than using a secondary index".
- if (table->s->primary_key != MAX_KEY &&
- table->file->primary_key_is_clustered())
- tab->index= table->s->primary_key;
- else
- */
- tab->index=find_shortest_key(table, & table->covering_keys);
+ if (tab->loosescan_match_tab)
+ tab->index= tab->loosescan_key;
+ else
+ {
+#ifdef BAD_OPTIMIZATION
+ /*
+ It has turned out that the below change, while speeding things
+ up for disk-bound loads, slows them down for cases when the data
+ is in disk cache (see BUG#35850):
+ See bug #26447: "Using the clustered index for a table scan
+ is always faster than using a secondary index".
+ */
+ if (table->s->primary_key != MAX_KEY &&
+ table->file->primary_key_is_clustered())
+ tab->index= table->s->primary_key;
+ else
+#endif
+ tab->index=find_shortest_key(table, & table->covering_keys);
+ }
tab->read_first_record= join_read_first;
- tab->type=JT_NEXT; // Read with index_first / index_next
+ /* Read with index_first / index_next */
+ tab->type= tab->type == JT_ALL ? JT_NEXT : JT_HASH_NEXT;
}
}
+ if (tab->select && tab->select->quick &&
+ tab->select->quick->index != MAX_KEY && ! tab->table->key_read)
+ push_index_cond(tab, tab->select->quick->index);
}
break;
case JT_FT:
- case JT_SYSTEM:
break;
+ /* purecov: begin deadcode */
default:
- DBUG_PRINT("error",("Table type %d found",tab->type)); /* purecov: deadcode */
- break; /* purecov: deadcode */
+ DBUG_PRINT("error",("Table type %d found",tab->type));
+ break;
case JT_UNKNOWN:
case JT_MAYBE_REF:
- abort(); /* purecov: deadcode */
+ abort();
+ /* purecov: end */
}
}
- join->join_tab[join->tables-1].next_select=0; /* Set by do_select */
- DBUG_VOID_RETURN;
+ uint n_top_tables= join->join_tab_ranges.head()->end -
+ join->join_tab_ranges.head()->start;
+
+ join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */
+
+ /*
+ If a join buffer is used to join a table the ordering by an index
+ for the first non-constant table cannot be employed anymore.
+ */
+ for (tab= join->join_tab + join->const_tables ;
+ tab != join->join_tab + n_top_tables ; tab++)
+ {
+ if (tab->use_join_cache)
+ {
+ JOIN_TAB *sort_by_tab= join->group && join->simple_group &&
+ join->group_list ?
+ join->join_tab+join->const_tables :
+ join->get_sort_by_join_tab();
+ /*
+ It could be that sort_by_tab==NULL, and the plan is to use filesort()
+ on the first table.
+ */
+ if (join->order)
+ {
+ join->simple_order= 0;
+ join->need_tmp= 1;
+ }
+
+ if (join->group && !join->group_optimized_away)
+ {
+ join->need_tmp= 1;
+ join->simple_group= 0;
+ }
+
+ if (sort_by_tab)
+ {
+ join->need_tmp= 1;
+ join->simple_order= join->simple_group= 0;
+ if (sort_by_tab->type == JT_NEXT &&
+ !sort_by_tab->table->covering_keys.is_set(sort_by_tab->index))
+ {
+ sort_by_tab->type= JT_ALL;
+ sort_by_tab->read_first_record= join_init_read_record;
+ }
+ else if (sort_by_tab->type == JT_HASH_NEXT &&
+ !sort_by_tab->table->covering_keys.is_set(sort_by_tab->index))
+ {
+ sort_by_tab->type= JT_HASH;
+ sort_by_tab->read_first_record= join_init_read_record;
+ }
+ }
+ break;
+ }
+ }
+
+ DBUG_RETURN(FALSE);
}
@@ -7054,14 +10483,11 @@ make_join_readinfo(JOIN *join, ulonglong options)
bool error_if_full_join(JOIN *join)
{
- for (JOIN_TAB *tab=join->join_tab, *end=join->join_tab+join->tables;
- tab < end;
- tab++)
+ for (JOIN_TAB *tab=first_top_level_tab(join, WITH_CONST_TABLES); tab;
+ tab= next_top_level_tab(join, tab))
{
if (tab->type == JT_ALL && (!tab->select || !tab->select->quick))
{
- /* This error should not be ignored. */
- join->select_lex->no_error= FALSE;
my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE,
ER(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0));
return(1);
@@ -7073,21 +10499,58 @@ bool error_if_full_join(JOIN *join)
/**
cleanup JOIN_TAB.
+
+ DESCRIPTION
+ This is invoked when we've finished all join executions.
*/
void JOIN_TAB::cleanup()
{
+ DBUG_ENTER("JOIN_TAB::cleanup");
+ DBUG_PRINT("enter", ("table %s.%s",
+ (table ? table->s->db.str : "?"),
+ (table ? table->s->table_name.str : "?")));
delete select;
select= 0;
delete quick;
quick= 0;
- my_free(cache.buff);
- cache.buff= 0;
+ if (cache)
+ {
+ cache->free();
+ cache= 0;
+ }
limit= 0;
if (table)
{
- table->set_keyread(FALSE);
+ table->disable_keyread();
table->file->ha_index_or_rnd_end();
+ preread_init_done= FALSE;
+ if (table->pos_in_table_list &&
+ table->pos_in_table_list->jtbm_subselect)
+ {
+ if (table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
+ {
+ /*
+ Set this to NULL so that cleanup_empty_jtbm_semi_joins() doesn't
+ attempt to make another free_tmp_table call.
+ */
+ table->pos_in_table_list->table= NULL;
+ free_tmp_table(join->thd, table);
+ table= NULL;
+ }
+ else
+ {
+ end_read_record(&read_record);
+ table->pos_in_table_list->jtbm_subselect->cleanup();
+ /*
+ The above call freed the materialized temptable. Set it to NULL so
+ that we don't attempt to touch it if JOIN_TAB::cleanup() is invoked
+ multiple times (as it may be).
+ */
+ table=NULL;
+ }
+ DBUG_VOID_RETURN;
+ }
/*
We need to reset this for next select
(Tested in part_of_refkey)
@@ -7095,6 +10558,206 @@ void JOIN_TAB::cleanup()
table->reginfo.join_tab= 0;
}
end_read_record(&read_record);
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ Estimate the time to get rows of the joined table
+*/
+
+double JOIN_TAB::scan_time()
+{
+ double res;
+ if (table->created)
+ {
+ if (table->is_filled_at_execution())
+ {
+ get_delayed_table_estimates(table, &records, &read_time,
+ &startup_cost);
+ found_records= records;
+ table->quick_condition_rows= records;
+ }
+ else
+ {
+ found_records= records= table->file->stats.records;
+ read_time= table->file->scan_time();
+ /*
+ table->quick_condition_rows has already been set to
+ table->file->stats.records
+ */
+ }
+ res= read_time;
+ }
+ else
+ {
+ found_records= records=table->file->stats.records;
+ read_time= found_records ? (double)found_records: 10.0;// TODO:fix this stub
+ res= read_time;
+ }
+ return res;
+}
+
+
+/**
+ Estimate the number of rows that an access method will read from a table.
+
+ @todo: why not use JOIN_TAB::found_records
+*/
+
+ha_rows JOIN_TAB::get_examined_rows()
+{
+ ha_rows examined_rows;
+
+ if (select && select->quick)
+ examined_rows= select->quick->records;
+ else if (type == JT_NEXT || type == JT_ALL ||
+ type == JT_HASH || type ==JT_HASH_NEXT)
+ {
+ if (limit)
+ {
+ /*
+ @todo This estimate is wrong, a LIMIT query may examine much more rows
+ than the LIMIT itself.
+ */
+ examined_rows= limit;
+ }
+ else
+ {
+ if (table->is_filled_at_execution())
+ examined_rows= records;
+ else
+ {
+ /*
+ handler->info(HA_STATUS_VARIABLE) has been called in
+ make_join_statistics()
+ */
+ examined_rows= table->file->stats.records;
+ }
+ }
+ }
+ else
+ examined_rows= (ha_rows) records_read;
+
+ return examined_rows;
+}
+
+
+/**
+ Initialize the join_tab before reading.
+ Currently only derived table/view materialization is done here.
+
+ TODO: consider moving this together with join_tab_execution_startup
+*/
+bool JOIN_TAB::preread_init()
+{
+ TABLE_LIST *derived= table->pos_in_table_list;
+ if (!derived || !derived->is_materialized_derived())
+ {
+ preread_init_done= TRUE;
+ return FALSE;
+ }
+
+ /* Materialize derived table/view. */
+ if (!derived->get_unit()->executed &&
+ mysql_handle_single_derived(join->thd->lex,
+ derived, DT_CREATE | DT_FILL))
+ return TRUE;
+ preread_init_done= TRUE;
+ if (select && select->quick)
+ select->quick->replace_handler(table->file);
+
+ /* Init ftfuncs for the just initialized derived table */
+ if (table->fulltext_searched)
+ init_ftfuncs(join->thd, join->select_lex, test(join->order));
+
+ return FALSE;
+}
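+
+
+/*
+  Illustration only (not part of the patch; object names are hypothetical).
+  A derived table that cannot be merged, e.g.
+
+    SELECT dt.a, dt.cnt
+    FROM (SELECT a, COUNT(*) AS cnt FROM t1 GROUP BY a) AS dt
+    WHERE dt.cnt > 10;
+
+  is materialized lazily: preread_init() creates and fills the temporary
+  table for dt the first time the corresponding join tab is to be read.
+*/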
+
+
+/**
+ Build a TABLE_REF structure for index lookup in the temporary table
+
+ @param thd Thread handle
+ @param tmp_key The temporary table key
+ @param it The iterator of items for lookup in the key
+ @param skip Number of fields from the beginning to skip
+
+ @details
+ Build TABLE_REF object for lookup in the key 'tmp_key' using items
+ accessible via item iterator 'it'.
+
+ @retval TRUE Error
+ @retval FALSE OK
+*/
+
+bool TABLE_REF::tmp_table_index_lookup_init(THD *thd,
+ KEY *tmp_key,
+ Item_iterator &it,
+ bool value,
+ uint skip)
+{
+ uint tmp_key_parts= tmp_key->key_parts;
+ uint i;
+ DBUG_ENTER("TABLE_REF::tmp_table_index_lookup_init");
+
+ key= 0; /* The only temp table index. */
+ key_length= tmp_key->key_length;
+ if (!(key_buff=
+ (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
+ !(key_copy=
+ (store_key**) thd->alloc((sizeof(store_key*) *
+ (tmp_key_parts + 1)))) ||
+ !(items=
+ (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts)))
+ DBUG_RETURN(TRUE);
+
+ key_buff2= key_buff + ALIGN_SIZE(tmp_key->key_length);
+
+ KEY_PART_INFO *cur_key_part= tmp_key->key_part;
+ store_key **ref_key= key_copy;
+ uchar *cur_ref_buff= key_buff;
+
+ it.open();
+ for (i= 0; i < skip; i++) it.next();
+ for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
+ {
+ Item *item= it.next();
+ DBUG_ASSERT(item);
+ items[i]= item;
+ int null_count= test(cur_key_part->field->real_maybe_null());
+ *ref_key= new store_key_item(thd, cur_key_part->field,
+ /* TIMOUR:
+ the NULL byte is already taken into account in
+ cur_key_part->store_length, so we could rely on
+ that instead of computing
+ cur_ref_buff + test(maybe_null) here.
+ */
+ cur_ref_buff + null_count,
+ null_count ? cur_ref_buff : 0,
+ cur_key_part->length, items[i], value);
+ cur_ref_buff+= cur_key_part->store_length;
+ }
+ *ref_key= NULL; /* End marker. */
+ key_err= 1;
+ key_parts= tmp_key_parts;
+ DBUG_RETURN(FALSE);
+}
+
+
+/*
+ Check if ref access uses "Full scan on NULL key" (i.e. it actually alternates
+ between ref access and full table scan)
+*/
+
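+/*
+  Illustration (an assumption about the typical plan; names are
+  hypothetical): for "outer_expr IN (SELECT t.key_col FROM t)" where
+  outer_expr may be NULL, the ref access on t.key_col is protected by
+  trigger guards; when outer_expr is NULL the executor falls back to a
+  full scan ("Full scan on NULL key" in EXPLAIN). A non-NULL entry in
+  cond_guards[] is what this function detects.
+*/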
+bool TABLE_REF::is_access_triggered()
+{
+ for (uint i = 0; i < key_parts; i++)
+ {
+ if (cond_guards[i])
+ return TRUE;
+ }
+ return FALSE;
}
@@ -7149,7 +10812,7 @@ void JOIN::join_free()
Optimization: if not EXPLAIN and we are done with the JOIN,
free all tables.
*/
- bool full= (!select_lex->uncacheable && !thd->lex->describe);
+ bool full= !(select_lex->uncacheable) && !(thd->lex->describe);
bool can_unlock= full;
DBUG_ENTER("JOIN::join_free");
@@ -7213,36 +10876,91 @@ void JOIN::join_free()
void JOIN::cleanup(bool full)
{
DBUG_ENTER("JOIN::cleanup");
+ DBUG_PRINT("enter", ("full %u", (uint) full));
- if (all_tables)
+ if (table)
{
- JOIN_TAB *tab,*end;
+ JOIN_TAB *tab;
/*
- Free resources allocated by filesort() and Unique::get()
+ Only a sorted table may be cached. This sorted table is always the
+ first non-const table in join->table.
*/
- if (tables > const_tables) // Test for not-const tables
- for (uint ix= const_tables; ix < tables; ++ix)
+ if (table_count > const_tables) // Test for not-const tables
+ {
+ JOIN_TAB *first_tab= first_top_level_tab(this, WITHOUT_CONST_TABLES);
+ if (first_tab->table)
{
- free_io_cache(all_tables[ix]);
- filesort_free_buffers(all_tables[ix], full);
+ free_io_cache(first_tab->table);
+ filesort_free_buffers(first_tab->table, full);
}
-
+ }
if (full)
{
- for (tab= join_tab, end= tab+tables; tab != end; tab++)
- tab->cleanup();
+ JOIN_TAB *sort_tab= first_linear_tab(this, WITH_BUSH_ROOTS,
+ WITHOUT_CONST_TABLES);
+ if (pre_sort_join_tab)
+ {
+ if (sort_tab && sort_tab->select == pre_sort_join_tab->select)
+ {
+ pre_sort_join_tab->select= NULL;
+ }
+ else
+ clean_pre_sort_join_tab();
+ }
+ /*
+ Call cleanup() on join tabs used by the join optimization
+ (join->join_tab may now be pointing to the result of make_simple_join
+ reading from the temporary table)
+
+ We also need to check table_count to handle various degenerate joins
+ w/o tables: they don't have some members initialized and
+ WALK_OPTIMIZATION_TABS may not work correctly for them.
+ */
+ enum enum_exec_or_opt tabs_kind;
+ if (first_breadth_first_tab(this, WALK_OPTIMIZATION_TABS))
+ tabs_kind= WALK_OPTIMIZATION_TABS;
+ else
+ tabs_kind= WALK_EXECUTION_TABS;
+ if (table_count)
+ {
+ for (tab= first_breadth_first_tab(this, tabs_kind); tab;
+ tab= next_breadth_first_tab(this, tabs_kind, tab))
+ {
+ tab->cleanup();
+ }
+
+ if (tabs_kind == WALK_OPTIMIZATION_TABS &&
+ first_breadth_first_tab(this, WALK_OPTIMIZATION_TABS) !=
+ first_breadth_first_tab(this, WALK_EXECUTION_TABS))
+ {
+ JOIN_TAB *jt= first_breadth_first_tab(this, WALK_EXECUTION_TABS);
+ /* We've walked the optimization tabs; now do the execution ones too */
+ if (jt)
+ jt->cleanup();
+ }
+ }
+ cleaned= true;
+
}
else
{
- for (tab= join_tab, end= tab+tables; tab != end; tab++)
+ for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab;
+ tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
{
if (tab->table)
+ {
+ DBUG_PRINT("info", ("close index: %s.%s alias: %s",
+ tab->table->s->db.str,
+ tab->table->s->table_name.str,
+ tab->table->alias.c_ptr()));
tab->table->file->ha_index_or_rnd_end();
+ }
}
}
}
if (full)
{
+ cleanup_empty_jtbm_semi_joins(this, join_list);
/*
Ensure that the following delete_elements() would not be called
twice for the same list.
@@ -7274,6 +10992,22 @@ void JOIN::cleanup(bool full)
tmp_join->tmp_table_param.save_copy_field= 0;
}
tmp_table_param.cleanup();
+
+ if (!join_tab)
+ {
+ List_iterator<TABLE_LIST> li(*join_list);
+ TABLE_LIST *table_ref;
+ while ((table_ref= li++))
+ {
+ if (table_ref->table &&
+ table_ref->jtbm_subselect &&
+ table_ref->jtbm_subselect->is_jtbm_const_tab)
+ {
+ free_tmp_table(thd, table_ref->table);
+ table_ref->table= NULL;
+ }
+ }
+ }
}
DBUG_VOID_RETURN;
}
@@ -7297,6 +11031,8 @@ void JOIN::cleanup(bool full)
SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
@endcode
+
+ TODO: this function checks ORDER::used, which can only have a value of 0.
*/
static bool
@@ -7356,8 +11092,6 @@ eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab)
static bool
only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables)
{
- if (specialflag & SPECIAL_SAFE_MODE)
- return 0; // skip this optimize /* purecov: inspected */
tables&= ~PSEUDO_TABLE_BITS;
for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
{
@@ -7372,9 +11106,10 @@ only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables)
static void update_depend_map(JOIN *join)
{
- JOIN_TAB *join_tab=join->join_tab, *end=join_tab+join->tables;
-
- for (; join_tab != end ; join_tab++)
+ JOIN_TAB *join_tab;
+ for (join_tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITH_CONST_TABLES);
+ join_tab;
+ join_tab= next_linear_tab(join, join_tab, WITH_BUSH_ROOTS))
{
TABLE_REF *ref= &join_tab->ref;
table_map depend_map=0;
@@ -7385,11 +11120,11 @@ static void update_depend_map(JOIN *join)
ref->depend_map=depend_map & ~OUTER_REF_TABLE_BIT;
depend_map&= ~OUTER_REF_TABLE_BIT;
for (JOIN_TAB **tab=join->map2table;
- depend_map ;
- tab++,depend_map>>=1 )
+ depend_map ;
+ tab++,depend_map>>=1 )
{
if (depend_map & 1)
- ref->depend_map|=(*tab)->ref.depend_map;
+ ref->depend_map|=(*tab)->ref.depend_map;
}
}
}
@@ -7397,7 +11132,7 @@ static void update_depend_map(JOIN *join)
/** Update the dependency map for the sort order. */
-static void update_depend_map(JOIN *join, ORDER *order)
+static void update_depend_map_for_order(JOIN *join, ORDER *order)
{
for (; order ; order=order->next)
{
@@ -7425,6 +11160,8 @@ static void update_depend_map(JOIN *join, ORDER *order)
Remove all constants and check if ORDER only contains simple
expressions.
+ We also remove all duplicate expressions, keeping only the first one.
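+ For example, "ORDER BY t1.a, t2.b, t1.a" is reduced to "ORDER BY t1.a, t2.b".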
+
simple_order is set to 1 if sort_order only uses fields from head table
and the head table is not a LEFT JOIN table.
@@ -7432,9 +11169,10 @@ static void update_depend_map(JOIN *join, ORDER *order)
@param first_order List of SORT or GROUP order
@param cond WHERE statement
@param change_list Set to 1 if we should remove things from list.
- If this is not set, then only simple_order is
- calculated.
- @param simple_order Set to 1 if we are only using simple expressions
+ If this is not set, then only simple_order is
+ calculated.
+ @param simple_order Set to 1 if we are only using simple
+ expressions.
@return
Returns new sort order
@@ -7444,21 +11182,38 @@ static ORDER *
remove_const(JOIN *join,ORDER *first_order, COND *cond,
bool change_list, bool *simple_order)
{
- if (join->tables == join->const_tables)
+ if (join->table_count == join->const_tables)
return change_list ? 0 : first_order; // No need to sort
- ORDER *order,**prev_ptr;
- table_map first_table= join->join_tab[join->const_tables].table->map;
+ ORDER *order,**prev_ptr, *tmp_order;
+ table_map first_table;
table_map not_const_tables= ~join->const_table_map;
table_map ref;
+ bool first_is_base_table= FALSE;
DBUG_ENTER("remove_const");
+
+ LINT_INIT(first_table); /* protected by first_is_base_table */
+ if (join->join_tab[join->const_tables].table)
+ {
+ first_table= join->join_tab[join->const_tables].table->map;
+ first_is_base_table= TRUE;
+ }
+
+ /*
+ Cleanup to avoid interference between calls of this function for
+ ORDER BY and GROUP BY
+ */
+ for (JOIN_TAB *tab= join->join_tab + join->const_tables;
+ tab < join->join_tab + join->table_count;
+ tab++)
+ tab->cached_eq_ref_table= FALSE;
prev_ptr= &first_order;
*simple_order= *join->join_tab[join->const_tables].on_expr_ref ? 0 : 1;
/* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */
- update_depend_map(join, first_order);
+ update_depend_map_for_order(join, first_order);
for (order=first_order; order ; order=order->next)
{
table_map order_tables=order->item[0]->used_tables();
@@ -7473,16 +11228,21 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
table for all queries containing more than one table, ROLLUP, and an
outer join.
*/
- (join->tables > 1 && join->rollup.state == ROLLUP::STATE_INITED &&
+ (join->table_count > 1 && join->rollup.state == ROLLUP::STATE_INITED &&
join->outer_join))
*simple_order=0; // Must do a temp table to sort
else if (!(order_tables & not_const_tables))
{
- if (order->item[0]->has_subquery() &&
- !(join->select_lex->options & SELECT_DESCRIBE))
- order->item[0]->val_str(&order->item[0]->str_value);
+ if (order->item[0]->has_subquery())
+ {
+ /*
+ Delay the evaluation of constant ORDER and/or GROUP expressions that
+ contain subqueries until the execution phase.
+ */
+ join->exec_const_order_group_cond.push_back(order->item[0]);
+ }
DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
- continue; // skip const item
+ continue;
}
else
{
@@ -7495,7 +11255,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
continue;
}
- if ((ref=order_tables & (not_const_tables ^ first_table)))
+ if (first_is_base_table && (ref=order_tables & (not_const_tables ^ first_table)))
{
if (!(order_tables & first_table) &&
only_eq_ref_tables(join,first_order, ref))
@@ -7507,6 +11267,17 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
}
}
}
+ /* Remove ORDER BY entries that we have seen before */
+ for (tmp_order= first_order;
+ tmp_order != order;
+ tmp_order= tmp_order->next)
+ {
+ if (tmp_order->item[0]->eq(order->item[0],1))
+ break;
+ }
+ if (tmp_order != order)
+ continue; // Duplicate order by. Remove
+
if (change_list)
*prev_ptr= order; // use this entry
prev_ptr= &order->next;
@@ -7561,9 +11332,9 @@ ORDER *simple_remove_const(ORDER *order, COND *where)
static int
-return_zero_rows(JOIN *join, select_result *result,TABLE_LIST *tables,
+return_zero_rows(JOIN *join, select_result *result, List<TABLE_LIST> &tables,
List<Item> &fields, bool send_row, ulonglong select_options,
- const char *info, Item *having)
+ const char *info, Item *having, List<Item> &all_fields)
{
DBUG_ENTER("return_zero_rows");
@@ -7577,8 +11348,30 @@ return_zero_rows(JOIN *join, select_result *result,TABLE_LIST *tables,
if (send_row)
{
- for (TABLE_LIST *table= tables; table; table= table->next_leaf)
- mark_as_null_row(table->table); // All fields are NULL
+ /*
+ Set all tables to have a NULL row. This is needed as we will be evaluating
+ the HAVING condition.
+ */
+ List_iterator<TABLE_LIST> ti(tables);
+ TABLE_LIST *table;
+ while ((table= ti++))
+ {
+ /*
+ Don't touch semi-join materialization tables, as the above join_free()
+ call has freed them (and HAVING clause can't have references to them
+ anyway).
+ */
+ if (!table->is_jtbm())
+ mark_as_null_row(table->table); // All fields are NULL
+ }
+ List_iterator_fast<Item> it(all_fields);
+ Item *item;
+ /*
+ Inform all items (especially aggregating ones) that there are no rows,
+ so that HAVING is calculated correctly; we also need this for sending
+ the result.
+ */
+ while ((item= it++))
+ item->no_rows_in_result();
if (having && having->val_int() == 0)
send_row=0;
}
@@ -7587,13 +11380,7 @@ return_zero_rows(JOIN *join, select_result *result,TABLE_LIST *tables,
{
bool send_error= FALSE;
if (send_row)
- {
- List_iterator_fast<Item> it(fields);
- Item *item;
- while ((item= it++))
- item->no_rows_in_result();
- send_error= result->send_data(fields);
- }
+ send_error= result->send_data(fields) > 0;
if (!send_error)
result->send_eof(); // Should be safe
}
@@ -7611,8 +11398,11 @@ static void clear_tables(JOIN *join)
must clear only the non-const tables, as const tables
are not re-calculated.
*/
- for (uint i=join->const_tables ; i < join->tables ; i++)
- mark_as_null_row(join->all_tables[i]); // All fields are NULL
+ for (uint i= 0 ; i < join->table_count ; i++)
+ {
+ if (!(join->table[i]->map & join->const_table_map))
+ mark_as_null_row(join->table[i]); // All fields are NULL
+ }
}
/*****************************************************************************
@@ -7756,6 +11546,9 @@ finish:
acceptable, as this happens rarely. The implementation without
copying would be much more complicated.
+ For a description of how equality propagation works with SJM nests, grep
+ for EqualityPropagationAndSjmNests.
+
@param left_item left term of the quality to be checked
@param right_item right term of the equality to be checked
@param item equality item if the equality originates from a condition
@@ -7774,24 +11567,26 @@ finish:
static bool check_simple_equality(Item *left_item, Item *right_item,
Item *item, COND_EQUAL *cond_equal)
{
+ Item *orig_left_item= left_item;
+ Item *orig_right_item= right_item;
if (left_item->type() == Item::REF_ITEM &&
((Item_ref*)left_item)->ref_type() == Item_ref::VIEW_REF)
{
- if (((Item_ref*)left_item)->depended_from)
+ if (((Item_ref*)left_item)->get_depended_from())
return FALSE;
left_item= left_item->real_item();
}
if (right_item->type() == Item::REF_ITEM &&
((Item_ref*)right_item)->ref_type() == Item_ref::VIEW_REF)
{
- if (((Item_ref*)right_item)->depended_from)
+ if (((Item_ref*)right_item)->get_depended_from())
return FALSE;
right_item= right_item->real_item();
}
if (left_item->type() == Item::FIELD_ITEM &&
right_item->type() == Item::FIELD_ITEM &&
- !((Item_field*)left_item)->depended_from &&
- !((Item_field*)right_item)->depended_from)
+ !((Item_field*)left_item)->get_depended_from() &&
+ !((Item_field*)right_item)->get_depended_from())
{
/* The predicate the form field1=field2 is processed */
@@ -7827,12 +11622,14 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
{
/* left_item_equal of an upper level contains left_item */
left_item_equal= new Item_equal(left_item_equal);
+ left_item_equal->set_context_field(((Item_field*) left_item));
cond_equal->current_level.push_back(left_item_equal);
}
if (right_copyfl)
{
/* right_item_equal of an upper level contains right_item */
right_item_equal= new Item_equal(right_item_equal);
+ right_item_equal->set_context_field(((Item_field*) right_item));
cond_equal->current_level.push_back(right_item_equal);
}
@@ -7840,7 +11637,7 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
{
/* left item was found in the current or one of the upper levels */
if (! right_item_equal)
- left_item_equal->add((Item_field *) right_item);
+ left_item_equal->add(orig_right_item);
else
{
/* Merge two multiple equalities forming a new one */
@@ -7855,12 +11652,14 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
{
/* left item was not found neither the current nor in upper levels */
if (right_item_equal)
- right_item_equal->add((Item_field *) left_item);
+ right_item_equal->add(orig_left_item);
else
{
/* None of the fields was found in multiple equalities */
- Item_equal *item_equal= new Item_equal((Item_field *) left_item,
- (Item_field *) right_item);
+ Item_equal *item_equal= new Item_equal(orig_left_item,
+ orig_right_item,
+ FALSE);
+ item_equal->set_context_field((Item_field*)left_item);
cond_equal->current_level.push_back(item_equal);
}
}
@@ -7871,18 +11670,21 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
/* The predicate of the form field=const/const=field is processed */
Item *const_item= 0;
Item_field *field_item= 0;
+ Item *orig_field_item= 0;
if (left_item->type() == Item::FIELD_ITEM &&
- !((Item_field*)left_item)->depended_from &&
- right_item->const_item())
+ !((Item_field*)left_item)->get_depended_from() &&
+ right_item->const_item() && !right_item->is_expensive())
{
- field_item= (Item_field*) left_item;
+ orig_field_item= orig_left_item;
+ field_item= (Item_field *) left_item;
const_item= right_item;
}
else if (right_item->type() == Item::FIELD_ITEM &&
- !((Item_field*)right_item)->depended_from &&
- left_item->const_item())
+ !((Item_field*)right_item)->get_depended_from() &&
+ left_item->const_item() && !left_item->is_expensive())
{
- field_item= (Item_field*) right_item;
+ orig_field_item= orig_right_item;
+ field_item= (Item_field *) right_item;
const_item= left_item;
}
@@ -7891,15 +11693,15 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
{
bool copyfl;
- if (field_item->result_type() == STRING_RESULT)
+ if (field_item->cmp_type() == STRING_RESULT)
{
CHARSET_INFO *cs= ((Field_str*) field_item->field)->charset();
if (!item)
{
Item_func_eq *eq_item;
- if ((eq_item= new Item_func_eq(left_item, right_item)))
+ if (!(eq_item= new Item_func_eq(orig_left_item, orig_right_item)) ||
+ eq_item->set_cmp_func())
return FALSE;
- eq_item->set_cmp_func();
eq_item->quick_fix_field();
item= eq_item;
}
@@ -7914,6 +11716,7 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
{
item_equal= new Item_equal(item_equal);
cond_equal->current_level.push_back(item_equal);
+ item_equal->set_context_field(field_item);
}
if (item_equal)
{
@@ -7922,11 +11725,12 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
already contains a constant and its value is not equal to
the value of const_item.
*/
- item_equal->add(const_item, field_item);
+ item_equal->add_const(const_item, orig_field_item);
}
else
{
- item_equal= new Item_equal(const_item, field_item);
+ item_equal= new Item_equal(const_item, orig_field_item, TRUE);
+ item_equal->set_context_field(field_item);
cond_equal->current_level.push_back(item_equal);
}
return TRUE;
@@ -7978,21 +11782,18 @@ static bool check_row_equality(THD *thd, Item *left_row, Item_row *right_row,
(Item_row *) left_item,
(Item_row *) right_item,
cond_equal, eq_list);
- if (!is_converted)
- thd->lex->current_select->cond_count++;
}
else
{
is_converted= check_simple_equality(left_item, right_item, 0, cond_equal);
- thd->lex->current_select->cond_count++;
}
if (!is_converted)
{
Item_func_eq *eq_item;
- if (!(eq_item= new Item_func_eq(left_item, right_item)))
+ if (!(eq_item= new Item_func_eq(left_item, right_item)) ||
+ eq_item->set_cmp_func())
return FALSE;
- eq_item->set_cmp_func();
eq_item->quick_fix_field();
eq_list->push_back(eq_item);
}
@@ -8043,7 +11844,6 @@ static bool check_equality(THD *thd, Item *item, COND_EQUAL *cond_equal,
if (left_item->type() == Item::ROW_ITEM &&
right_item->type() == Item::ROW_ITEM)
{
- thd->lex->current_select->cond_count--;
return check_row_equality(thd,
(Item_row *) left_item,
(Item_row *) right_item,
@@ -8168,13 +11968,15 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
List_iterator_fast<Item_equal> it(cond_equal.current_level);
while ((item_equal= it++))
{
- item_equal->fix_length_and_dec();
+ item_equal->fix_fields(thd, NULL);
item_equal->update_used_tables();
set_if_bigger(thd->lex->current_select->max_equal_elems,
- item_equal->members());
+ item_equal->n_field_items());
}
- ((Item_cond_and*)cond)->cond_equal= cond_equal;
+ ((Item_cond_and*)cond)->cond_equal.copy(cond_equal);
+ cond_equal.current_level=
+ ((Item_cond_and*)cond)->cond_equal.current_level;
inherited= &(((Item_cond_and*)cond)->cond_equal);
}
/*
@@ -8202,7 +12004,8 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
args->concat((List<Item> *)&cond_equal.current_level);
}
}
- else if (cond->type() == Item::FUNC_ITEM)
+ else if (cond->type() == Item::FUNC_ITEM ||
+ cond->real_item()->type() == Item::FIELD_ITEM)
{
List<Item> eq_list;
/*
@@ -8224,10 +12027,11 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
{
if ((item_equal= cond_equal.current_level.pop()))
{
- item_equal->fix_length_and_dec();
+ item_equal->fix_fields(thd, NULL);
item_equal->update_used_tables();
set_if_bigger(thd->lex->current_select->max_equal_elems,
- item_equal->members());
+ item_equal->n_field_items());
+ item_equal->upper_levels= inherited;
return item_equal;
}
@@ -8248,9 +12052,10 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
item_equal->fix_length_and_dec();
item_equal->update_used_tables();
set_if_bigger(thd->lex->current_select->max_equal_elems,
- item_equal->members());
+ item_equal->n_field_items());
}
- and_cond->cond_equal= cond_equal;
+ and_cond->cond_equal.copy(cond_equal);
+ cond_equal.current_level= and_cond->cond_equal.current_level;
args->concat((List<Item> *)&cond_equal.current_level);
return and_cond;
@@ -8262,7 +12067,7 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
as soon the field is not of a string type or the field reference is
an argument of a comparison predicate.
*/
- uchar *is_subst_valid= (uchar *) 1;
+ uchar* is_subst_valid= (uchar *) Item::ANY_SUBST;
cond= cond->compile(&Item::subst_argument_checker,
&is_subst_valid,
&Item::equal_fields_propagator,
@@ -8332,6 +12137,8 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
@param inherited path to all inherited multiple equality items
@param join_list list of join tables to which the condition
refers to
+ @param ignore_on_conds TRUE <-> do not build multiple equalities
+ for ON expressions
@param[out] cond_equal_ref pointer to the structure to place built
equalities in
@@ -8339,11 +12146,13 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
pointer to the transformed condition containing multiple equalities
*/
-static COND *build_equal_items(THD *thd, COND *cond,
+static COND *build_equal_items(JOIN *join, COND *cond,
COND_EQUAL *inherited,
List<TABLE_LIST> *join_list,
+ bool ignore_on_conds,
COND_EQUAL **cond_equal_ref)
{
+ THD *thd= join->thd;
COND_EQUAL *cond_equal= 0;
if (cond)
@@ -8353,6 +12162,7 @@ static COND *build_equal_items(THD *thd, COND *cond,
if (cond->type() == Item::COND_ITEM &&
((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
cond_equal= &((Item_cond_and*) cond)->cond_equal;
+
else if (cond->type() == Item::FUNC_ITEM &&
((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
{
@@ -8367,7 +12177,7 @@ static COND *build_equal_items(THD *thd, COND *cond,
}
*cond_equal_ref= cond_equal;
- if (join_list)
+ if (join_list && !ignore_on_conds)
{
TABLE_LIST *table;
List_iterator<TABLE_LIST> li(*join_list);
@@ -8382,8 +12192,8 @@ static COND *build_equal_items(THD *thd, COND *cond,
We can modify table->on_expr because its old value will
be restored before re-execution of PS/SP.
*/
- table->on_expr= build_equal_items(thd, table->on_expr, inherited,
- nested_join_list,
+ table->on_expr= build_equal_items(join, table->on_expr, inherited,
+ nested_join_list, ignore_on_conds,
&table->cond_equal);
}
}
@@ -8396,10 +12206,14 @@ static COND *build_equal_items(THD *thd, COND *cond,
/**
Compare field items by table order in the execution plan.
+ If field1 and field2 belong to different tables then
field1 is considered better than field2 if the table containing
field1 is accessed earlier than the table containing field2.
The function finds out which of the two fields is better according
to this criterion.
+ If field1 and field2 belong to the same table then the result
+ of the comparison depends on whether the fields are parts of
+ the key that is used to access this table.
@param field1 first field item to compare
@param field2 second field item to compare
@@ -8413,18 +12227,24 @@ static COND *build_equal_items(THD *thd, COND *cond,
0 otherwise
*/
-static int compare_fields_by_table_order(Item_field *field1,
- Item_field *field2,
- void *table_join_idx)
+static int compare_fields_by_table_order(Item *field1,
+ Item *field2,
+ void *table_join_idx)
{
int cmp= 0;
bool outer_ref= 0;
- if (field2->used_tables() & OUTER_REF_TABLE_BIT)
+ Item_field *f1= (Item_field *) (field1->real_item());
+ Item_field *f2= (Item_field *) (field2->real_item());
+ if (field1->const_item() || f1->const_item())
+ return 1;
+ if (field2->const_item() || f2->const_item())
+ return -1;
+ if (f2->used_tables() & OUTER_REF_TABLE_BIT)
{
outer_ref= 1;
cmp= -1;
}
- if (field2->used_tables() & OUTER_REF_TABLE_BIT)
+ if (f1->used_tables() & OUTER_REF_TABLE_BIT)
{
outer_ref= 1;
cmp++;
@@ -8432,11 +12252,75 @@ static int compare_fields_by_table_order(Item_field *field1,
if (outer_ref)
return cmp;
JOIN_TAB **idx= (JOIN_TAB **) table_join_idx;
- cmp= idx[field2->field->table->tablenr]-idx[field1->field->table->tablenr];
+
+ JOIN_TAB *tab1= idx[f1->field->table->tablenr];
+ JOIN_TAB *tab2= idx[f2->field->table->tablenr];
+
+ /*
+ if one of the tables is inside a merged SJM nest and the other one isn't,
+ compare the SJM bush roots of the tables.
+ */
+ if (tab1->bush_root_tab != tab2->bush_root_tab)
+ {
+ if (tab1->bush_root_tab)
+ tab1= tab1->bush_root_tab;
+
+ if (tab2->bush_root_tab)
+ tab2= tab2->bush_root_tab;
+ }
+
+ cmp= tab2 - tab1;
+
+ if (!cmp)
+ {
+ JOIN_TAB *tab= idx[f1->field->table->tablenr];
+ uint keyno= MAX_KEY;
+ if (tab->ref.key_parts)
+ keyno= tab->ref.key;
+ else if (tab->select && tab->select->quick)
+ keyno = tab->select->quick->index;
+ if (keyno != MAX_KEY)
+ {
+ if (f2->field->part_of_key.is_set(keyno))
+ cmp= -1;
+ if (f1->field->part_of_key.is_set(keyno))
+ cmp++;
+ if (!cmp)
+ {
+ KEY *key_info= tab->table->key_info + keyno;
+ for (uint i= 0; i < key_info->key_parts; i++)
+ {
+ Field *fld= key_info->key_part[i].field;
+ if (fld->eq(f2->field))
+ {
+ cmp= -1;
+ break;
+ }
+ if (fld->eq(f1->field))
+ {
+ cmp= 1;
+ break;
+ }
+ }
+ }
+ }
+ else
+ cmp= f2->field->field_index-f1->field->field_index;
+ }
return cmp < 0 ? -1 : (cmp ? 1 : 0);
}
+static TABLE_LIST* embedding_sjm(Item *item)
+{
+ Item_field *item_field= (Item_field *) (item->real_item());
+ TABLE_LIST *nest= item_field->field->table->pos_in_table_list->embedding;
+ if (nest && nest->sj_mat_info && nest->sj_mat_info->is_used)
+ return nest;
+ else
+ return NULL;
+}
+
/**
Generate minimal set of simple equalities equivalent to a multiple equality.
@@ -8470,6 +12354,25 @@ static int compare_fields_by_table_order(Item_field *field1,
So only t1.a=t3.c should be left in the lower level.
If cond is equal to 0, then not more then one equality is generated
and a pointer to it is returned as the result of the function.
+
+ Equality substitution and semi-join materialization nests:
+
+ In case join order looks like this:
+
+ outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3
+
+ We must not construct equalities like
+
+ outer_tbl1.col = inner_tbl1.col
+
+ because they would get attached to inner_tbl1 and would get evaluated
+ during the materialization phase, when we don't have the current value of
+ outer_tbl1.col.
+
+ Item_equal::get_first() also takes similar measures for dealing with
+ equality substitution in the presence of SJM nests.
+
+ Grep for EqualityPropagationAndSjmNests for a more verbose description.
@return
- The condition with generated simple equalities or
@@ -8477,76 +12380,190 @@ static int compare_fields_by_table_order(Item_field *field1,
- 0, otherwise.
*/
-static Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
- Item_equal *item_equal)
+Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
+ Item_equal *item_equal)
{
List<Item> eq_list;
Item_func_eq *eq_item= 0;
if (((Item *) item_equal)->const_item() && !item_equal->val_int())
return new Item_int((longlong) 0,1);
Item *item_const= item_equal->get_const();
- Item_equal_iterator it(*item_equal);
+ Item_equal_fields_iterator it(*item_equal);
Item *head;
+ TABLE_LIST *current_sjm= NULL;
+ Item *current_sjm_head= NULL;
+
+ DBUG_ASSERT(!cond ||
+ cond->type() == Item::INT_ITEM ||
+ (cond->type() == Item::FUNC_ITEM &&
+ ((Item_func *) cond)->functype() == Item_func::EQ_FUNC) ||
+ (cond->type() == Item::COND_ITEM &&
+ ((Item_func *) cond)->functype() == Item_func::COND_AND_FUNC));
+
+ /*
+ Pick the "head" item: the constant one or the first in the join order
+ (if the first in the join order happends to be inside an SJM nest, that's
+ ok, because this is where the value will be unpacked after
+ materialization).
+ */
if (item_const)
head= item_const;
else
{
- head= item_equal->get_first();
+ TABLE_LIST *emb_nest;
+ head= item_equal->get_first(NO_PARTICULAR_TAB, NULL);
it++;
+ if ((emb_nest= embedding_sjm(head)))
+ {
+ current_sjm= emb_nest;
+ current_sjm_head= head;
+ }
}
- Item_field *item_field;
- while ((item_field= it++))
+
+ Item *field_item;
+ /*
+ For each other item, generate an "item=head" equality (except for tables that
+ are within SJ-Materialization nests; for those, "head" is defined
+ differently)
+ */
+ while ((field_item= it++))
{
- Item_equal *upper= item_field->find_item_equal(upper_levels);
- Item_field *item= item_field;
- if (upper)
+ Item_equal *upper= field_item->find_item_equal(upper_levels);
+ Item *item= field_item;
+ TABLE_LIST *field_sjm= embedding_sjm(field_item);
+ if (!field_sjm)
{
- if (item_const && upper->get_const())
- item= 0;
+ current_sjm= NULL;
+ current_sjm_head= NULL;
+ }
+
+ /*
+ Check if "field_item=head" equality is already guaranteed to be true
+ on upper AND-levels.
+ */
+ if (upper)
+ {
+ TABLE_LIST *native_sjm= embedding_sjm(item_equal->context_field);
+ Item *upper_const= upper->get_const();
+ if (item_const && upper_const)
+ {
+ /*
+ Upper item also has "field_item=const".
+ Don't produce equality if const is equal to item_const.
+ */
+ Item_func_eq *func= new Item_func_eq(item_const, upper_const);
+ func->set_cmp_func();
+ func->quick_fix_field();
+ if (func->val_int())
+ item= 0;
+ }
else
{
- Item_equal_iterator li(*item_equal);
- while ((item= li++) != item_field)
+ Item_equal_fields_iterator li(*item_equal);
+ while ((item= li++) != field_item)
{
- if (item->find_item_equal(upper_levels) == upper)
+ if (embedding_sjm(item) == field_sjm &&
+ item->find_item_equal(upper_levels) == upper)
break;
}
}
+ if (embedding_sjm(field_item) != native_sjm)
+ item= NULL; /* Don't produce equality */
}
- if (item == item_field)
+
+ bool produce_equality= test(item == field_item);
+ if (!item_const && field_sjm && field_sjm != current_sjm)
{
- if (eq_item)
- eq_list.push_back(eq_item);
- eq_item= new Item_func_eq(item_field, head);
- if (!eq_item)
+ /* Entering an SJM nest */
+ current_sjm_head= field_item;
+ if (!field_sjm->sj_mat_info->is_sj_scan)
+ produce_equality= FALSE;
+ }
+
+ if (produce_equality)
+ {
+ if (eq_item && eq_list.push_back(eq_item))
+ return 0;
+
+ /*
+ If we're inside an SJM-nest (current_sjm!=NULL), and the multi-equality
+ doesn't include a constant, we should produce an equality with the first
+ of the equal items in this SJM (except for the first element inside the
+ SJM; for that, we produce the equality with the "head" item).
+
+ In other cases, get the "head" item, which is either the first of the
+ equal items on the top level, or the constant.
+ */
+ Item *head_item= (!item_const && current_sjm &&
+ current_sjm_head != field_item) ? current_sjm_head: head;
+ Item *head_real_item= head_item->real_item();
+ if (head_real_item->type() == Item::FIELD_ITEM)
+ head_item= head_real_item;
+
+ eq_item= new Item_func_eq(field_item->real_item(), head_item);
+
+ if (!eq_item || eq_item->set_cmp_func())
return 0;
- eq_item->set_cmp_func();
eq_item->quick_fix_field();
- }
+ }
+ current_sjm= field_sjm;
}
- if (!cond && !eq_list.head())
+ /*
+ We have produced zero, one, or more pair-wise equalities eq_i. We want to
+ return an expression in form:
+
+ cond AND eq_1 AND eq_2 AND eq_3 AND ...
+
+ 'cond' is a parameter for this function, which may be NULL, an Item_int(1),
+ or an Item_func_eq or an Item_cond_and.
+
+ We want to return a well-formed condition: no nested Item_cond_and objects
+ and no Item_cond_and with a single child:
+ - if 'cond' is an Item_cond_and, we add eq_i as its tail
+ - if 'cond' is Item_int(1), we return eq_i
+ - otherwise, we create our own Item_cond_and and put 'cond' at the front of
+ it.
+ - if we have only one condition to return, we don't create an Item_cond_and
+ */
+
+ if (eq_item && eq_list.push_back(eq_item))
+ return 0;
+ COND *res= 0;
+ switch (eq_list.elements)
{
- if (!eq_item)
- return new Item_int((longlong) 1,1);
- return eq_item;
+ case 0:
+ res= cond ? cond : new Item_int((longlong) 1, 1);
+ break;
+ case 1:
+ if (!cond || cond->type() == Item::INT_ITEM)
+ res= eq_item;
+ break;
+ default:
+ break;
}
-
- if (eq_item)
- eq_list.push_back(eq_item);
- if (!cond)
- cond= new Item_cond_and(eq_list);
- else
+ if (!res)
{
- DBUG_ASSERT(cond->type() == Item::COND_ITEM);
- if (eq_list.elements)
- ((Item_cond *) cond)->add_at_head(&eq_list);
+ if (cond)
+ {
+ if (cond->type() == Item::COND_ITEM)
+ {
+ res= cond;
+ ((Item_cond *) res)->add_at_end(&eq_list);
+ }
+ else if (eq_list.push_front(cond))
+ return 0;
+ }
+ }
+ if (!res)
+ res= new Item_cond_and(eq_list);
+ if (res)
+ {
+ res->quick_fix_field();
+ res->update_used_tables();
}
- cond->quick_fix_field();
- cond->update_used_tables();
-
- return cond;
+ return res;
}
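/*
  Illustration only (a minimal standalone sketch, not part of the patch):
  the head-selection rule described in the comment above
  eliminate_item_equal(). EqualMember, sjm_nest and generate_equalities()
  are invented stand-ins for Item_equal members and their embedding
  SJ-Materialization nests; the real code walks Item/TABLE_LIST objects.
*/
#include <cstdio>
#include <string>
#include <vector>

struct EqualMember
{
  std::string name;                 /* column, listed in join order */
  int sjm_nest;                     /* -1 = not inside a materialized SJ nest */
};

/*
  Generate "member = head" equalities. Members inside an SJM nest are
  equated to the first member of the same nest, so that nothing evaluated
  during materialization refers to a column of an outer table.
*/
static void generate_equalities(const std::vector<EqualMember> &members)
{
  const EqualMember *head= &members[0];          /* first in join order */
  const EqualMember *sjm_head= NULL;
  int cur_nest= head->sjm_nest;

  for (size_t i= 1; i < members.size(); i++)
  {
    const EqualMember &m= members[i];
    if (m.sjm_nest != cur_nest)
    {
      cur_nest= m.sjm_nest;
      sjm_head= (m.sjm_nest != -1) ? &m : NULL;
      if (sjm_head)
        continue;                                /* first member of the nest */
    }
    const EqualMember *rhs= (m.sjm_nest != -1 && sjm_head) ? sjm_head : head;
    std::printf("%s = %s\n", m.name.c_str(), rhs->name.c_str());
  }
}

int main()
{
  /* outer_tbl1  SJM(inner_tbl1 inner_tbl2)  outer_tbl2 */
  std::vector<EqualMember> members;
  members.push_back(EqualMember{"outer_tbl1.col", -1});
  members.push_back(EqualMember{"inner_tbl1.col", 0});
  members.push_back(EqualMember{"inner_tbl2.col", 0});
  members.push_back(EqualMember{"outer_tbl2.col", -1});
  generate_equalities(members);  /* no outer = inner equality is printed */
  return 0;
}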
@@ -8564,6 +12581,9 @@ static Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
After this the function retrieves all other conjuncted
predicates substitute every field reference by the field reference
to the first equal field or equal constant if there are any.
+
+ @param context_tab Join tab that 'cond' will be attached to, or
+ NO_PARTICULAR_TAB. See notes above.
@param cond condition to process
@param cond_equal multiple equalities to take into consideration
@param table_join_idx index to tables determining field preference
@@ -8574,15 +12594,42 @@ static Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
new fields in multiple equality item of lower levels. We want
the order in them to comply with the order of upper levels.
+ context_tab may be used to specify which join tab `cond` will be
+ attached to. There are two possible cases:
+
+ 1. context_tab != NO_PARTICULAR_TAB
+ We're doing substitution for an Item which will be evaluated in the
+ context of a particular item. For example, if the optimizer does a
+ ref access on "tbl1.key= expr" then
+ = equality substitution will be performed on 'expr'
+ = it is known in advance that 'expr' will be evaluated when
+ table tbl1 is accessed.
+ Note that in this kind of substitution we never have to replace Item_equal
+ objects. For example, for
+
+ t.key= func(col1=col2 AND col2=const)
+
+ we will not build Item_equal or do equality substitution (if we decide to,
+ this function will need to be fixed to handle it)
+
+ 2. context_tab == NO_PARTICULAR_TAB
+ We're doing substitution in WHERE/ON condition, which is not yet
+ attached to any particular join_tab. We will use information about the
+ chosen join order to make "optimal" substitutions, i.e. those that allow
+ filtering to be applied as soon as possible. See eliminate_item_equal() and
+ Item_equal::get_first() for details.
+
@return
- The transformed condition
+ The transformed condition, or NULL in case of error
*/
-static COND* substitute_for_best_equal_field(COND *cond,
+static COND* substitute_for_best_equal_field(JOIN_TAB *context_tab,
+ COND *cond,
COND_EQUAL *cond_equal,
void *table_join_idx)
{
Item_equal *item_equal;
+ COND *org_cond= cond; // Return this in case of fatal error
if (cond->type() == Item::COND_ITEM)
{
@@ -8593,7 +12640,7 @@ static COND* substitute_for_best_equal_field(COND *cond,
if (and_level)
{
cond_equal= &((Item_cond_and *) cond)->cond_equal;
- cond_list->disjoin((List<Item> *) &cond_equal->current_level);
+ cond_list->disjoin((List<Item> *) &cond_equal->current_level);/* remove Item_equal objects from the AND. */
List_iterator_fast<Item_equal> it(cond_equal->current_level);
while ((item_equal= it++))
@@ -8606,7 +12653,8 @@ static COND* substitute_for_best_equal_field(COND *cond,
Item *item;
while ((item= li++))
{
- Item *new_item =substitute_for_best_equal_field(item, cond_equal,
+ Item *new_item= substitute_for_best_equal_field(context_tab,
+ item, cond_equal,
table_join_idx);
/*
This works OK with PS/SP re-execution as changes are made to
@@ -8618,33 +12666,84 @@ static COND* substitute_for_best_equal_field(COND *cond,
if (and_level)
{
+ COND *eq_cond= 0;
List_iterator_fast<Item_equal> it(cond_equal->current_level);
+ bool false_eq_cond= FALSE;
while ((item_equal= it++))
{
- cond= eliminate_item_equal(cond, cond_equal->upper_levels, item_equal);
- // This occurs when eliminate_item_equal() founds that cond is
- // always false and substitutes it with Item_int 0.
- // Due to this, value of item_equal will be 0, so just return it.
- if (cond->type() != Item::COND_ITEM)
+ eq_cond= eliminate_item_equal(eq_cond, cond_equal->upper_levels,
+ item_equal);
+ if (!eq_cond)
+ {
+ eq_cond= 0;
+ break;
+ }
+ else if (eq_cond->type() == Item::INT_ITEM && !eq_cond->val_bool())
+ {
+ /*
+ This occurs when eliminate_item_equal() finds that cond is
+ always false and substitutes it with Item_int 0.
+ Due to this, the value of item_equal will be 0, so just return it.
+ */
+ cond= eq_cond;
+ false_eq_cond= TRUE;
break;
+ }
}
- }
- if (cond->type() == Item::COND_ITEM &&
- !((Item_cond*)cond)->argument_list()->elements)
- cond= new Item_int((int32)cond->val_bool());
-
+ if (eq_cond && !false_eq_cond)
+ {
+ /* Insert the generated equalities before all other conditions */
+ if (eq_cond->type() == Item::COND_ITEM)
+ ((Item_cond *) cond)->add_at_head(
+ ((Item_cond *) eq_cond)->argument_list());
+ else
+ {
+ if (cond_list->is_empty())
+ cond= eq_cond;
+ else
+ {
+ /* Do not add an equality condition if it's always true */
+ if (eq_cond->type() != Item::INT_ITEM &&
+ cond_list->push_front(eq_cond))
+ eq_cond= 0;
+ }
+ }
+ }
+ if (!eq_cond)
+ {
+ /*
+ We are out of memory doing the transformation.
+ This is a fatal error now. However we bail out by returning the
+ original condition that we had before we started the transformation.
+ */
+ cond_list->concat((List<Item> *) &cond_equal->current_level);
+ }
+ }
}
else if (cond->type() == Item::FUNC_ITEM &&
((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
{
item_equal= (Item_equal *) cond;
item_equal->sort(&compare_fields_by_table_order, table_join_idx);
+ cond_equal= item_equal->upper_levels;
if (cond_equal && cond_equal->current_level.head() == item_equal)
- cond_equal= 0;
- return eliminate_item_equal(0, cond_equal, item_equal);
+ cond_equal= cond_equal->upper_levels;
+ cond= eliminate_item_equal(0, cond_equal, item_equal);
+ return cond ? cond : org_cond;
+ }
+ else
+ {
+ while (cond_equal)
+ {
+ List_iterator_fast<Item_equal> it(cond_equal->current_level);
+ while((item_equal= it++))
+ {
+ REPLACE_EQUAL_FIELD_ARG arg= {item_equal, context_tab};
+ cond= cond->transform(&Item::replace_equal_field, (uchar *) &arg);
+ }
+ cond_equal= cond_equal->upper_levels;
+ }
}
- else
- cond->transform(&Item::replace_equal_field, 0);
return cond;
}
@@ -8660,9 +12759,10 @@ static COND* substitute_for_best_equal_field(COND *cond,
@param cond condition whose multiple equalities are to be checked
@param table constant table that has been read
+ @param const_key mark key parts as constant
*/
-static void update_const_equal_items(COND *cond, JOIN_TAB *tab)
+static void update_const_equal_items(COND *cond, JOIN_TAB *tab, bool const_key)
{
if (!(cond->used_tables() & tab->table->map))
return;
@@ -8673,7 +12773,10 @@ static void update_const_equal_items(COND *cond, JOIN_TAB *tab)
List_iterator_fast<Item> li(*cond_list);
Item *item;
while ((item= li++))
- update_const_equal_items(item, tab);
+ update_const_equal_items(item, tab,
+ (((Item_cond*) cond)->top_level() &&
+ ((Item_cond*) cond)->functype() ==
+ Item_func::COND_AND_FUNC));
}
else if (cond->type() == Item::FUNC_ITEM &&
((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
@@ -8684,11 +12787,10 @@ static void update_const_equal_items(COND *cond, JOIN_TAB *tab)
if (!contained_const && item_equal->get_const())
{
/* Update keys for range analysis */
- Item_equal_iterator it(*item_equal);
- Item_field *item_field;
- while ((item_field= it++))
+ Item_equal_fields_iterator it(*item_equal);
+ while (it++)
{
- Field *field= item_field->field;
+ Field *field= it.get_curr_field();
JOIN_TAB *stat= field->table->reginfo.join_tab;
key_map possible_keys= field->key_start;
possible_keys.intersect(field->table->keys_in_use_for_query);
@@ -8704,7 +12806,8 @@ static void update_const_equal_items(COND *cond, JOIN_TAB *tab)
TABLE *tab= field->table;
KEYUSE *use;
for (use= stat->keyuse; use && use->table == tab; use++)
- if (possible_keys.is_set(use->key) &&
+ if (const_key &&
+ !use->is_for_hash_join() && possible_keys.is_set(use->key) &&
tab->key_info[use->key].key_part[use->keypart].field ==
field)
tab->const_key_parts[use->key]|= use->keypart_map;
@@ -8796,37 +12899,6 @@ change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
}
}
-/**
- Remove additional condition inserted by IN/ALL/ANY transformation.
-
- @param conds condition for processing
-
- @return
- new conditions
-*/
-
-static Item *remove_additional_cond(Item* conds)
-{
- if (conds->name == in_additional_cond)
- return 0;
- if (conds->type() == Item::COND_ITEM)
- {
- Item_cond *cnd= (Item_cond*) conds;
- List_iterator<Item> li(*(cnd->argument_list()));
- Item *item;
- while ((item= li++))
- {
- if (item->name == in_additional_cond)
- {
- li.remove();
- if (cnd->argument_list()->elements == 1)
- return cnd->argument_list()->head();
- return conds;
- }
- }
- }
- return conds;
-}
static void
propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
@@ -8864,10 +12936,10 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
{
Item_func_eq *func=(Item_func_eq*) cond;
Item **args= func->arguments();
- bool left_const= args[0]->const_item();
- bool right_const= args[1]->const_item();
+ bool left_const= args[0]->const_item() && !args[0]->is_expensive();
+ bool right_const= args[1]->const_item() && !args[1]->is_expensive();
if (!(left_const && right_const) &&
- args[0]->result_type() == args[1]->result_type())
+ args[0]->cmp_type() == args[1]->cmp_type())
{
if (right_const)
{
@@ -8888,7 +12960,6 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
}
}
-
/**
Simplify joins replacing outer joins by inner joins whenever it's
possible.
@@ -8983,13 +13054,18 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
consider any plan where one of the inner tables is before some of outer
tables.
-
+ IMPLEMENTATION
The function is implemented by a recursive procedure. On the recursive
ascent all attributes are calculated, all outer joins that can be
converted are replaced and then all unnecessary braces are removed.
As join list contains join tables in the reverse order sequential
elimination of outer joins does not require extra recursive calls.
+ SEMI-JOIN NOTES
+ Remove all semi-joins that are within another semi-join (i.e. have
+ an "ancestor" semi-join nest)
+
+ EXAMPLES
Here is an example of a join query with invalid cross references:
@code
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b
@@ -8999,14 +13075,15 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
@param join_list list representation of the join to be converted
@param conds conditions to add on expressions for converted joins
@param top true <=> conds is the where condition
-
+ @param in_sj TRUE <=> processing semi-join nest's children
@return
- The new condition, if success
- 0, otherwise
*/
static COND *
-simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
+simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top,
+ bool in_sj)
{
TABLE_LIST *table;
NESTED_JOIN *nested_join;
@@ -9042,7 +13119,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
the corresponding on expression is added to E.
*/
expr= simplify_joins(join, &nested_join->join_list,
- expr, FALSE);
+ expr, FALSE, in_sj || table->sj_on_expr);
if (!table->prep_on_expr || expr != table->on_expr)
{
@@ -9054,15 +13131,18 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
}
nested_join->used_tables= (table_map) 0;
nested_join->not_null_tables=(table_map) 0;
- conds= simplify_joins(join, &nested_join->join_list, conds, top);
+ conds= simplify_joins(join, &nested_join->join_list, conds, top,
+ in_sj || table->sj_on_expr);
used_tables= nested_join->used_tables;
not_null_tables= nested_join->not_null_tables;
+ /* The following two might become unequal after table elimination: */
+ nested_join->n_tables= nested_join->join_list.elements;
}
else
{
if (!table->prep_on_expr)
table->prep_on_expr= table->on_expr;
- used_tables= table->table->map;
+ used_tables= table->get_map();
if (conds)
not_null_tables= conds->not_null_tables();
}
@@ -9073,16 +13153,19 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
table->embedding->nested_join->not_null_tables|= not_null_tables;
}
- if (!table->outer_join || (used_tables & not_null_tables))
+ if (!(table->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT)) ||
+ (used_tables & not_null_tables))
{
/*
For some of the inner tables there are conjunctive predicates
that reject nulls => the outer join can be replaced by an inner join.
*/
+ if (table->outer_join && !table->embedding && table->table)
+ table->table->maybe_null= FALSE;
table->outer_join= 0;
if (table->on_expr)
{
- /* Add on expression to the where condition. */
+ /* Add ON expression to the WHERE or upper-level ON condition. */
if (conds)
{
conds= and_conds(conds, table->on_expr);
@@ -9096,9 +13179,6 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
table->prep_on_expr= table->on_expr= 0;
}
}
-
- if (!top)
- continue;
/*
Only inner tables of non-convertible outer joins
@@ -9117,7 +13197,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
table->embedding->on_expr_dep_tables|= table->on_expr->used_tables();
}
else
- table->dep_tables&= ~table->table->map;
+ table->dep_tables&= ~table->get_map();
}
if (prev_table)
@@ -9130,7 +13210,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
prev_table->dep_tables|= table->on_expr_dep_tables;
table_map prev_used_tables= prev_table->nested_join ?
prev_table->nested_join->used_tables :
- prev_table->table->map;
+ prev_table->get_map();
/*
If on expression contains only references to inner tables
we still make the inner tables dependent on the outer tables.
@@ -9142,7 +13222,8 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
For example it might happen if RAND() function
is used in JOIN ON clause.
*/
- if (!((prev_table->on_expr->used_tables() & ~RAND_TABLE_BIT) &
+ if (!((prev_table->on_expr->used_tables() &
+ ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) &
~prev_used_tables))
prev_table->dep_tables|= used_tables;
}
@@ -9150,21 +13231,49 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
prev_table= table;
}
- /* Flatten nested joins that can be flattened. */
+ /*
+ Flatten nested joins that can be flattened.
+ no ON expression and not a semi-join => can be flattened.
+ */
li.rewind();
while ((table= li++))
{
nested_join= table->nested_join;
- if (nested_join && !table->on_expr)
+ if (table->sj_on_expr && !in_sj)
+ {
+ /*
+ If this is a semi-join that is not contained within another semi-join,
+ leave it intact (otherwise it is flattened)
+ */
+ join->select_lex->sj_nests.push_back(table);
+
+ /*
+ Also, walk through semi-join children and mark those that are now
+ top-level
+ */
+ TABLE_LIST *tbl;
+ List_iterator<TABLE_LIST> it(nested_join->join_list);
+ while ((tbl= it++))
+ {
+ if (!tbl->on_expr && tbl->table)
+ tbl->table->maybe_null= FALSE;
+ }
+ }
+ else if (nested_join && !table->on_expr)
{
TABLE_LIST *tbl;
List_iterator<TABLE_LIST> it(nested_join->join_list);
+ List<TABLE_LIST> repl_list;
while ((tbl= it++))
{
tbl->embedding= table->embedding;
+ if (!tbl->embedding && !tbl->on_expr && tbl->table)
+ tbl->table->maybe_null= FALSE;
tbl->join_list= table->join_list;
+ repl_list.push_back(tbl);
+ tbl->dep_tables|= table->dep_tables;
}
- li.replace(nested_join->join_list);
+ li.replace(repl_list);
}
}
DBUG_RETURN(conds);
@@ -9174,8 +13283,8 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
/**
Assign each nested join structure a bit in nested_join_map.
- Assign each nested join structure (except "confluent" ones - those that
- embed only one element) a bit in nested_join_map.
+ Assign each nested join structure (except ones that embed only one element
+ and so are redundant) a bit in nested_join_map.
@param join Join being processed
@param join_list List of tables
@@ -9184,7 +13293,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
@note
This function is called after simplify_joins(), when there are no
- redundant nested joins, #non_confluent_nested_joins <= #tables_in_join so
+ redundant nested joins, #non_redundant_nested_joins <= #tables_in_join so
we will not run out of bits in nested_join_map.
@return
@@ -9211,9 +13320,11 @@ static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
with anything)
2. we could run out bits in nested_join_map otherwise.
*/
- if (nested_join->join_list.elements != 1)
+ if (nested_join->n_tables != 1)
{
- nested_join->nj_map= (nested_join_map) 1 << first_unused++;
+ /* Don't assign bits to sj-nests */
+ if (table->on_expr)
+ nested_join->nj_map= (nested_join_map) 1 << first_unused++;
first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
first_unused);
}
@@ -9233,21 +13344,28 @@ static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
tables which will be ignored.
*/
-static void reset_nj_counters(List<TABLE_LIST> *join_list)
+static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list)
{
List_iterator<TABLE_LIST> li(*join_list);
TABLE_LIST *table;
DBUG_ENTER("reset_nj_counters");
+ uint n=0;
while ((table= li++))
{
NESTED_JOIN *nested_join;
+ bool is_eliminated_nest= FALSE;
if ((nested_join= table->nested_join))
{
nested_join->counter= 0;
- reset_nj_counters(&nested_join->join_list);
+ nested_join->n_tables= reset_nj_counters(join, &nested_join->join_list);
+ if (!nested_join->n_tables)
+ is_eliminated_nest= TRUE;
}
+ if ((table->nested_join && !is_eliminated_nest) ||
+ (!table->nested_join && (table->table->map & ~join->eliminated_tables)))
+ n++;
}
- DBUG_VOID_RETURN;
+ DBUG_RETURN(n);
}
@@ -9359,28 +13477,31 @@ static bool check_interleaving_with_nj(JOIN_TAB *next_tab)
Do update counters for "pairs of brackets" that we've left (marked as
X,Y,Z in the above picture)
*/
- for (;next_emb; next_emb= next_emb->embedding)
+ for (;next_emb && next_emb != join->emb_sjm_nest; next_emb= next_emb->embedding)
{
- next_emb->nested_join->counter++;
- if (next_emb->nested_join->counter == 1)
+ if (!next_emb->sj_on_expr)
{
- /*
- next_emb is the first table inside a nested join we've "entered". In
- the picture above, we're looking at the 'X' bracket. Don't exit yet as
- X bracket might have Y pair bracket.
+ next_emb->nested_join->counter++;
+ if (next_emb->nested_join->counter == 1)
+ {
+ /*
+ next_emb is the first table inside a nested join we've "entered". In
+ the picture above, we're looking at the 'X' bracket. Don't exit yet as
+ X bracket might have Y pair bracket.
+ */
+ join->cur_embedding_map |= next_emb->nested_join->nj_map;
+ }
+
+ if (next_emb->nested_join->n_tables !=
+ next_emb->nested_join->counter)
+ break;
+
+ /*
+ We're currently at Y or Z-bracket as depicted in the above picture.
+ Mark that we've left it and continue walking up the brackets hierarchy.
*/
- join->cur_embedding_map |= next_emb->nested_join->nj_map;
+ join->cur_embedding_map &= ~next_emb->nested_join->nj_map;
}
-
- if (next_emb->nested_join->join_list.elements !=
- next_emb->nested_join->counter)
- break;
-
- /*
- We're currently at Y or Z-bracket as depicted in the above picture.
- Mark that we've left it and continue walking up the brackets hierarchy.
- */
- join->cur_embedding_map &= ~next_emb->nested_join->nj_map;
}
return FALSE;
}
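/*
  Illustration only (a standalone sketch, not part of the patch): the
  "brackets" bookkeeping that check_interleaving_with_nj() and
  restore_prev_nj_state() perform. Each nested join owns one bit; the bit
  is set while the current join prefix is inside the bracket and cleared
  once all of the nest's tables are in the prefix. Nest and add_table()
  are simplified stand-ins; the real code also walks embedding chains and
  skips semi-join nests.
*/
#include <cassert>
#include <cstddef>
#include <cstdint>

struct Nest
{
  uint64_t nj_map;        /* the nest's bit in the embedding map */
  unsigned n_tables;      /* number of tables inside the nest */
  unsigned counter;       /* how many of them are already in the prefix */
};

static bool add_table(uint64_t *cur_embedding_map, Nest *nest)
{
  if (!nest)                          /* a top-level table */
    return *cur_embedding_map == 0;   /* reject it while a bracket is open */

  /* Reject interleaving with some other currently open bracket */
  if (*cur_embedding_map & ~nest->nj_map)
    return false;
  if (++nest->counter == 1)
    *cur_embedding_map|= nest->nj_map;   /* entered the bracket */
  if (nest->counter == nest->n_tables)
    *cur_embedding_map&= ~nest->nj_map;  /* bracket fully covered, leave it */
  return true;
}

int main()
{
  uint64_t cur_map= 0;
  Nest nj= { 1ULL << 0, 2, 0 };          /* a nest with two inner tables */
  assert(add_table(&cur_map, &nj));      /* t_inner1: bracket opened */
  assert(!add_table(&cur_map, NULL));    /* an outer table would interleave */
  assert(add_table(&cur_map, &nj));      /* t_inner2: bracket closed */
  assert(add_table(&cur_map, NULL));     /* outer tables are fine again */
  return 0;
}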
@@ -9442,33 +13563,141 @@ static void restore_prev_nj_state(JOIN_TAB *last)
{
TABLE_LIST *last_emb= last->table->pos_in_table_list->embedding;
JOIN *join= last->join;
- for (;last_emb != NULL; last_emb= last_emb->embedding)
+ for (;last_emb != NULL && last_emb != join->emb_sjm_nest;
+ last_emb= last_emb->embedding)
{
- NESTED_JOIN *nest= last_emb->nested_join;
- DBUG_ASSERT(nest->counter > 0);
-
- bool was_fully_covered= nest->is_fully_covered();
-
- if (--nest->counter == 0)
- join->cur_embedding_map&= ~nest->nj_map;
-
- if (!was_fully_covered)
- break;
+ if (!last_emb->sj_on_expr)
+ {
+ NESTED_JOIN *nest= last_emb->nested_join;
+ DBUG_ASSERT(nest->counter > 0);
+
+ bool was_fully_covered= nest->is_fully_covered();
+
+ join->cur_embedding_map|= nest->nj_map;
+
+ if (--nest->counter == 0)
+ join->cur_embedding_map&= ~nest->nj_map;
+
+ if (!was_fully_covered)
+ break;
+ }
+ }
+}
+
+
+
+/*
+ Change access methods not to use join buffering and adjust costs accordingly
+
+ SYNOPSIS
+ optimize_wo_join_buffering()
+ join
+ first_tab The first tab to do re-optimization for
+ last_tab The last tab to do re-optimization for
+ last_remaining_tables Bitmap of tables that are not in the
+ [0...last_tab] join prefix
+ first_alt TRUE <=> Use the LooseScan plan for the first_tab
+ no_jbuf_before Don't allow to use join buffering before this
+ table
+ reopt_rec_count OUT New output record count
+ reopt_cost OUT New join prefix cost
+
+ DESCRIPTION
+ Given a join prefix [0; ... first_tab], change the access to the tables
+ in the [first_tab; last_tab] not to use join buffering. This is needed
+ because some semi-join strategies cannot be used together with the join
+ buffering.
+ In general case the best table order in [first_tab; last_tab] range with
+ join buffering is different from the best order without join buffering but
+ we don't try finding a better join order. (TODO: ask Igor why we
+ chose not to do this in the end; that's actually the difference from the
+ forking approach)
+*/
+
+void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
+ table_map last_remaining_tables,
+ bool first_alt, uint no_jbuf_before,
+ double *outer_rec_count, double *reopt_cost)
+{
+ double cost, rec_count;
+ table_map reopt_remaining_tables= last_remaining_tables;
+ uint i;
+
+ if (first_tab > join->const_tables)
+ {
+ cost= join->positions[first_tab - 1].prefix_cost.total_cost();
+ rec_count= join->positions[first_tab - 1].prefix_record_count;
+ }
+ else
+ {
+ cost= 0.0;
+ rec_count= 1;
+ }
+
+ *outer_rec_count= rec_count;
+ for (i= first_tab; i <= last_tab; i++)
+ reopt_remaining_tables |= join->positions[i].table->table->map;
+
+ /*
+ best_access_path() optimization depends on the value of
+ join->cur_sj_inner_tables. Our goal in this function is to do a
+ re-optimization with disabled join buffering, but no other changes.
+ In order to achieve this, cur_sj_inner_tables needs to have the same
+ value it had during the original invocations of best_access_path.
+
+ We know that this function, optimize_wo_join_buffering(), is called to
+ re-optimize a semi-join join order range, which allows us to conclude that
+ the "original" value of cur_sj_inner_tables was 0.
+ */
+ table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables;
+ join->cur_sj_inner_tables= 0;
+
+ for (i= first_tab; i <= last_tab; i++)
+ {
+ JOIN_TAB *rs= join->positions[i].table;
+ POSITION pos, loose_scan_pos;
- join->cur_embedding_map|= nest->nj_map;
+ if ((i == first_tab && first_alt) || join->positions[i].use_join_buffer)
+ {
+ /* Find the best access method that would not use join buffering */
+ best_access_path(join, rs, reopt_remaining_tables, i,
+ TRUE, rec_count,
+ &pos, &loose_scan_pos);
+ }
+ else
+ pos= join->positions[i];
+
+ if ((i == first_tab && first_alt))
+ pos= loose_scan_pos;
+
+ reopt_remaining_tables &= ~rs->table->map;
+ rec_count *= pos.records_read;
+ cost += pos.read_time;
+
+ if (!rs->emb_sj_nest)
+ *outer_rec_count *= pos.records_read;
}
+ join->cur_sj_inner_tables= save_cur_sj_inner_tables;
+
+ *reopt_cost= cost;
}
static COND *
-optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list,
- Item::cond_result *cond_value)
+optimize_cond(JOIN *join, COND *conds,
+ List<TABLE_LIST> *join_list, bool ignore_on_conds,
+ Item::cond_result *cond_value, COND_EQUAL **cond_equal)
{
THD *thd= join->thd;
DBUG_ENTER("optimize_cond");
if (!conds)
+ {
*cond_value= Item::COND_TRUE;
+ if (!ignore_on_conds)
+ build_equal_items(join, NULL, NULL, join_list, ignore_on_conds,
+ cond_equal);
+ }
else
{
/*
@@ -9480,9 +13709,9 @@ optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list,
multiple equality contains a constant.
*/
DBUG_EXECUTE("where", print_where(conds, "original", QT_ORDINARY););
- conds= build_equal_items(join->thd, conds, NULL, join_list,
- &join->cond_equal);
- DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY););
+ conds= build_equal_items(join, conds, NULL, join_list, ignore_on_conds,
+ cond_equal);
+ DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY););
/* change field = field to field = const for each found field = const */
propagate_cond_constants(thd, (I_List<COND_CMP> *) 0, conds, conds);
@@ -9491,7 +13720,10 @@ optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list,
Remove all and-levels where CONST item != CONST item
*/
DBUG_EXECUTE("where",print_where(conds,"after const change", QT_ORDINARY););
- conds= remove_eq_conds(thd, conds, cond_value) ;
+ conds= remove_eq_conds(thd, conds, cond_value);
+ if (conds && conds->type() == Item::COND_ITEM &&
+ ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
+ *cond_equal= &((Item_cond_and*) conds)->cond_equal;
DBUG_EXECUTE("info",print_where(conds,"after remove", QT_ORDINARY););
}
DBUG_RETURN(conds);
@@ -9499,22 +13731,230 @@ optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list,
/**
- Handles the reqursive job for remove_eq_conds()
+ @brief
+ Propagate multiple equalities to the sub-expressions of a condition
+
+ @param thd thread handle
+ @param cond the condition where equalities are to be propagated
+ @param *new_equalities the multiple equalities to be propagated
+ @param inherited path to all inherited multiple equality items
+ @param[out] is_simplifiable_cond 'cond' may be simplified after the
+ propagation of the equalities
+
+ @details
+ The function recursively traverses the tree of the condition 'cond' and
+ for each its AND sub-level of any depth the function merges the multiple
+ equalities from the list 'new_equalities' into the multiple equalities
+ attached to the AND item created for this sub-level.
+ The function also [re]sets references to the equalities formed by the
+ merges of multiple equalities in all field items occurring in 'cond'
+ that are encountered in the equalities.
+ If the result of any merge of multiple equalities is an impossible
+ condition the function returns TRUE in the parameter is_simplifiable_cond.
+*/
- Remove const and eq items. Return new item, or NULL if no condition
- cond_value is set to according:
- COND_OK query is possible (field = constant)
- COND_TRUE always true ( 1 = 1 )
- COND_FALSE always false ( 1 = 2 )
+void propagate_new_equalities(THD *thd, Item *cond,
+ List<Item_equal> *new_equalities,
+ COND_EQUAL *inherited,
+ bool *is_simplifiable_cond)
+{
+ if (cond->type() == Item::COND_ITEM)
+ {
+ bool and_level= ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC;
+ if (and_level)
+ {
+ Item_cond_and *cond_and= (Item_cond_and *) cond;
+ List<Item_equal> *cond_equalities= &cond_and->cond_equal.current_level;
+ cond_and->cond_equal.upper_levels= inherited;
+ if (!cond_equalities->is_empty() && cond_equalities != new_equalities)
+ {
+ Item_equal *equal_item;
+ List_iterator<Item_equal> it(*new_equalities);
+ while ((equal_item= it++))
+ {
+ equal_item->merge_into_list(cond_equalities, true, true);
+ }
+ List_iterator<Item_equal> ei(*cond_equalities);
+ while ((equal_item= ei++))
+ {
+ if (equal_item->const_item() && !equal_item->val_int())
+ {
+ *is_simplifiable_cond= true;
+ return;
+ }
+ }
+ }
+ }
- SYNPOSIS
- remove_eq_conds()
- thd THD environment
- cond the condition to handle
- cond_value the resulting value of the condition
+ Item *item;
+ List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+ while ((item= li++))
+ {
+ COND_EQUAL *new_inherited= and_level && item->type() == Item::COND_ITEM ?
+ &((Item_cond_and *) cond)->cond_equal :
+ inherited;
+ propagate_new_equalities(thd, item, new_equalities, new_inherited,
+ is_simplifiable_cond);
+ }
+ }
+ else if (cond->type() == Item::FUNC_ITEM &&
+ ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
+ {
+ Item_equal *equal_item;
+ List_iterator<Item_equal> it(*new_equalities);
+ Item_equal *equality= (Item_equal *) cond;
+ equality->upper_levels= inherited;
+ while ((equal_item= it++))
+ {
+ equality->merge_with_check(equal_item, true);
+ }
+ if (equality->const_item() && !equality->val_int())
+ *is_simplifiable_cond= true;
+ }
+ else
+ {
+ uchar* is_subst_valid= (uchar *) Item::ANY_SUBST;
+ cond= cond->compile(&Item::subst_argument_checker,
+ &is_subst_valid,
+ &Item::equal_fields_propagator,
+ (uchar *) inherited);
+ cond->update_used_tables();
+ }
+}
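/*
  Illustration only (a standalone sketch, not part of the patch): the merge
  step described above, on a simplified model where a multiple equality is
  just a set of column names plus an optional constant. MultEqual and this
  merge_into_list() are invented stand-ins for Item_equal and its
  merge_into_list()/merge_with_check() methods.
*/
#include <cstdio>
#include <set>
#include <string>
#include <vector>

struct MultEqual
{
  std::set<std::string> columns;
  bool has_const;
  long const_val;
};

/* Returns false if the merge produced an impossible (always FALSE) equality */
static bool merge_into_list(std::vector<MultEqual> *level, const MultEqual &neu)
{
  for (size_t i= 0; i < level->size(); i++)
  {
    MultEqual &e= (*level)[i];
    bool overlaps= false;
    for (std::set<std::string>::const_iterator it= neu.columns.begin();
         it != neu.columns.end(); ++it)
      if (e.columns.count(*it)) { overlaps= true; break; }
    if (!overlaps)
      continue;
    e.columns.insert(neu.columns.begin(), neu.columns.end());
    if (neu.has_const)
    {
      if (e.has_const && e.const_val != neu.const_val)
        return false;                      /* e.g. t.a = 1 AND t.a = 2 */
      e.has_const= true;
      e.const_val= neu.const_val;
    }
    return true;
  }
  level->push_back(neu);                   /* nothing to merge with */
  return true;
}

int main()
{
  std::vector<MultEqual> level;
  MultEqual e1= { std::set<std::string>(), true, 5 };
  e1.columns.insert("t1.a"); e1.columns.insert("t2.b");
  level.push_back(e1);                                /* (t1.a, t2.b, 5) */

  MultEqual e2= { std::set<std::string>(), false, 0 };
  e2.columns.insert("t2.b"); e2.columns.insert("t3.c");
  bool ok= merge_into_list(&level, e2);               /* shares t2.b */
  std::printf("first merge: %s, members=%u\n",
              ok ? "ok" : "always FALSE", (unsigned) level[0].columns.size());

  MultEqual e3= { std::set<std::string>(), true, 7 };
  e3.columns.insert("t3.c");
  ok= merge_into_list(&level, e3);                    /* constants 5 != 7 */
  std::printf("second merge: %s\n", ok ? "ok" : "always FALSE");
  return 0;
}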
- RETURN
- *COND with the simplified condition
+/*
+ Check if cond_is_datetime_is_null() is true for the condition cond, or
+ for any of its AND/OR-children
+*/
+bool cond_has_datetime_is_null(Item *cond)
+{
+ if (cond_is_datetime_is_null(cond))
+ return true;
+
+ if (cond->type() == Item::COND_ITEM)
+ {
+ List<Item> *cond_arg_list= ((Item_cond*) cond)->argument_list();
+ List_iterator<Item> li(*cond_arg_list);
+ Item *item;
+ while ((item= li++))
+ {
+ if (cond_has_datetime_is_null(item))
+ return true;
+ }
+ }
+ return false;
+}
+
+/*
+ Check if the passed condition has the form
+
+ not_null_date_col IS NULL
+
+ where not_null_date_col has a DATE or DATETIME type
+*/
+
+bool cond_is_datetime_is_null(Item *cond)
+{
+ if (cond->type() == Item::FUNC_ITEM &&
+ ((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC)
+ {
+ Item **args= ((Item_func_isnull*) cond)->arguments();
+ if (args[0]->type() == Item::FIELD_ITEM)
+ {
+ Field *field=((Item_field*) args[0])->field;
+
+ if (((field->type() == MYSQL_TYPE_DATE) ||
+ (field->type() == MYSQL_TYPE_DATETIME)) &&
+ (field->flags & NOT_NULL_FLAG))
+ {
+ return TRUE;
+ }
+ }
+ }
+ return FALSE;
+}
+
+
+/**
+ @brief
+ Evaluate all constant boolean sub-expressions in a condition
+
+ @param thd thread handle
+ @param cond condition where to evaluate constant sub-expressions
+ @param[out] cond_value : the returned value of the condition
+ (TRUE/FALSE/UNKNOWN:
+ Item::COND_TRUE/Item::COND_FALSE/Item::COND_OK)
+ @return
+ the item that is the result of the substitution of all inexpensive constant
+ boolean sub-expressions into cond, or,
+ NULL if the condition is constant and is evaluated to FALSE.
+
+ @details
+ This function looks for all inexpensive constant boolean sub-expressions in
+ the given condition 'cond' and substitutes them for their values.
+ For example, the condition 2 > (5 + 1) or a < (10 / 2)
+ will be transformed to the condition a < (10 / 2).
+ Note that a constant sub-expression is evaluated only if it is constant and
+ inexpensive. A sub-expression with an uncorrelated subquery may be evaluated
+ only if the subquery is considered as inexpensive.
+ The function does not evaluate a constant sub-expression if it is not on one
+ of AND/OR levels of the condition 'cond'. For example, the subquery in the
+ condition a > (select max(b) from t1 where b > 5) will never be evaluated
+ by this function.
+ If a constant boolean sub-expression is evaluated to TRUE then:
+ - when the sub-expression is a conjunct of an AND formula it is simply
+ removed from this formula
+ - when the sub-expression is a disjunct of an OR formula the whole OR
+ formula is converted to TRUE
+ If a constant boolean sub-expression is evaluated to FALSE then:
+ - when the sub-expression is a disjunct of an OR formula it is simply
+ removed from this formula
+ - when the sub-expression is a conjunct of an AND formula the whole AND
+ formula is converted to FALSE
+ When a disjunct/conjunct is removed from an OR/AND formula it might happen
+ that there is only one conjunct/disjunct remaining. In this case this
+ remaining disjunct/conjunct must be merged into underlying AND/OR formula,
+ because AND/OR levels must alternate in the same way as they alternate
+ after fix_fields() is called for the original condition.
+ The specifics of merging a formula f into an AND formula A appears
+ when A contains multiple equalities and f contains multiple equalities.
+ In this case the multiple equalities from f and A have to be merged.
+ After this the resulting multiple equalities have to be propagated into
+ all the AND/OR levels of the formula A (see propagate_new_equalities()).
+ The propagation of multiple equalities might result in forming multiple
+ equalities that are always FALSE. This, in its turn, might trigger further
+ simplification of the condition.
+
+ @note
+ EXAMPLE 1:
+ SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5 AND a = 5 OR 1 != 1);
+ First 1 != 1 will be removed from the second conjunct:
+ => SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5 AND a = 5);
+ Then (b = 5 AND a = 5) will be merged into the top level condition:
+ => SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5) AND (a = 5);
+ Then (b = 5), (a = 5) will be propagated into the disjuncts of
+ (b = 1 OR a = 1):
+ => SELECT * FROM t1 WHERE ((b = 1) AND (b = 5) AND (a = 5) OR
+ (a = 1) AND (b = 5) AND (a = 5)) AND
+ (b = 5) AND (a = 5)
+ => SELECT * FROM t1 WHERE ((FALSE AND (a = 5)) OR
+ (FALSE AND (b = 5))) AND
+ (b = 5) AND (a = 5)
+ After this an additional call of remove_eq_conds() converts it
+ to FALSE
+
+ EXAMPLE 2:
+ SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5 AND a = 5 OR 1 != 1);
+ => SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5 AND a = 5);
+ => SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5) AND (a = 5);
+ => SELECT * FROM t1 WHERE ((b = 1) AND (b = 5) AND (a = 5) OR
+ (a = 5) AND (b = 5) AND (a = 5)) AND
+ (b = 5) AND (a = 5)
+ => SELECT * FROM t1 WHERE ((FALSE AND (a = 5)) OR
+ ((b = 5) AND (a = 5))) AND
+ (b = 5) AND (a = 5)
+ After this an additional call of remove_eq_conds() converts it to
+ => SELECT * FROM t1 WHERE (b = 5) AND (a = 5)
*/
static COND *
@@ -9524,22 +13964,126 @@ internal_remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value)
{
bool and_level= ((Item_cond*) cond)->functype()
== Item_func::COND_AND_FUNC;
- List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
- Item::cond_result tmp_cond_value;
- bool should_fix_fields=0;
+ List<Item> *cond_arg_list= ((Item_cond*) cond)->argument_list();
- *cond_value=Item::COND_UNDEF;
+ if (and_level)
+ {
+ /*
+ Remove multiple equalities that became always true (e.g. after
+ constant row substitution).
+ They would be removed later in the function anyway, but the list of
+ them, cond_equal.current_level, must also be adjusted correspondingly.
+ So it's easier to do it at one pass through the list of the equalities.
+ */
+ List<Item_equal> *cond_equalities=
+ &((Item_cond_and *) cond)->cond_equal.current_level;
+ cond_arg_list->disjoin((List<Item> *) cond_equalities);
+ List_iterator<Item_equal> it(*cond_equalities);
+ Item_equal *eq_item;
+ while ((eq_item= it++))
+ {
+ if (eq_item->const_item() && eq_item->val_int())
+ it.remove();
+ }
+ cond_arg_list->concat((List<Item> *) cond_equalities);
+ }
+
+ List<Item_equal> new_equalities;
+ List_iterator<Item> li(*cond_arg_list);
+ bool should_fix_fields= 0;
+ Item::cond_result tmp_cond_value;
Item *item;
+
+ /*
+ If the list cond_arg_list became empty then it consisted only
+ of always true multiple equalities.
+ */
+ *cond_value= cond_arg_list->elements ? Item::COND_UNDEF : Item::COND_TRUE;
+
while ((item=li++))
{
Item *new_item=internal_remove_eq_conds(thd, item, &tmp_cond_value);
if (!new_item)
+ {
+ /* This can happen only when item is converted to TRUE or FALSE */
li.remove();
+ }
else if (item != new_item)
{
- (void) li.replace(new_item);
- should_fix_fields=1;
- }
+ /*
+ This can happen when:
+ - item was an OR formula converted to one disjunct
+ - item was an AND formula converted to one conjunct
+ In these cases the disjunct/conjunct must be merged into the
+ argument list of cond.
+ */
+ if (new_item->type() == Item::COND_ITEM &&
+ item->type() == Item::COND_ITEM)
+ {
+ DBUG_ASSERT(((Item_cond *) cond)->functype() ==
+ ((Item_cond *) new_item)->functype());
+ List<Item> *new_item_arg_list=
+ ((Item_cond *) new_item)->argument_list();
+ if (and_level)
+ {
+ /*
+ If new_item is an AND formula then multiple equalities
+ of new_item_arg_list must be merged into the multiple equalities
+ of cond_arg_list.
+ */
+ List<Item_equal> *new_item_equalities=
+ &((Item_cond_and *) new_item)->cond_equal.current_level;
+ if (!new_item_equalities->is_empty())
+ {
+ /*
+ Cut the multiple equalities from the new_item_arg_list and
+ append them on the list new_equalities. Later the equalities
+ from this list will be merged into the multiple equalities
+ of cond_arg_list all together.
+ */
+ new_item_arg_list->disjoin((List<Item> *) new_item_equalities);
+ new_equalities.concat(new_item_equalities);
+ }
+ }
+ if (new_item_arg_list->is_empty())
+ li.remove();
+ else
+ {
+ uint cnt= new_item_arg_list->elements;
+ li.replace(*new_item_arg_list);
+ /* Make iterator li ignore new items */
+ for (cnt--; cnt; cnt--)
+ li++;
+ should_fix_fields= 1;
+ }
+ }
+ else if (and_level &&
+ new_item->type() == Item::FUNC_ITEM &&
+ ((Item_cond*) new_item)->functype() ==
+ Item_func::MULT_EQUAL_FUNC)
+ {
+ li.remove();
+ new_equalities.push_back((Item_equal *) new_item);
+ }
+ else
+ {
+ if (new_item->type() == Item::COND_ITEM &&
+ ((Item_cond*) new_item)->functype() ==
+ ((Item_cond*) cond)->functype())
+ {
+ List<Item> *new_item_arg_list=
+ ((Item_cond *) new_item)->argument_list();
+ uint cnt= new_item_arg_list->elements;
+ li.replace(*new_item_arg_list);
+ /* Make iterator li ignore new items */
+ for (cnt--; cnt; cnt--)
+ li++;
+ }
+ else
+ li.replace(new_item);
+ should_fix_fields= 1;
+ }
+ }
if (*cond_value == Item::COND_UNDEF)
*cond_value=tmp_cond_value;
switch (tmp_cond_value) {
@@ -9565,6 +14109,55 @@ internal_remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value)
break; /* purecov: deadcode */
}
}
+ if (!new_equalities.is_empty())
+ {
+ DBUG_ASSERT(and_level);
+ /*
+ Merge multiple equalities that were cut from the results of
+ simplification of OR formulas converted into AND formulas.
+ These multiple equalities are to be merged into the
+ multiple equalities of cond_arg_list.
+ */
+ COND_EQUAL *cond_equal= &((Item_cond_and *) cond)->cond_equal;
+ List<Item_equal> *cond_equalities= &cond_equal->current_level;
+ cond_arg_list->disjoin((List<Item> *) cond_equalities);
+ Item_equal *equality;
+ List_iterator_fast<Item_equal> it(new_equalities);
+ while ((equality= it++))
+ {
+ equality->upper_levels= cond_equal->upper_levels;
+ equality->merge_into_list(cond_equalities, false, false);
+ List_iterator_fast<Item_equal> ei(*cond_equalities);
+ while ((equality= ei++))
+ {
+ if (equality->const_item() && !equality->val_int())
+ {
+ *cond_value= Item::COND_FALSE;
+ return (COND*) 0;
+ }
+ }
+ }
+ cond_arg_list->concat((List<Item> *) cond_equalities);
+ /*
+ Propagate the newly formed multiple equalities to
+ all the AND/OR levels of cond
+ */
+ bool is_simplifiable_cond= false;
+ propagate_new_equalities(thd, cond, cond_equalities,
+ cond_equal->upper_levels,
+ &is_simplifiable_cond);
+ /*
+ If the above propagation of multiple equalities brings us
+ to multiple equalities that are always FALSE then try to
+ simplify the condition with remove_eq_conds() again.
+ */
+ if (is_simplifiable_cond)
+ {
+ if (!(cond= internal_remove_eq_conds(thd, cond, cond_value)))
+ return cond;
+ }
+ should_fix_fields= 1;
+ }
if (should_fix_fields)
cond->update_used_tables();
@@ -9578,60 +14171,52 @@ internal_remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value)
return item;
}
}
- else if (cond->type() == Item::FUNC_ITEM &&
- ((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC)
+ else if (cond_is_datetime_is_null(cond))
{
- Item_func_isnull *func=(Item_func_isnull*) cond;
- Item **args= func->arguments();
- if (args[0]->type() == Item::FIELD_ITEM)
- {
- Field *field=((Item_field*) args[0])->field;
- /* fix to replace 'NULL' dates with '0' (shreeve@uci.edu) */
- /*
- See BUG#12594011
- Documentation says that
- SELECT datetime_notnull d FROM t1 WHERE d IS NULL
- shall return rows where d=='0000-00-00'
+ /* fix to replace 'NULL' dates with '0' (shreeve@uci.edu) */
+ /*
+ See BUG#12594011
+ Documentation says that
+ SELECT datetime_notnull d FROM t1 WHERE d IS NULL
+ shall return rows where d=='0000-00-00'
- Thus, for DATE and DATETIME columns defined as NOT NULL,
- "date_notnull IS NULL" has to be modified to
- "date_notnull IS NULL OR date_notnull == 0" (if outer join)
- "date_notnull == 0" (otherwise)
+ Thus, for DATE and DATETIME columns defined as NOT NULL,
+ "date_notnull IS NULL" has to be modified to
+ "date_notnull IS NULL OR date_notnull == 0" (if outer join)
+ "date_notnull == 0" (otherwise)
- */
- if (((field->type() == MYSQL_TYPE_DATE) ||
- (field->type() == MYSQL_TYPE_DATETIME)) &&
- (field->flags & NOT_NULL_FLAG))
- {
- Item *item0= new(thd->mem_root) Item_int((longlong)0, 1);
- Item *eq_cond= new(thd->mem_root) Item_func_eq(args[0], item0);
- if (!eq_cond)
- return cond;
+ */
+ Item **args= ((Item_func_isnull*) cond)->arguments();
+ Field *field=((Item_field*) args[0])->field;
- if (field->table->pos_in_table_list->outer_join)
- {
- // outer join: transform "col IS NULL" to "col IS NULL or col=0"
- Item *or_cond= new(thd->mem_root) Item_cond_or(eq_cond, cond);
- if (!or_cond)
- return cond;
- cond= or_cond;
- }
- else
- {
- // not outer join: transform "col IS NULL" to "col=0"
- cond= eq_cond;
- }
+ Item *item0= new(thd->mem_root) Item_int((longlong)0, 1);
+ Item *eq_cond= new(thd->mem_root) Item_func_eq(args[0], item0);
+ if (!eq_cond)
+ return cond;
- cond->fix_fields(thd, &cond);
- }
+ if (field->table->pos_in_table_list->is_inner_table_of_outer_join())
+ {
+ // outer join: transform "col IS NULL" to "col IS NULL or col=0"
+ Item *or_cond= new(thd->mem_root) Item_cond_or(eq_cond, cond);
+ if (!or_cond)
+ return cond;
+ cond= or_cond;
}
- if (cond->const_item())
+ else
+ {
+ // not outer join: transform "col IS NULL" to "col=0"
+ cond= eq_cond;
+ }
+
+ cond->fix_fields(thd, &cond);
+
+ if (cond->const_item() && !cond->is_expensive())
{
*cond_value= eval_const_cond(cond) ? Item::COND_TRUE : Item::COND_FALSE;
return (COND*) 0;
}
}
- else if (cond->const_item())
+ else if (cond->const_item() && !cond->is_expensive())
{
*cond_value= eval_const_cond(cond) ? Item::COND_TRUE : Item::COND_FALSE;
return (COND*) 0;
@@ -9651,7 +14236,6 @@ internal_remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value)
return cond; // Point at next and level
}
-
/**
Remove const and eq items. Return new item, or NULL if no condition
cond_value is set to according:
@@ -9730,40 +14314,36 @@ remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value)
}
-/*
+/**
Check if equality can be used in removing components of GROUP BY/DISTINCT
- SYNOPSIS
- test_if_equality_guarantees_uniqueness()
- l the left comparison argument (a field if any)
- r the right comparison argument (a const of any)
-
- DESCRIPTION
- Checks if an equality predicate can be used to take away
- DISTINCT/GROUP BY because it is known to be true for exactly one
- distinct value (e.g. <expr> == <const>).
- Arguments must be of the same type because e.g.
- <string_field> = <int_const> may match more than 1 distinct value from
- the column.
- We must take into consideration and the optimization done for various
- string constants when compared to dates etc (see Item_int_with_ref) as
- well as the collation of the arguments.
+ @param l the left comparison argument (a field if any)
+ @param r the right comparison argument (a const of any)
- RETURN VALUE
- TRUE can be used
- FALSE cannot be used
+ @details
+ Checks if an equality predicate can be used to take away
+ DISTINCT/GROUP BY because it is known to be true for exactly one
+ distinct value (e.g. <expr> == <const>).
+ Arguments must be compared in the native type of the left argument
+ and (for strings) in the native collation of the left argument.
+ Otherwise, for example,
+ <string_field> = <int_const> may match more than 1 distinct value of
+ the <string_field>.
+
+ @note We don't need to aggregate l and r collations here, because r -
+ the constant item - has already been converted to a proper collation
+ for comparison. We only need to compare this collation with the field's collation.
+
+ @retval true can be used
+ @retval false cannot be used
*/
static bool
test_if_equality_guarantees_uniqueness(Item *l, Item *r)
{
return r->const_item() &&
- /* elements must be compared as dates */
- (Arg_comparator::can_compare_as_dates(l, r, 0) ||
- /* or of the same result type */
- (r->result_type() == l->result_type() &&
- /* and must have the same collation if compared as strings */
- (l->result_type() != STRING_RESULT ||
- l->collation.collation == r->collation.collation)));
+ item_cmp_type(l->cmp_type(), r->cmp_type()) == l->cmp_type() &&
+ (l->cmp_type() != STRING_RESULT ||
+ l->collation.collation == r->collation.collation);
}
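/*
  Illustration only (a standalone sketch, not part of the patch): why the
  check above requires the comparison to be done in the left argument's
  type. When a string column is compared to an integer constant the
  comparison is numeric, so several distinct string values satisfy it and
  DISTINCT/GROUP BY cannot be removed; compared as strings in one
  collation, only one value matches.
*/
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

int main()
{
  std::vector<std::string> distinct_col_values;
  distinct_col_values.push_back("1");
  distinct_col_values.push_back("01");
  distinct_col_values.push_back("1.0");
  distinct_col_values.push_back("2");

  int numeric_matches= 0, string_matches= 0;
  for (size_t i= 0; i < distinct_col_values.size(); i++)
  {
    /* string_col = 1: the string is converted and compared numerically */
    if (std::atof(distinct_col_values[i].c_str()) == 1.0)
      numeric_matches++;
    /* string_col = '1': compared as strings in the column's collation */
    if (distinct_col_values[i] == "1")
      string_matches++;
  }
  std::printf("string_col = 1   matches %d distinct values\n", numeric_matches);
  std::printf("string_col = '1' matches %d distinct value\n", string_matches);
  return 0;
}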
@@ -9924,6 +14504,8 @@ Field *create_tmp_field_from_field(THD *thd, Field *org_field,
table->s->db_create_options|= HA_OPTION_PACK_RECORD;
else if (org_field->type() == FIELD_TYPE_DOUBLE)
((Field_double *) new_field)->not_fixed= TRUE;
+ new_field->vcol_info= 0;
+ new_field->stored_in_db= TRUE;
}
return new_field;
}
@@ -9982,15 +14564,12 @@ static Field *create_tmp_field_from_item(THD *thd, Item *item, TABLE *table,
case STRING_RESULT:
DBUG_ASSERT(item->collation.collation);
- enum enum_field_types type;
/*
DATE/TIME and GEOMETRY fields have STRING_RESULT result type.
To preserve type they needed to be handled separately.
*/
- if ((type= item->field_type()) == MYSQL_TYPE_DATETIME ||
- type == MYSQL_TYPE_TIME || type == MYSQL_TYPE_DATE ||
- type == MYSQL_TYPE_NEWDATE ||
- type == MYSQL_TYPE_TIMESTAMP || type == MYSQL_TYPE_GEOMETRY)
+ if (item->cmp_type() == TIME_RESULT ||
+ item->field_type() == MYSQL_TYPE_GEOMETRY)
new_field= item->tmp_table_field_from_field_type(table, 1);
/*
Make sure that the blob fits into a Field_varstring which has
@@ -10122,6 +14701,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
}
case Item::FIELD_ITEM:
case Item::DEFAULT_VALUE_ITEM:
+ case Item::INSERT_VALUE_ITEM:
case Item::TRIGGER_FIELD_ITEM:
{
Item_field *field= (Item_field*) item;
@@ -10132,13 +14712,30 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
If the item has to be able to store NULLs but the underlying field can't do it,
create_tmp_field_from_field() can't be used for tmp field creation.
*/
- if (field->maybe_null && !field->field->maybe_null())
+ if (((field->maybe_null && field->in_rollup) ||
+ (thd->create_tmp_table_for_derived && /* for mat. view/dt */
+ orig_item && orig_item->maybe_null)) &&
+ !field->field->maybe_null())
{
+ bool save_maybe_null= FALSE;
+ /*
+ The item the ref points to may have the maybe_null flag set while
+ the ref doesn't have it. This may happen for outer fields
+ when the outer query decided at some point after the name resolution phase
+ that this field might be null. Take this into account here.
+ */
+ if (orig_item)
+ {
+ save_maybe_null= item->maybe_null;
+ item->maybe_null= orig_item->maybe_null;
+ }
result= create_tmp_field_from_item(thd, item, table, NULL,
modify_item, convert_blob_length);
*from_field= field->field;
if (result && modify_item)
field->result_field= result;
+ if (orig_item)
+ item->maybe_null= save_maybe_null;
}
else if (table_cant_handle_bit_fields && field->field->type() ==
MYSQL_TYPE_BIT)
@@ -10161,7 +14758,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
((Item_ref*)orig_item)->set_result_field(result);
/*
Fields that are used as arguments to the DEFAULT() function already have
- their data pointers set to the default value during name resulotion. See
+ their data pointers set to the default value during name resolution. See
Item_default_value::fix_fields.
*/
if (orig_type != Item::DEFAULT_VALUE_ITEM && field->field->eq_def(result))
@@ -10213,6 +14810,8 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
case Item::REF_ITEM:
case Item::NULL_ITEM:
case Item::VARBIN_ITEM:
+ case Item::CACHE_ITEM:
+ case Item::EXPR_CACHE_ITEM:
if (make_copy_field)
{
DBUG_ASSERT(((Item_result_field*)item)->result_field);
@@ -10243,9 +14842,15 @@ void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps)
uint field_count= table->s->fields;
bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count,
FALSE);
- bitmap_init(&table->tmp_set,
+ bitmap_init(&table->def_vcol_set,
(my_bitmap_map*) (bitmaps+ bitmap_buffer_size(field_count)),
field_count, FALSE);
+ bitmap_init(&table->tmp_set,
+ (my_bitmap_map*) (bitmaps+ 2*bitmap_buffer_size(field_count)),
+ field_count, FALSE);
+ bitmap_init(&table->eq_join_set,
+ (my_bitmap_map*) (bitmaps+ 3*bitmap_buffer_size(field_count)),
+ field_count, FALSE);
/* write_set and all_set are copies of read_set */
table->def_write_set= table->def_read_set;
table->s->all_set= table->def_read_set;
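
setup_tmp_table_column_bitmaps() now carves four bitmaps out of one contiguous buffer (read set, vcol set, tmp set, eq-join set), which is why the allocations further down grow from bitmap_buffer_size(field_count)*2 to *4. A small sketch of the same offset arithmetic, with a toy function standing in for the real bitmap_buffer_size():

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Toy stand-in for bitmap_buffer_size(): one 32-bit word per 32 fields.
static size_t toy_bitmap_buffer_size(size_t fields)
{
  return ((fields + 31) / 32) * sizeof(uint32_t);
}

int main()
{
  const size_t field_count= 70;
  const size_t one= toy_bitmap_buffer_size(field_count);

  // One allocation for all four bitmaps, as in create_tmp_table().
  std::vector<unsigned char> bitmaps(one * 4);

  unsigned char *read_set=    bitmaps.data();
  unsigned char *vcol_set=    bitmaps.data() + one;
  unsigned char *tmp_set=     bitmaps.data() + 2 * one;
  unsigned char *eq_join_set= bitmaps.data() + 3 * one;

  std::printf("per-bitmap bytes: %zu, offsets: %td %td %td %td\n",
              one,
              read_set    - bitmaps.data(),
              vcol_set    - bitmaps.data(),
              tmp_set     - bitmaps.data(),
              eq_join_set - bitmaps.data());
  return 0;
}
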
@@ -10280,16 +14885,12 @@ void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps)
be used for name resolving; can be "".
*/
-#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128
-#define AVG_STRING_LENGTH_TO_PACK_ROWS 64
-#define RATIO_TO_PACK_ROWS 2
-#define MIN_STRING_LENGTH_TO_PACK_ROWS 10
-
TABLE *
-create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
+create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
ORDER *group, bool distinct, bool save_sum_fields,
ulonglong select_options, ha_rows rows_limit,
- const char *table_alias)
+ const char *table_alias, bool do_not_open,
+ bool keep_row_order)
{
MEM_ROOT *mem_root_save, own_root;
TABLE *table;
@@ -10313,7 +14914,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
KEY *keyinfo;
KEY_PART_INFO *key_part_info;
Item **copy_func;
- MI_COLUMNDEF *recinfo;
+ ENGINE_COLUMNDEF *recinfo;
/*
total_uneven_bit_length is uneven bit length for visible fields
hidden_uneven_bit_length is uneven bit length for hidden fields
@@ -10324,11 +14925,13 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
save_sum_fields|= param->precomputed_group_by;
DBUG_ENTER("create_tmp_table");
DBUG_PRINT("enter",
- ("distinct: %d save_sum_fields: %d rows_limit: %lu group: %d",
+ ("table_alias: '%s' distinct: %d save_sum_fields: %d "
+ "rows_limit: %lu group: %d", table_alias,
(int) distinct, (int) save_sum_fields,
(ulong) rows_limit,test(group)));
status_var_increment(thd->status_var.created_tmp_tables);
+ thd->query_plan_flags|= QPLAN_TMP_TABLE;
if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
temp_pool_slot = bitmap_lock_set_next(&temp_pool);
@@ -10339,25 +14942,41 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
else
{
/* if we run out of slots or we are not using tempool */
- sprintf(path,"%s%lx_%lx_%x", tmp_file_prefix,current_pid,
+ sprintf(path, "%s%lx_%lx_%x", tmp_file_prefix,current_pid,
thd->thread_id, thd->tmp_table++);
}
/*
No need to change table name to lower case as we are only creating
- MyISAM or HEAP tables here
+ MyISAM, Aria or HEAP tables here
*/
- fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME);
-
+ fn_format(path, path, mysql_tmpdir, "",
+ MY_REPLACE_EXT|MY_UNPACK_FILENAME);
if (group)
{
+ ORDER **prev= &group;
if (!param->quick_group)
group=0; // Can't use group key
else for (ORDER *tmp=group ; tmp ; tmp=tmp->next)
{
+ /* Exclude found constant from the list */
+ if ((*tmp->item)->const_item())
+ {
+ *prev= tmp->next;
+ param->group_parts--;
+ continue;
+ }
+ else
+ prev= &(tmp->next);
+ /*
+ marker == 4 means two things:
+ - store NULLs in the key, and
+ - convert BIT fields to 64-bit long, needed because MEMORY tables
+ can't index BIT fields.
+ */
(*tmp->item)->marker=4; // Store null in key
- if ((*tmp->item)->max_length >= CONVERT_IF_BIGGER_TO_BLOB)
+ if ((*tmp->item)->too_big_for_varchar())
using_unique_constraint=1;
}
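
The loop above unlinks constant items from the GROUP BY list in place by keeping a pointer to the previous link's next-pointer (ORDER **prev). The same unlink-without-a-previous-node pattern on a toy singly linked list (the Node type and its is_const flag are illustrative, not server types):

#include <cstdio>

struct Node
{
  int   value;
  bool  is_const;   // plays the role of (*tmp->item)->const_item()
  Node *next;
};

// Remove all constant nodes, exactly like the ORDER **prev loop above:
// either splice the node out through *prev, or advance prev to &node->next.
static void drop_const_nodes(Node **head)
{
  Node **prev= head;
  for (Node *tmp= *head; tmp; tmp= tmp->next)
  {
    if (tmp->is_const)
      *prev= tmp->next;        // unlink; prev stays where it is
    else
      prev= &tmp->next;        // keep; move prev forward
  }
}

int main()
{
  Node c= { 3, false, nullptr };
  Node b= { 2, true,  &c };    // constant: should disappear
  Node a= { 1, false, &b };
  Node *head= &a;

  drop_const_nodes(&head);
  for (Node *n= head; n; n= n->next)
    std::printf("%d ", n->value);        // prints: 1 3
  std::printf("\n");
  return 0;
}
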
if (param->group_length >= MAX_BLOB_WIDTH)
@@ -10397,7 +15016,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
&tmpname, (uint) strlen(path)+1,
&group_buff, (group && ! using_unique_constraint ?
param->group_length : 0),
- &bitmaps, bitmap_buffer_size(field_count)*2,
+ &bitmaps, bitmap_buffer_size(field_count)*4,
NullS))
{
if (temp_pool_slot != MY_BIT_NONE)
@@ -10413,7 +15032,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
DBUG_RETURN(NULL); /* purecov: inspected */
}
param->items_to_copy= copy_func;
- strmov(tmpname,path);
+ strmov(tmpname, path);
/* make table according to fields */
bzero((char*) table,sizeof(*table));
@@ -10426,9 +15045,9 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
thd->mem_root= &table->mem_root;
table->field=reg_field;
- table->alias= table_alias;
+ table->alias.set(table_alias, strlen(table_alias), table_alias_charset);
+
table->reginfo.lock_type=TL_WRITE; /* Will be updated */
- table->db_stat=HA_OPEN_KEYFILE+HA_OPEN_RNDFILE;
table->map=1;
table->temp_pool_slot = temp_pool_slot;
table->copy_blobs= 1;
@@ -10436,13 +15055,14 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
table->quick_keys.init();
table->covering_keys.init();
table->merge_keys.init();
+ table->intersect_keys.init();
table->keys_in_use_for_query.init();
+ table->no_rows_with_nulls= param->force_not_null_cols;
table->s= share;
init_tmp_table_share(thd, share, "", 0, tmpname, tmpname);
share->blob_field= blob_field;
share->blob_ptr_size= portable_sizeof_char_ptr;
- share->db_low_byte_first=1; // True for HEAP and MyISAM
share->table_charset= param->table_charset;
share->primary_key= MAX_KEY; // Indicate no primary key
share->keys_for_keyread.init();
@@ -10468,7 +15088,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
{
if (item->used_tables() & OUTER_REF_TABLE_BIT)
item->update_used_tables();
- if (type == Item::SUBSELECT_ITEM ||
+ if ((item->real_type() == Item::SUBSELECT_ITEM) ||
(item->used_tables() & ~OUTER_REF_TABLE_BIT))
{
/*
@@ -10519,6 +15139,11 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
thd->mem_root= mem_root_save;
arg= sum_item->set_arg(i, thd, new Item_field(new_field));
thd->mem_root= &table->mem_root;
+ if (param->force_not_null_cols)
+ {
+ new_field->flags|= NOT_NULL_FLAG;
+ new_field->null_ptr= NULL;
+ }
if (!(new_field->flags & NOT_NULL_FLAG))
{
null_count++;
@@ -10554,18 +15179,51 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
group != 0,
!force_copy_fields &&
(not_all_columns || group !=0),
- item->marker == 4, force_copy_fields,
+ /*
+ If item->marker == 4 then we force create_tmp_field
+ to create a 64-bit longs for BIT fields because HEAP
+ tables can't index BIT fields directly. We do the same
+ for distinct, as we want the distinct index to be
+ usable in this case too.
+ */
+ item->marker == 4 || param->bit_fields_as_long,
+ force_copy_fields,
param->convert_blob_length);
if (!new_field)
{
if (thd->is_fatal_error)
goto err; // Got OOM
- continue; // Some kindf of const item
+ continue; // Some kind of const item
}
if (type == Item::SUM_FUNC_ITEM)
- ((Item_sum *) item)->result_field= new_field;
+ {
+ Item_sum *agg_item= (Item_sum *) item;
+ /*
+ Update the result field only if it has never been set, or if the
+ created temporary table is not to be used for subquery
+ materialization.
+
+ The reason is that for subqueries that require materialization as part
+ of their plan, we create the 'external' temporary table needed for IN
+ execution, after the 'internal' temporary table needed for grouping.
+ Since both the external and the internal temporary tables are created
+ for the same list of SELECT fields of the subquery, setting
+ 'result_field' for each invocation of create_tmp_table overrides the
+ previous value of 'result_field'.
+
+ The condition below prevents the creation of the external temp table
+ to override the 'result_field' that was set for the internal temp table.
+ */
+ if (!agg_item->result_field || !param->materialized_subquery)
+ agg_item->result_field= new_field;
+ }
tmp_from_field++;
+ if (param->force_not_null_cols)
+ {
+ new_field->flags|= NOT_NULL_FLAG;
+ new_field->null_ptr= NULL;
+ }
reclength+=new_field->pack_length();
if (!(new_field->flags & NOT_NULL_FLAG))
null_count++;
@@ -10576,6 +15234,12 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
*blob_field++= fieldnr;
blob_count++;
}
+ if (new_field->real_type() == MYSQL_TYPE_STRING ||
+ new_field->real_type() == MYSQL_TYPE_VARCHAR)
+ {
+ string_count++;
+ string_total_length+= new_field->pack_length();
+ }
if (item->marker == 4 && item->maybe_null)
{
group_null_items++;
@@ -10613,14 +15277,16 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
*reg_field= 0;
*blob_field= 0; // End marker
share->fields= field_count;
+ share->column_bitmap_size= bitmap_buffer_size(share->fields);
/* If result table is small; use a heap */
/* future: storage engine selection can be made dynamic? */
if (blob_count || using_unique_constraint
|| (thd->variables.big_tables && !(select_options & SELECT_SMALL_RESULT))
- || (select_options & TMP_TABLE_FORCE_MYISAM))
+ || (select_options & TMP_TABLE_FORCE_MYISAM)
+ || thd->variables.tmp_table_size == 0)
{
- share->db_plugin= ha_lock_engine(0, myisam_hton);
+ share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON);
table->file= get_new_handler(share, &table->mem_root,
share->db_type());
if (group &&
@@ -10637,7 +15303,6 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
if (!table->file)
goto err;
-
if (!using_unique_constraint)
reclength+= group_null_items; // null flag is stored separately
@@ -10660,8 +15325,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
/* Use packed rows if there is blobs or a lot of space to gain */
if (blob_count ||
(string_total_length >= STRING_TOTAL_LENGTH_TO_PACK_ROWS &&
- (reclength / string_total_length <= RATIO_TO_PACK_ROWS ||
- string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
+ (reclength / string_total_length <= RATIO_TO_PACK_ROWS ||
+ string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
use_packed_rows= 1;
share->reclength= reclength;
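
The packed-row decision above keeps the old heuristic; the thresholds are the #defines removed earlier in this hunk (STRING_TOTAL_LENGTH_TO_PACK_ROWS 128, AVG_STRING_LENGTH_TO_PACK_ROWS 64, RATIO_TO_PACK_ROWS 2). A worked example of the decision as a standalone sketch:

#include <cstdio>

// Thresholds as removed from this file by the patch (now defined elsewhere).
static const unsigned STRING_TOTAL_LENGTH_TO_PACK_ROWS= 128;
static const unsigned AVG_STRING_LENGTH_TO_PACK_ROWS=   64;
static const unsigned RATIO_TO_PACK_ROWS=               2;

static bool use_packed_rows(unsigned blob_count, unsigned reclength,
                            unsigned string_count,
                            unsigned string_total_length)
{
  return blob_count ||
         (string_total_length >= STRING_TOTAL_LENGTH_TO_PACK_ROWS &&
          (reclength / string_total_length <= RATIO_TO_PACK_ROWS ||
           string_total_length / string_count >=
           AVG_STRING_LENGTH_TO_PACK_ROWS));
}

int main()
{
  // 200-byte row, 150 bytes of strings: 200/150 = 1 <= 2, so rows are packed.
  std::printf("%d\n", use_packed_rows(0, 200, 2, 150));   // prints 1
  // 1000-byte row, three strings totalling 130 bytes: ratio 7 > 2 and the
  // average string length 43 < 64, so rows stay unpacked.
  std::printf("%d\n", use_packed_rows(0, 1000, 3, 130));  // prints 0
  return 0;
}
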
@@ -10692,7 +15357,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
table->null_flags= (uchar*) table->record[0];
share->null_fields= null_count+ hidden_null_count;
- share->null_bytes= null_pack_length;
+ share->null_bytes= share->null_bytes_for_compare= null_pack_length;
}
null_count= (blob_count == 0) ? 1 : 0;
hidden_field_count=param->hidden_field_count;
@@ -10704,23 +15369,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
if (!(field->flags & NOT_NULL_FLAG))
{
- if (field->flags & GROUP_FLAG && !using_unique_constraint)
- {
- /*
- We have to reserve one byte here for NULL bits,
- as this is updated by 'end_update()'
- */
- *pos++=0; // Null is stored here
- recinfo->length=1;
- recinfo->type=FIELD_NORMAL;
- recinfo++;
- bzero((uchar*) recinfo,sizeof(*recinfo));
- }
- else
- {
- recinfo->null_bit= (uint8)1 << (null_count & 7);
- recinfo->null_pos= null_count/8;
- }
+ recinfo->null_bit= (uint8)1 << (null_count & 7);
+ recinfo->null_pos= null_count/8;
field->move_field(pos,null_flags+null_count/8,
(uint8)1 << (null_count & 7));
null_count++;
@@ -10774,22 +15424,25 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
/* Make entry for create table */
recinfo->length=length;
if (field->flags & BLOB_FLAG)
- recinfo->type= (int) FIELD_BLOB;
+ recinfo->type= FIELD_BLOB;
else if (use_packed_rows &&
field->real_type() == MYSQL_TYPE_STRING &&
length >= MIN_STRING_LENGTH_TO_PACK_ROWS)
- recinfo->type=FIELD_SKIP_ENDSPACE;
+ recinfo->type= FIELD_SKIP_ENDSPACE;
+ else if (field->real_type() == MYSQL_TYPE_VARCHAR)
+ recinfo->type= FIELD_VARCHAR;
else
- recinfo->type=FIELD_NORMAL;
+ recinfo->type= FIELD_NORMAL;
+
if (!--hidden_field_count)
null_count=(null_count+7) & ~7; // move to next byte
// fix table name in field entry
- field->table_name= &table->alias;
+ field->set_table_name(&table->alias);
}
param->copy_field_end=copy;
- param->recinfo=recinfo;
+ param->recinfo= recinfo; // Pointer to after last field
store_record(table,s->default_values); // Make empty default record
if (thd->variables.tmp_table_size == ~ (ulonglong) 0) // No limit
@@ -10817,10 +15470,14 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
param->group_buff=group_buff;
share->keys=1;
share->uniques= test(using_unique_constraint);
- table->key_info=keyinfo;
+ table->key_info= table->s->key_info= keyinfo;
+ table->keys_in_use_for_query.set_bit(0);
+ share->keys_in_use.set_bit(0);
keyinfo->key_part=key_part_info;
- keyinfo->flags=HA_NOSAME;
+ keyinfo->flags=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY;
+ keyinfo->ext_key_flags= keyinfo->flags;
keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts;
+ keyinfo->ext_key_parts= keyinfo->key_parts;
keyinfo->key_length=0;
keyinfo->rec_per_key=0;
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
@@ -10833,6 +15490,9 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
bool maybe_null=(*cur_group->item)->maybe_null;
key_part_info->null_bit=0;
key_part_info->field= field;
+ key_part_info->fieldnr= field->field_index + 1;
+ if (cur_group == group)
+ field->key_start.set_bit(0);
key_part_info->offset= field->offset(table->record[0]);
key_part_info->length= (uint16) field->key_length();
key_part_info->type= (uint8) field->key_type();
@@ -10845,12 +15505,26 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
if (!using_unique_constraint)
{
cur_group->buff=(char*) group_buff;
+
+ if (maybe_null && !field->null_bit)
+ {
+ /*
+ This can only happen in the unusual case where an outer join
+ table was found to be not-nullable by the optimizer and thus
+ the item can't really be null.
+ We solve this by marking the item as !maybe_null to ensure
+ that the key, field and item definitions match.
+ */
+ (*cur_group->item)->maybe_null= maybe_null= 0;
+ }
+
if (!(cur_group->field= field->new_key_field(thd->mem_root,table,
group_buff +
test(maybe_null),
field->null_ptr,
field->null_bit)))
goto err; /* purecov: inspected */
+
if (maybe_null)
{
/*
@@ -10872,6 +15546,12 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
}
keyinfo->key_length+= key_part_info->length;
}
+ /*
+ Ensure we didn't overrun the group buffer. The < is only true when
+ some maybe_null fields were changed to not null fields.
+ */
+ DBUG_ASSERT(using_unique_constraint ||
+ group_buff <= param->group_buff + param->group_length);
}
if (distinct && field_count != param->hidden_field_count)
@@ -10884,29 +15564,56 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
*/
DBUG_PRINT("info",("hidden_field_count: %d", param->hidden_field_count));
- null_pack_length-=hidden_null_pack_length;
- keyinfo->key_parts= ((field_count-param->hidden_field_count)+
- test(null_pack_length));
- table->distinct= 1;
- share->keys= 1;
if (blob_count)
{
- using_unique_constraint=1;
+ /*
+ Special mode for index creation in MyISAM used to support unique
+ indexes on blobs with arbitrary length. Such indexes cannot be
+ used for lookups.
+ */
share->uniques= 1;
}
+ null_pack_length-=hidden_null_pack_length;
+ keyinfo->key_parts= ((field_count-param->hidden_field_count)+
+ (share->uniques ? test(null_pack_length) : 0));
+ keyinfo->ext_key_parts= keyinfo->key_parts;
+ table->distinct= 1;
+ share->keys= 1;
if (!(key_part_info= (KEY_PART_INFO*)
alloc_root(&table->mem_root,
keyinfo->key_parts * sizeof(KEY_PART_INFO))))
goto err;
bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO));
- table->key_info=keyinfo;
+ table->keys_in_use_for_query.set_bit(0);
+ share->keys_in_use.set_bit(0);
+ table->key_info= table->s->key_info= keyinfo;
keyinfo->key_part=key_part_info;
- keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL;
- keyinfo->key_length=(uint16) reclength;
+ keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL | HA_BINARY_PACK_KEY | HA_PACK_KEY;
+ keyinfo->ext_key_flags= keyinfo->flags;
+ keyinfo->key_length= 0; // Will compute the sum of the parts below.
keyinfo->name= (char*) "distinct_key";
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
- keyinfo->rec_per_key=0;
- if (null_pack_length)
+ /*
+ Needed by non-merged semi-joins: SJ-Materialized table must have a valid
+ rec_per_key array, because it participates in join optimization. Since
+ the table has no data, the only statistics we can provide is "unknown",
+ i.e. zero values.
+
+ (For table record count, we calculate and set JOIN_TAB::found_records,
+ see get_delayed_table_estimates()).
+ */
+ size_t rpk_size= keyinfo->key_parts* sizeof(keyinfo->rec_per_key[0]);
+ if (!(keyinfo->rec_per_key= (ulong*) alloc_root(&table->mem_root,
+ rpk_size)))
+ goto err;
+ bzero(keyinfo->rec_per_key, rpk_size);
+
+ /*
+ Create an extra field to hold NULL bits so that unique indexes on
+ blobs can distinguish NULL from 0. This extra field is not needed
+ when we do not use UNIQUE indexes for blobs.
+ */
+ if (null_pack_length && share->uniques)
{
key_part_info->null_bit=0;
key_part_info->offset=hidden_null_pack_length;
@@ -10922,6 +15629,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
key_part_info->field->init(table);
key_part_info->key_type=FIELDFLAG_BINARY;
key_part_info->type= HA_KEYTYPE_BINARY;
+ key_part_info->fieldnr= key_part_info->field->field_index + 1;
key_part_info++;
}
/* Create a distinct key over the columns we are going to return */
@@ -10929,10 +15637,47 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
i < field_count;
i++, reg_field++, key_part_info++)
{
- key_part_info->null_bit=0;
key_part_info->field= *reg_field;
+ (*reg_field)->flags |= PART_KEY_FLAG;
+ if (key_part_info == keyinfo->key_part)
+ (*reg_field)->key_start.set_bit(0);
+ key_part_info->null_bit= (*reg_field)->null_bit;
+ key_part_info->null_offset= (uint) ((*reg_field)->null_ptr -
+ (uchar*) table->record[0]);
+
key_part_info->offset= (*reg_field)->offset(table->record[0]);
key_part_info->length= (uint16) (*reg_field)->pack_length();
+ key_part_info->fieldnr= (*reg_field)->field_index + 1;
+ /* TODO:
+ The below method of computing the key format length of the
+ key part is a copy/paste from opt_range.cc, and table.cc.
+ This should be factored out, e.g. as a method of Field.
+ In addition it is not clear if any of the Field::*_length
+ methods is supposed to compute the same length. If so, it
+ might be reused.
+ */
+ key_part_info->store_length= key_part_info->length;
+
+ if ((*reg_field)->real_maybe_null())
+ {
+ key_part_info->store_length+= HA_KEY_NULL_LENGTH;
+ key_part_info->key_part_flag |= HA_NULL_PART;
+ }
+ if ((*reg_field)->type() == MYSQL_TYPE_BLOB ||
+ (*reg_field)->real_type() == MYSQL_TYPE_VARCHAR ||
+ (*reg_field)->type() == MYSQL_TYPE_GEOMETRY)
+ {
+ if ((*reg_field)->type() == MYSQL_TYPE_BLOB ||
+ (*reg_field)->type() == MYSQL_TYPE_GEOMETRY)
+ key_part_info->key_part_flag|= HA_BLOB_PART;
+ else
+ key_part_info->key_part_flag|= HA_VAR_LENGTH_PART;
+
+ key_part_info->store_length+=HA_KEY_BLOB_LENGTH;
+ }
+
+ keyinfo->key_length+= key_part_info->store_length;
+
key_part_info->type= (uint8) (*reg_field)->key_type();
key_part_info->key_type =
((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT ||
@@ -10945,14 +15690,21 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
if (thd->is_fatal_error) // If end of memory
goto err; /* purecov: inspected */
share->db_record_offset= 1;
- if (share->db_type() == myisam_hton)
+ table->used_for_duplicate_elimination= (param->sum_func_count == 0 &&
+ (table->group || table->distinct));
+ table->keep_row_order= keep_row_order;
+
+ if (!do_not_open)
{
- if (create_myisam_tmp_table(table, param, select_options,
- thd->variables.big_tables))
+ if (share->db_type() == TMP_ENGINE_HTON)
+ {
+ if (create_internal_tmp_table(table, param->keyinfo, param->start_recinfo,
+ &param->recinfo, select_options))
+ goto err;
+ }
+ if (open_tmp_table(table))
goto err;
}
- if (open_tmp_table(table))
- goto err;
// Make empty record so random data is not written to disk
empty_record(table);
@@ -10964,10 +15716,13 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
err:
thd->mem_root= mem_root_save;
free_tmp_table(thd,table); /* purecov: inspected */
+ if (temp_pool_slot != MY_BIT_NONE)
+ bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
DBUG_RETURN(NULL); /* purecov: inspected */
}
+
/****************************************************************************/
/**
@@ -11008,7 +15763,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list)
&share, sizeof(*share),
&field, (field_count + 1) * sizeof(Field*),
&blob_field, (field_count+1) *sizeof(uint),
- &bitmaps, bitmap_buffer_size(field_count)*2,
+ &bitmaps, bitmap_buffer_size(field_count)*4,
NullS))
return 0;
@@ -11020,7 +15775,6 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list)
share->blob_field= blob_field;
share->fields= field_count;
share->blob_ptr_size= portable_sizeof_char_ptr;
- share->db_low_byte_first=1; // True for HEAP and MyISAM
setup_tmp_table_column_bitmaps(table, bitmaps);
/* Create all fields and calculate the total length of record */
@@ -11060,7 +15814,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list)
{
table->null_flags= (uchar*) table->record[0];
share->null_fields= null_count;
- share->null_bytes= null_pack_length;
+ share->null_bytes= share->null_bytes_for_compare= null_pack_length;
}
table->in_use= thd; /* field->reset() may access table->in_use */
@@ -11110,30 +15864,254 @@ error:
}
-static bool open_tmp_table(TABLE *table)
+bool open_tmp_table(TABLE *table)
{
int error;
- if ((error=table->file->ha_open(table, table->s->table_name.str,O_RDWR,
- HA_OPEN_TMP_TABLE | HA_OPEN_INTERNAL_TABLE)))
+ if ((error= table->file->ha_open(table, table->s->table_name.str, O_RDWR,
+ HA_OPEN_TMP_TABLE |
+ HA_OPEN_INTERNAL_TABLE)))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
table->db_stat=0;
return(1);
}
+ table->db_stat= HA_OPEN_KEYFILE+HA_OPEN_RNDFILE;
(void) table->file->extra(HA_EXTRA_QUICK); /* Faster */
+ table->created= TRUE;
return(0);
}
-static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
- ulonglong options, my_bool big_tables)
+#if defined(WITH_ARIA_STORAGE_ENGINE) && defined(USE_ARIA_FOR_TMP_TABLES)
+
+/*
+ Create internal (MyISAM or Aria) temporary table
+
+ SYNOPSIS
+ create_internal_tmp_table()
+ table Table object that describes the table to be created
+ keyinfo Description of the index (there is always one index)
+ start_recinfo engine's column descriptions
+ recinfo INOUT End of engine's column descriptions
+ options Option bits
+
+ DESCRIPTION
+ Create an internal temporary table according to the passed description. The
+ table is assumed to have one unique index or constraint.
+
+ The passed array of ENGINE_COLUMNDEF structures must have this form:
+
+ 1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
+ when there are many nullable columns)
+ 2. Table columns
+ 3. One free ENGINE_COLUMNDEF element (*recinfo points here)
+
+ This function may use the free element to create a hash column for the
+ unique constraint.
+
+ RETURN
+ FALSE - OK
+ TRUE - Error
+*/
+
+
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
+ ENGINE_COLUMNDEF *start_recinfo,
+ ENGINE_COLUMNDEF **recinfo,
+ ulonglong options)
+{
+ int error;
+ MARIA_KEYDEF keydef;
+ MARIA_UNIQUEDEF uniquedef;
+ TABLE_SHARE *share= table->s;
+ MARIA_CREATE_INFO create_info;
+ DBUG_ENTER("create_internal_tmp_table");
+
+ if (share->keys)
+ { // Get keys for maria_create
+ bool using_unique_constraint=0;
+ HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root,
+ sizeof(*seg) * keyinfo->key_parts);
+ if (!seg)
+ goto err;
+
+ bzero(seg, sizeof(*seg) * keyinfo->key_parts);
+ if (keyinfo->key_length > table->file->max_key_length() ||
+ keyinfo->key_parts > table->file->max_key_parts() ||
+ share->uniques)
+ {
+ if (!share->uniques && !(keyinfo->flags & HA_NOSAME))
+ {
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "Using too big key for internal temp tables");
+ DBUG_RETURN(1);
+ }
+
+ /* Can't create a key; Make a unique constraint instead of a key */
+ share->keys= 0;
+ share->uniques= 1;
+ using_unique_constraint=1;
+ bzero((char*) &uniquedef,sizeof(uniquedef));
+ uniquedef.keysegs=keyinfo->key_parts;
+ uniquedef.seg=seg;
+ uniquedef.null_are_equal=1;
+
+ /* Create extra column for hash value */
+ bzero((uchar*) *recinfo,sizeof(**recinfo));
+ (*recinfo)->type= FIELD_CHECK;
+ (*recinfo)->length= MARIA_UNIQUE_HASH_LENGTH;
+ (*recinfo)++;
+ share->reclength+= MARIA_UNIQUE_HASH_LENGTH;
+ }
+ else
+ {
+ /* Create a key */
+ bzero((char*) &keydef,sizeof(keydef));
+ keydef.flag= keyinfo->flags & HA_NOSAME;
+ keydef.keysegs= keyinfo->key_parts;
+ keydef.seg= seg;
+ }
+ for (uint i=0; i < keyinfo->key_parts ; i++,seg++)
+ {
+ Field *field=keyinfo->key_part[i].field;
+ seg->flag= 0;
+ seg->language= field->charset()->number;
+ seg->length= keyinfo->key_part[i].length;
+ seg->start= keyinfo->key_part[i].offset;
+ if (field->flags & BLOB_FLAG)
+ {
+ seg->type=
+ ((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ?
+ HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2);
+ seg->bit_start= (uint8)(field->pack_length() - share->blob_ptr_size);
+ seg->flag= HA_BLOB_PART;
+ seg->length=0; // Whole blob in unique constraint
+ }
+ else
+ {
+ seg->type= keyinfo->key_part[i].type;
+ /* Tell handler if it can do suffix space compression */
+ if (field->real_type() == MYSQL_TYPE_STRING &&
+ keyinfo->key_part[i].length > 32)
+ seg->flag|= HA_SPACE_PACK;
+ }
+ if (!(field->flags & NOT_NULL_FLAG))
+ {
+ seg->null_bit= field->null_bit;
+ seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]);
+ /*
+ We are using a GROUP BY on something that contains NULL.
+ In this case we have to tell Aria that two NULLs should
+ on INSERT be regarded as the same value.
+ */
+ if (!using_unique_constraint)
+ keydef.flag|= HA_NULL_ARE_EQUAL;
+ }
+ }
+ }
+ bzero((char*) &create_info,sizeof(create_info));
+
+ /* Use long data format, to ensure we never get a 'table is full' error */
+ if (!(options & SELECT_SMALL_RESULT))
+ create_info.data_file_length= ~(ulonglong) 0;
+
+ /*
+ The logic for choosing the record format:
+ The STATIC_RECORD format is the fastest one, because it's so simple,
+ so we use this by default for short rows.
+ BLOCK_RECORD caches both row and data, so this is generally faster than
+ DYNAMIC_RECORD. The one exception is when we write to a tmp table and
+ want to use keys for duplicate elimination, as with BLOCK_RECORD
+ we first write the row, then check for key conflicts and then we have to
+ delete the row. The cases when this can happen are when there is
+ a GROUP BY and no sum functions, or when DISTINCT is used.
+ */
+ if ((error= maria_create(share->table_name.str,
+ table->no_rows ? NO_RECORD :
+ (share->reclength < 64 &&
+ !share->blob_fields ? STATIC_RECORD :
+ table->used_for_duplicate_elimination ||
+ table->keep_row_order ?
+ DYNAMIC_RECORD : BLOCK_RECORD),
+ share->keys, &keydef,
+ (uint) (*recinfo-start_recinfo),
+ start_recinfo,
+ share->uniques, &uniquedef,
+ &create_info,
+ HA_CREATE_TMP_TABLE)))
+ {
+ table->file->print_error(error,MYF(0)); /* purecov: inspected */
+ table->db_stat=0;
+ goto err;
+ }
+ status_var_increment(table->in_use->status_var.created_tmp_disk_tables);
+ table->in_use->query_plan_flags|= QPLAN_TMP_DISK;
+ share->db_record_offset= 1;
+ DBUG_RETURN(0);
+ err:
+ DBUG_RETURN(1);
+}
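
The maria_create() call above picks the Aria record format from a few table properties. The same decision tree, lifted into a standalone function so it can be read in isolation (the enum is a stand-in for Aria's real record-format constants, and the struct is a toy, not TABLE/TABLE_SHARE):

#include <cassert>

// Stand-ins for Aria's record formats (NO_RECORD, STATIC_RECORD, ...).
enum ToyRecordFormat { REC_NONE, REC_STATIC, REC_DYNAMIC, REC_BLOCK };

struct TmpTableTraits
{
  bool     no_rows;                      // rows are never stored
  unsigned reclength;
  unsigned blob_fields;
  bool     used_for_duplicate_elimination;
  bool     keep_row_order;
};

// Same priority order as the nested conditional in the maria_create() call.
static ToyRecordFormat choose_format(const TmpTableTraits &t)
{
  if (t.no_rows)
    return REC_NONE;                     // only key lookups are needed
  if (t.reclength < 64 && !t.blob_fields)
    return REC_STATIC;                   // short fixed rows: fastest format
  if (t.used_for_duplicate_elimination || t.keep_row_order)
    return REC_DYNAMIC;                  // rows may be deleted right after
                                         // insert on a key conflict
  return REC_BLOCK;                      // cached block format otherwise
}

int main()
{
  TmpTableTraits grouping=     { false, 48,  0, false, false };
  TmpTableTraits distinct_big= { false, 300, 0, true,  false };
  TmpTableTraits with_blob=    { false, 300, 1, false, false };

  assert(choose_format(grouping) == REC_STATIC);
  assert(choose_format(distinct_big) == REC_DYNAMIC);
  assert(choose_format(with_blob) == REC_BLOCK);
  return 0;
}
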
+
+
+bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
+ ENGINE_COLUMNDEF *start_recinfo,
+ ENGINE_COLUMNDEF **recinfo,
+ int error,
+ bool ignore_last_dupp_key_error,
+ bool *is_duplicate)
+{
+ return create_internal_tmp_table_from_heap2(thd, table,
+ start_recinfo, recinfo, error,
+ ignore_last_dupp_key_error,
+ maria_hton,
+ "converting HEAP to Aria",
+ is_duplicate);
+}
+
+#else
+
+/*
+ Create internal (MyISAM or Aria) temporary table
+
+ SYNOPSIS
+ create_internal_tmp_table()
+ table Table object that describes the table to be created
+ keyinfo Description of the index (there is always one index)
+ start_recinfo engine's column descriptions
+ recinfo INOUT End of engine's column descriptions
+ options Option bits
+
+ DESCRIPTION
+ Create an internal temporary table according to the passed description. The
+ table is assumed to have one unique index or constraint.
+
+ The passed array of ENGINE_COLUMNDEF structures must have this form:
+
+ 1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
+ when there are many nullable columns)
+ 2. Table columns
+ 3. One free ENGINE_COLUMNDEF element (*recinfo points here)
+
+ This function may use the free element to create a hash column for the
+ unique constraint.
+
+ RETURN
+ FALSE - OK
+ TRUE - Error
+*/
+
+/* Create internal MyISAM temporary table */
+
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
+ ENGINE_COLUMNDEF *start_recinfo,
+ ENGINE_COLUMNDEF **recinfo,
+ ulonglong options)
{
int error;
MI_KEYDEF keydef;
MI_UNIQUEDEF uniquedef;
- KEY *keyinfo=param->keyinfo;
TABLE_SHARE *share= table->s;
- DBUG_ENTER("create_myisam_tmp_table");
+ DBUG_ENTER("create_internal_tmp_table");
if (share->keys)
{ // Get keys for ni_create
@@ -11144,7 +16122,7 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
goto err;
bzero(seg, sizeof(*seg) * keyinfo->key_parts);
- if (keyinfo->key_length >= table->file->max_key_length() ||
+ if (keyinfo->key_length > table->file->max_key_length() ||
keyinfo->key_parts > table->file->max_key_parts() ||
share->uniques)
{
@@ -11158,17 +16136,18 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
uniquedef.null_are_equal=1;
/* Create extra column for hash value */
- bzero((uchar*) param->recinfo,sizeof(*param->recinfo));
- param->recinfo->type= FIELD_CHECK;
- param->recinfo->length=MI_UNIQUE_HASH_LENGTH;
- param->recinfo++;
+ bzero((uchar*) *recinfo,sizeof(**recinfo));
+ (*recinfo)->type= FIELD_CHECK;
+ (*recinfo)->length=MI_UNIQUE_HASH_LENGTH;
+ (*recinfo)++;
share->reclength+=MI_UNIQUE_HASH_LENGTH;
}
else
{
/* Create a unique key */
bzero((char*) &keydef,sizeof(keydef));
- keydef.flag=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY;
+ keydef.flag= ((keyinfo->flags & HA_NOSAME) | HA_BINARY_PACK_KEY |
+ HA_PACK_KEY);
keydef.keysegs= keyinfo->key_parts;
keydef.seg= seg;
}
@@ -11213,90 +16192,75 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
MI_CREATE_INFO create_info;
bzero((char*) &create_info,sizeof(create_info));
- if (big_tables && !(options & SELECT_SMALL_RESULT))
+ if (!(options & SELECT_SMALL_RESULT))
create_info.data_file_length= ~(ulonglong) 0;
if ((error=mi_create(share->table_name.str, share->keys, &keydef,
- (uint) (param->recinfo-param->start_recinfo),
- param->start_recinfo,
+ (uint) (*recinfo-start_recinfo),
+ start_recinfo,
share->uniques, &uniquedef,
&create_info,
HA_CREATE_TMP_TABLE)))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
- /*
- Table name which was allocated from temp-pool is already occupied
- in SE. Probably we hit a bug in server or some problem with system
- configuration. Prevent problem from re-occurring by marking temp-pool
- slot for this name as permanently busy, to do this we only need to set
- TABLE::temp_pool_slot to MY_BIT_NONE in order to avoid freeing it
- in free_tmp_table().
- */
- if (error == EEXIST)
- table->temp_pool_slot= MY_BIT_NONE;
table->db_stat=0;
goto err;
}
status_var_increment(table->in_use->status_var.created_tmp_disk_tables);
+ table->in_use->query_plan_flags|= QPLAN_TMP_DISK;
share->db_record_offset= 1;
+ table->created= TRUE;
DBUG_RETURN(0);
err:
DBUG_RETURN(1);
}
-void
-free_tmp_table(THD *thd, TABLE *entry)
-{
- MEM_ROOT own_root= entry->mem_root;
- const char *save_proc_info;
- DBUG_ENTER("free_tmp_table");
- DBUG_PRINT("enter",("table: %s",entry->alias));
-
- save_proc_info=thd->proc_info;
- thd_proc_info(thd, "removing tmp table");
-
- // Release latches since this can take a long time
- ha_release_temporary_latches(thd);
-
- if (entry->file)
- {
- if (entry->db_stat)
- entry->file->ha_drop_table(entry->s->table_name.str);
- else
- entry->file->ha_delete_table(entry->s->table_name.str);
- delete entry->file;
- }
-
- /* free blobs */
- for (Field **ptr=entry->field ; *ptr ; ptr++)
- (*ptr)->free();
- free_io_cache(entry);
-
- if (entry->temp_pool_slot != MY_BIT_NONE)
- bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot);
+/**
+ If a HEAP table gets full, create a MyISAM table and copy all rows to it
+*/
- plugin_unlock(0, entry->s->db_plugin);
+bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
+ ENGINE_COLUMNDEF *start_recinfo,
+ ENGINE_COLUMNDEF **recinfo,
+ int error,
+ bool ignore_last_dupp_key_error,
+ bool *is_duplicate)
+{
+ return create_internal_tmp_table_from_heap2(thd, table,
+ start_recinfo, recinfo, error,
+ ignore_last_dupp_key_error,
+ myisam_hton,
+ "converting HEAP to MyISAM",
+ is_duplicate);
+}
- free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
- thd_proc_info(thd, save_proc_info);
+#endif /* WITH_ARIA_STORAGE_ENGINE && USE_ARIA_FOR_TMP_TABLES */
- DBUG_VOID_RETURN;
-}
-/**
- If a HEAP table gets full, create a MyISAM table and copy all rows
- to this.
+/*
+ If a HEAP table gets full, create an internal table in MyISAM or Aria
+ and copy all rows to it
*/
-bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
- int error, bool ignore_last_dupp_key_error)
+
+static bool
+create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
+ ENGINE_COLUMNDEF *start_recinfo,
+ ENGINE_COLUMNDEF **recinfo,
+ int error,
+ bool ignore_last_dupp_key_error,
+ handlerton *hton,
+ const char *proc_info,
+ bool *is_duplicate)
{
TABLE new_table;
TABLE_SHARE share;
const char *save_proc_info;
- int write_err;
- DBUG_ENTER("create_myisam_from_heap");
+ int write_err= 0;
+ DBUG_ENTER("create_internal_tmp_table_from_heap2");
+ if (is_duplicate)
+ *is_duplicate= FALSE;
if (table->s->db_type() != heap_hton ||
error != HA_ERR_RECORD_FILE_FULL)
@@ -11308,77 +16272,81 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
table->file->print_error(error, MYF(ME_FATALERROR));
DBUG_RETURN(1);
}
-
- // Release latches since this can take a long time
- ha_release_temporary_latches(thd);
-
new_table= *table;
share= *table->s;
new_table.s= &share;
- new_table.s->db_plugin= ha_lock_engine(thd, myisam_hton);
+ new_table.s->db_plugin= ha_lock_engine(thd, hton);
if (!(new_table.file= get_new_handler(&share, &new_table.mem_root,
new_table.s->db_type())))
DBUG_RETURN(1); // End of memory
save_proc_info=thd->proc_info;
- thd_proc_info(thd, "converting HEAP to MyISAM");
+ thd_proc_info(thd, proc_info);
- if (create_myisam_tmp_table(&new_table, param,
- thd->lex->select_lex.options | thd->variables.option_bits,
- thd->variables.big_tables))
+ new_table.no_rows= table->no_rows;
+ if (create_internal_tmp_table(&new_table, table->key_info, start_recinfo,
+ recinfo,
+ thd->lex->select_lex.options |
+ thd->variables.option_bits))
goto err2;
if (open_tmp_table(&new_table))
goto err1;
if (table->file->indexes_are_disabled())
new_table.file->ha_disable_indexes(HA_KEY_SWITCH_ALL);
table->file->ha_index_or_rnd_end();
- table->file->ha_rnd_init(1);
- if (table->no_rows)
- {
+ if (table->file->ha_rnd_init_with_error(1))
+ DBUG_RETURN(1);
+ if (new_table.no_rows)
new_table.file->extra(HA_EXTRA_NO_ROWS);
- new_table.no_rows=1;
+ else
+ {
+ /* update table->file->stats.records */
+ table->file->info(HA_STATUS_VARIABLE);
+ new_table.file->ha_start_bulk_insert(table->file->stats.records);
}
-#ifdef TO_BE_DONE_LATER_IN_4_1
- /*
- To use start_bulk_insert() (which is new in 4.1) we need to find
- all places where a corresponding end_bulk_insert() should be put.
- */
- table->file->info(HA_STATUS_VARIABLE); /* update table->file->stats.records */
- new_table.file->ha_start_bulk_insert(table->file->stats.records);
-#else
- /* HA_EXTRA_WRITE_CACHE can stay until close, no need to disable it */
- new_table.file->extra(HA_EXTRA_WRITE_CACHE);
-#endif
-
/*
copy all old rows from heap table to MyISAM table
This is the only code that uses record[1] to read/write but this
is safe as this is a temporary MyISAM table without timestamp/autoincrement
or partitioning.
*/
- while (!table->file->rnd_next(new_table.record[1]))
+ while (!table->file->ha_rnd_next(new_table.record[1]))
{
- write_err= new_table.file->ha_write_row(new_table.record[1]);
+ write_err= new_table.file->ha_write_tmp_row(new_table.record[1]);
DBUG_EXECUTE_IF("raise_error", write_err= HA_ERR_FOUND_DUPP_KEY ;);
if (write_err)
goto err;
+ if (thd->killed)
+ {
+ thd->send_kill_message();
+ goto err_killed;
+ }
}
+ if (!new_table.no_rows && new_table.file->ha_end_bulk_insert())
+ goto err;
/* copy row that filled HEAP table */
- if ((write_err=new_table.file->ha_write_row(table->record[0])))
+ if ((write_err=new_table.file->ha_write_tmp_row(table->record[0])))
{
if (new_table.file->is_fatal_error(write_err, HA_CHECK_DUP) ||
!ignore_last_dupp_key_error)
goto err;
+ if (is_duplicate)
+ *is_duplicate= TRUE;
+ }
+ else
+ {
+ if (is_duplicate)
+ *is_duplicate= FALSE;
}
/* remove heap table and change to use myisam table */
(void) table->file->ha_rnd_end();
- (void) table->file->close(); // This deletes the table !
+ (void) table->file->ha_close(); // This deletes the table !
delete table->file;
table->file=0;
plugin_unlock(0, table->s->db_plugin);
- share.db_plugin= my_plugin_lock(0, &share.db_plugin);
+ share.db_plugin= my_plugin_lock(0, share.db_plugin);
new_table.s= table->s; // Keep old share
*table= new_table;
*table->s= share;
@@ -11386,15 +16354,16 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
table->file->change_table_ptr(table, table->s);
table->use_all_columns();
if (save_proc_info)
- thd_proc_info(thd, (!strcmp(save_proc_info,"Copying to tmp table") ?
- "Copying to tmp table on disk" : save_proc_info));
+ thd_proc_info(thd, save_proc_info == copy_to_tmp_table ?
+ "Copying to tmp table on disk" : save_proc_info);
DBUG_RETURN(0);
err:
DBUG_PRINT("error",("Got error: %d",write_err));
table->file->print_error(write_err, MYF(0));
+err_killed:
(void) table->file->ha_rnd_end();
- (void) new_table.file->close();
+ (void) new_table.file->ha_close();
err1:
new_table.file->ha_delete_table(new_table.s->table_name.str);
err2:
@@ -11405,6 +16374,46 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
}
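
create_internal_tmp_table_from_heap2() above converts a full HEAP table by creating the on-disk table, copying every buffered row plus the row that triggered the overflow, and then swapping the handler under the same TABLE object. A toy model of that control flow, with a bounded std::set standing in for the HEAP engine (none of these types are the real handler interface):

#include <cstdio>
#include <set>

// Toy engine: reports "full" at a fixed capacity, enforces a unique key.
struct ToyEngine
{
  size_t capacity;                 // ~ tmp_table_size for the HEAP table
  std::set<int> rows;

  // returns 0 = ok, 1 = duplicate key, 2 = table full
  int write_row(int key)
  {
    if (rows.size() >= capacity)
      return 2;
    return rows.insert(key).second ? 0 : 1;
  }
};

// Mirrors the shape of create_internal_tmp_table_from_heap2(): copy everything
// already in 'heap' into a fresh unbounded engine, then retry the failed row.
static bool convert_to_disk(ToyEngine &heap, int failed_key,
                            bool ignore_last_dup, bool *is_duplicate)
{
  ToyEngine disk= { (size_t) -1, {} };        // "never full"
  for (int key : heap.rows)                   // copy all old rows
    if (disk.write_row(key) != 0)
      return true;                            // fatal copy error

  int err= disk.write_row(failed_key);        // copy row that filled the heap
  *is_duplicate= (err == 1);
  if (err == 1 && !ignore_last_dup)
    return true;

  heap= disk;                                 // caller keeps the same object
  return false;
}

int main()
{
  ToyEngine heap= { 3, { 1, 2, 3 } };
  bool dup;
  if (!convert_to_disk(heap, 4, true, &dup))
    std::printf("rows on disk: %zu, duplicate: %d\n", heap.rows.size(), dup);
  return 0;
}
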
+void
+free_tmp_table(THD *thd, TABLE *entry)
+{
+ MEM_ROOT own_root= entry->mem_root;
+ const char *save_proc_info;
+ DBUG_ENTER("free_tmp_table");
+ DBUG_PRINT("enter",("table: %s alias: %s",entry->s->table_name.str,
+ entry->alias.c_ptr()));
+
+ save_proc_info=thd->proc_info;
+ thd_proc_info(thd, "removing tmp table");
+
+ if (entry->file && entry->created)
+ {
+ entry->file->ha_index_or_rnd_end();
+ if (entry->db_stat)
+ entry->file->ha_drop_table(entry->s->table_name.str);
+ else
+ entry->file->ha_delete_table(entry->s->table_name.str);
+ delete entry->file;
+ }
+
+ /* free blobs */
+ for (Field **ptr=entry->field ; *ptr ; ptr++)
+ (*ptr)->free();
+ free_io_cache(entry);
+
+ if (entry->temp_pool_slot != MY_BIT_NONE)
+ bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot);
+
+ plugin_unlock(0, entry->s->db_plugin);
+ entry->alias.free();
+
+ free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
+ thd_proc_info(thd, save_proc_info);
+
+ DBUG_VOID_RETURN;
+}
+
+
/**
@details
Rows produced by a join sweep may end up in a temporary table or be sent
@@ -11494,14 +16503,14 @@ Next_select_func setup_end_select_func(JOIN *join)
@retval
-1 if error should be sent
*/
-
static int
do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
{
int rc= 0;
enum_nested_loop_state error= NESTED_LOOP_OK;
- JOIN_TAB *join_tab= NULL;
+ JOIN_TAB *join_tab;
DBUG_ENTER("do_select");
+ LINT_INIT(join_tab);
join->procedure=procedure;
join->tmp_table= table; /* Save for easy recursion */
@@ -11518,26 +16527,30 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
if (rc)
{
table->file->print_error(rc, MYF(0));
- DBUG_RETURN(rc);
+ DBUG_RETURN(-1);
}
}
}
/* Set up select_end */
Next_select_func end_select= setup_end_select_func(join);
- if (join->tables)
+ if (join->table_count)
{
- join->join_tab[join->tables-1].next_select= end_select;
-
+ join->join_tab[join->top_join_tab_count - 1].next_select= end_select;
join_tab=join->join_tab+join->const_tables;
}
join->send_records=0;
- if (join->tables == join->const_tables)
+ if (join->table_count == join->const_tables)
{
/*
HAVING will be checked after processing aggregate functions,
- But WHERE should checkd here (we alredy have read tables)
+ But WHERE should be checked here (we have already read the tables).
+ Notice that make_join_select() splits all conditions in this case
+ into two groups exec_const_cond and outer_ref_cond.
+ If join->table_count == join->const_tables then it is
+ sufficient to check only the condition pseudo_bits_cond.
*/
- if (!join->conds || join->conds->val_int())
+ DBUG_ASSERT(join->outer_ref_cond == NULL);
+ if (!join->pseudo_bits_cond || join->pseudo_bits_cond->val_int())
{
error= (*end_select)(join, 0, 0);
if (error == NESTED_LOOP_OK || error == NESTED_LOOP_QUERY_LIMIT)
@@ -11553,9 +16566,12 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
}
else if (join->send_row_on_empty_set())
{
- List<Item> *columns_list= (procedure ? &join->procedure_fields_list :
- fields);
- rc= join->result->send_data(*columns_list);
+ if (!join->having || join->having->val_int())
+ {
+ List<Item> *columns_list= (procedure ? &join->procedure_fields_list :
+ fields);
+ rc= join->result->send_data(*columns_list) > 0;
+ }
}
/*
An error can happen when evaluating the conds
@@ -11567,17 +16583,20 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
}
else
{
- DBUG_ASSERT(join->tables);
- error= sub_select(join,join_tab,0);
- if (error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS)
+ DBUG_ASSERT(join->table_count);
+ if (join->outer_ref_cond && !join->outer_ref_cond->val_int())
+ error= NESTED_LOOP_NO_MORE_ROWS;
+ else
+ error= sub_select(join,join_tab,0);
+ if ((error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS) &&
+ join->thd->killed != ABORT_QUERY)
error= sub_select(join,join_tab,1);
if (error == NESTED_LOOP_QUERY_LIMIT)
error= NESTED_LOOP_OK; /* select_limit used */
}
- if (error == NESTED_LOOP_NO_MORE_ROWS)
+ if (error == NESTED_LOOP_NO_MORE_ROWS || join->thd->killed == ABORT_QUERY)
error= NESTED_LOOP_OK;
-
if (table)
{
int tmp, new_errno= 0;
@@ -11627,33 +16646,100 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
}
+int rr_sequential_and_unpack(READ_RECORD *info)
+{
+ int error;
+ if ((error= rr_sequential(info)))
+ return error;
+
+ for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
+ (*cp->do_copy)(cp);
+
+ return error;
+}
+
+
+/*
+ Fill the join buffer with partial records, retrieve all full matches for them
+
+ SYNOPSIS
+ sub_select_cache()
+ join pointer to the structure providing all context info for the query
+ join_tab the first next table of the execution plan to be retrieved
+ end_of_records true when we need to perform the final steps of the retrieval
+
+ DESCRIPTION
+ For a given table Ti= join_tab from the sequence of tables of the chosen
+ execution plan T1,...,Ti,...,Tn the function just puts the partial record
+ t1,...,t[i-1] into the join buffer associated with table Ti unless this
+ is the last record added into the buffer. In this case, the function
+ additionally finds all matching full records for all partial
+ records accumulated in the buffer, after which it cleans the buffer up.
+ If a partial join record t1,...,ti is extended utilizing a dynamic
+ range scan then it is not put into the join buffer. Rather all matching
+ records are found for it at once by the function sub_select.
+
+ NOTES
+ The function implements the algorithmic schema for both Blocked Nested
+ Loop Join and Batched Key Access Join. The difference can be seen only at
+ the level of the implementation of the put_record and join_records
+ virtual methods for the cache object associated with the join_tab.
+ The put_record method accumulates records in the cache, while the
+ join_records method builds all matching join records and sends them into
+ the output stream.
+
+ RETURN
+ return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
+*/
+
enum_nested_loop_state
-sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
+sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
{
enum_nested_loop_state rc;
+ JOIN_CACHE *cache= join_tab->cache;
+ DBUG_ENTER("sub_select_cache");
+
+ /*
+ This function cannot be called if join_tab has no associated join
+ buffer
+ */
+ DBUG_ASSERT(cache != NULL);
+
+ join_tab->cache->reset_join(join);
if (end_of_records)
{
- rc= flush_cached_records(join,join_tab,FALSE);
+ rc= cache->join_records(FALSE);
if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS)
- rc= sub_select(join,join_tab,end_of_records);
- return rc;
+ rc= sub_select(join, join_tab, end_of_records);
+ DBUG_RETURN(rc);
}
- if (join->thd->killed) // If aborted by user
+ if (join->thd->killed)
{
+ /* The user has aborted the execution of the query */
join->thd->send_kill_message();
- return NESTED_LOOP_KILLED; /* purecov: inspected */
+ DBUG_RETURN(NESTED_LOOP_KILLED);
}
- if (join_tab->use_quick != 2 || test_if_quick_select(join_tab) <= 0)
+ if (!test_if_use_dynamic_range_scan(join_tab))
{
- if (!store_record_in_cache(&join_tab->cache))
- return NESTED_LOOP_OK; // There is more room in cache
- return flush_cached_records(join,join_tab,FALSE);
+ if (!cache->put_record())
+ DBUG_RETURN(NESTED_LOOP_OK);
+ /*
+ We have decided not to add any more records to the buffer after the
+ one we've just put there. Now try to find all the matching
+ extensions for all records in the buffer.
+ */
+ rc= cache->join_records(FALSE);
+ DBUG_RETURN(rc);
}
- rc= flush_cached_records(join, join_tab, TRUE);
+ /*
+ TODO: Check whether we really need the call below or whether we can do
+ without it. If it is not needed, remove it.
+ */
+ rc= cache->join_records(TRUE);
if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS)
rc= sub_select(join, join_tab, end_of_records);
- return rc;
+ DBUG_RETURN(rc);
}
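
A compact model of the control flow documented above: partial rows are collected in the join buffer, and only when the buffer refuses another row (or the caller signals end_of_records) are all buffered rows joined at once. The buffer and the join step below are toy stand-ins, not the real JOIN_CACHE interface:

#include <cstdio>
#include <vector>

struct ToyJoinCache
{
  size_t capacity;
  std::vector<int> rows;

  // Like put_record() as used above: returns true when, after adding the
  // record, the buffer should not accept any more records.
  bool put_record(int partial_row)
  {
    rows.push_back(partial_row);
    return rows.size() >= capacity;
  }

  // Like join_records(): find matches for everything buffered, then clean up.
  void join_records()
  {
    for (int r : rows)
      std::printf("joining buffered partial row %d with the next table\n", r);
    rows.clear();
  }
};

// Shape of sub_select_cache(): buffer the row, flush on "buffer full".
static void toy_sub_select_cache(ToyJoinCache &cache, int partial_row,
                                 bool end_of_records)
{
  if (end_of_records)
  {
    cache.join_records();
    return;
  }
  if (cache.put_record(partial_row))
    cache.join_records();
}

int main()
{
  ToyJoinCache cache= { 2, {} };
  for (int row= 1; row <= 5; row++)
    toy_sub_select_cache(cache, row, false);
  toy_sub_select_cache(cache, 0, true);     // end of records: flush the rest
  return 0;
}
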
/**
@@ -11779,13 +16865,36 @@ sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
enum_nested_loop_state
sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
{
- join_tab->table->null_row=0;
- if (end_of_records)
- return (*join_tab->next_select)(join,join_tab+1,end_of_records);
+ DBUG_ENTER("sub_select");
+ if (join_tab->last_inner)
+ {
+ JOIN_TAB *last_inner_tab= join_tab->last_inner;
+ for (JOIN_TAB *jt= join_tab; jt <= last_inner_tab; jt++)
+ jt->table->null_row= 0;
+ }
+ else
+ join_tab->table->null_row=0;
+
+ if (end_of_records)
+ {
+ enum_nested_loop_state nls=
+ (*join_tab->next_select)(join,join_tab+1,end_of_records);
+ DBUG_RETURN(nls);
+ }
int error;
- enum_nested_loop_state rc;
+ enum_nested_loop_state rc= NESTED_LOOP_OK;
READ_RECORD *info= &join_tab->read_record;
+
+ for (SJ_TMP_TABLE *flush_dups_table= join_tab->flush_weedout_table;
+ flush_dups_table;
+ flush_dups_table= flush_dups_table->next_flush_table)
+ {
+ flush_dups_table->sj_weedout_delete_rows();
+ }
+
+ if (!join_tab->preread_init_done && join_tab->preread_init())
+ DBUG_RETURN(NESTED_LOOP_ERROR);
join->return_tab= join_tab;
@@ -11799,15 +16908,62 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
/* Set first_unmatched for the last inner table of this group */
join_tab->last_inner->first_unmatched= join_tab;
+ if (join_tab->on_precond && !join_tab->on_precond->val_int())
+ rc= NESTED_LOOP_NO_MORE_ROWS;
}
join->thd->warning_info->reset_current_row_for_warning();
- error= (*join_tab->read_first_record)(join_tab);
- rc= evaluate_join_record(join, join_tab, error);
+ if (rc != NESTED_LOOP_NO_MORE_ROWS &&
+ (rc= join_tab_execution_startup(join_tab)) < 0)
+ DBUG_RETURN(rc);
+
+ if (join_tab->loosescan_match_tab)
+ join_tab->loosescan_match_tab->found_match= FALSE;
- while (rc == NESTED_LOOP_OK)
+ if (rc != NESTED_LOOP_NO_MORE_ROWS)
{
+ error= (*join_tab->read_first_record)(join_tab);
+ if (!error && join_tab->keep_current_rowid)
+ join_tab->table->file->position(join_tab->table->record[0]);
+ rc= evaluate_join_record(join, join_tab, error);
+ }
+
+ /*
+ Note: psergey has added the 2nd part of the following condition; the
+ change should probably be made in 5.1, too.
+ */
+ bool skip_over= FALSE;
+ while (rc == NESTED_LOOP_OK && join->return_tab >= join_tab)
+ {
+ if (join_tab->loosescan_match_tab &&
+ join_tab->loosescan_match_tab->found_match)
+ {
+ KEY *key= join_tab->table->key_info + join_tab->loosescan_key;
+ key_copy(join_tab->loosescan_buf, join_tab->table->record[0], key,
+ join_tab->loosescan_key_len);
+ skip_over= TRUE;
+ }
+
error= info->read_record(info);
+
+ if (skip_over && !error)
+ {
+ if (!key_cmp(join_tab->table->key_info[join_tab->loosescan_key].key_part,
+ join_tab->loosescan_buf, join_tab->loosescan_key_len))
+ {
+ /*
+ This is the LooseScan action: skip over records with the same key
+ value if we already had a match for them.
+ */
+ continue;
+ }
+ join_tab->loosescan_match_tab->found_match= FALSE;
+ skip_over= FALSE;
+ }
+
+ if (join_tab->keep_current_rowid)
+ join_tab->table->file->position(join_tab->table->record[0]);
+
rc= evaluate_join_record(join, join_tab, error);
}
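
The skip_over handling above is the LooseScan strategy: once some row with a given key prefix has produced a match, the remaining rows with the same prefix are skipped. A toy version of that loop over a sorted stream of keys (the matcher is faked with a parity test; loosescan_buf and found_match become plain local variables):

#include <cstdio>
#include <vector>

// Toy LooseScan: rows arrive ordered by key, and once a key value has
// produced a match we skip the remaining rows carrying the same key.
static void toy_loosescan(const std::vector<int> &sorted_keys)
{
  bool found_match= false;    // ~ loosescan_match_tab->found_match
  int  saved_key= 0;          // ~ loosescan_buf
  bool skip_over= false;

  for (int key : sorted_keys)
  {
    if (skip_over)
    {
      if (key == saved_key)
        continue;             // same key as the matched one: skip the row
      skip_over= false;       // new key value: start probing again
      found_match= false;
    }
    std::printf("probe inner tables with key %d\n", key);
    found_match= (key % 2 == 0);      // stand-in for "a match was found"
    if (found_match)
    {
      saved_key= key;                 // remember the matched key value
      skip_over= true;
    }
  }
}

int main()
{
  toy_loosescan({ 1, 1, 2, 2, 2, 3, 4, 4 });
  return 0;
}
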
@@ -11817,37 +16973,50 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
if (rc == NESTED_LOOP_NO_MORE_ROWS)
rc= NESTED_LOOP_OK;
- return rc;
+ DBUG_RETURN(rc);
}
/**
- Process one record of the nested loop join.
-
- This function will evaluate parts of WHERE/ON clauses that are
- applicable to the partial record on hand and in case of success
- submit this record to the next level of the nested loop.
+ @brief Process one row of the nested loop join.
+
+ This function will evaluate parts of WHERE/ON clauses that are
+ applicable to the partial row on hand and in case of success
+ submit this row to the next level of the nested loop.
+
+ @param join - The join object
+ @param join_tab - The most inner join_tab being processed
+ @param error > 0: Error, terminate processing
+ = 0: (Partial) row is available
+ < 0: No more rows available at this level
+ @return Nested loop state (Ok, No_more_rows, Error, Killed)
*/
static enum_nested_loop_state
evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
int error)
{
- bool not_used_in_distinct=join_tab->not_used_in_distinct;
+ bool shortcut_for_distinct= join_tab->shortcut_for_distinct;
ha_rows found_records=join->found_records;
COND *select_cond= join_tab->select_cond;
bool select_cond_result= TRUE;
+ DBUG_ENTER("evaluate_join_record");
+ DBUG_PRINT("enter",
+ ("evaluate_join_record join: %p join_tab: %p"
+ " cond: %p error: %d", join, join_tab, select_cond, error));
if (error > 0 || (join->thd->is_error())) // Fatal error
- return NESTED_LOOP_ERROR;
+ DBUG_RETURN(NESTED_LOOP_ERROR);
if (error < 0)
- return NESTED_LOOP_NO_MORE_ROWS;
+ DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
if (join->thd->killed) // Aborted by user
{
join->thd->send_kill_message();
- return NESTED_LOOP_KILLED; /* purecov: inspected */
+ DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
}
- DBUG_PRINT("info", ("select cond 0x%lx", (ulong)select_cond));
+
+ if (join_tab->table->vfield)
+ update_virtual_fields(join->thd, join_tab->table);
if (select_cond)
{
@@ -11855,7 +17024,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
/* check for errors evaluating the condition */
if (join->thd->is_error())
- return NESTED_LOOP_ERROR;
+ DBUG_RETURN(NESTED_LOOP_ERROR);
}
if (!select_cond || select_cond_result)
@@ -11903,6 +17072,19 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
{
/* The condition attached to table tab is false */
+ if (tab == join_tab)
+ {
+ found= 0;
+ }
+ else
+ {
+ /*
+ Set a return point if rejected predicate is attached
+ not to the last table of the current nest level.
+ */
+ join->return_tab= tab;
+ }
+
if (tab->table->reginfo.not_exists_optimize)
{
/*
@@ -11911,19 +17093,11 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
Any found 'tab' matches are known to evaluate to 'false'.
Returning .._NO_MORE_ROWS will skip rem. 'tab' rows.
*/
- return NESTED_LOOP_NO_MORE_ROWS;
+ DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
}
-
- if (tab == join_tab)
- found= 0;
- else
+ else if (tab != join_tab)
{
- /*
- Set a return point if rejected predicate is attached
- not to the last table of the current nest level.
- */
- join->return_tab= tab;
- return NESTED_LOOP_OK;
+ DBUG_RETURN(NESTED_LOOP_OK);
}
}
}
@@ -11937,6 +17111,26 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
join_tab->first_unmatched= first_unmatched;
}
+ JOIN_TAB *return_tab= join->return_tab;
+ join_tab->found_match= TRUE;
+
+ if (join_tab->check_weed_out_table && found)
+ {
+ int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd);
+ if (res == -1)
+ DBUG_RETURN(NESTED_LOOP_ERROR);
+ else if (res == 1)
+ found= FALSE;
+ }
+ else if (join_tab->do_firstmatch)
+ {
+ /*
+ We should return to the join_tab->do_firstmatch after we have
+ enumerated all the suffixes for the current prefix row combination
+ */
+ return_tab= join_tab->do_firstmatch;
+ }
+
/*
It was not just a return to lower loop level when one
of the newly activated predicates is evaluated as false
@@ -11953,21 +17147,23 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
rc= (*join_tab->next_select)(join, join_tab+1, 0);
join->thd->warning_info->inc_current_row_for_warning();
if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
- return rc;
+ DBUG_RETURN(rc);
+ if (return_tab < join->return_tab)
+ join->return_tab= return_tab;
/* check for errors evaluating the condition */
if (join->thd->is_error())
- return NESTED_LOOP_ERROR;
+ DBUG_RETURN(NESTED_LOOP_ERROR);
if (join->return_tab < join_tab)
- return NESTED_LOOP_OK;
+ DBUG_RETURN(NESTED_LOOP_OK);
/*
Test if this was a SELECT DISTINCT query on a table that
was not in the field list; In this case we can abort if
we found a row, as no new rows can be added to the result.
*/
- if (not_used_in_distinct && found_records != join->found_records)
- return NESTED_LOOP_NO_MORE_ROWS;
+ if (shortcut_for_distinct && found_records != join->found_records)
+ DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
}
else
{
@@ -11985,10 +17181,9 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
join->thd->warning_info->inc_current_row_for_warning();
join_tab->read_record.unlock_row(join_tab);
}
- return NESTED_LOOP_OK;
+ DBUG_RETURN(NESTED_LOOP_OK);
}
-
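
The weed-out and FirstMatch handling added above changes where the nested-loop executor resumes after a match: a duplicate row combination can be discarded via sj_weedout_check_row(), or return_tab can be moved to do_firstmatch so that, once one matching inner row is found, execution returns to the table do_firstmatch points at instead of enumerating more inner rows. The standalone sketch below (illustrative only, no server types) shows the FirstMatch effect on a toy nested loop.

    // Illustrative sketch only (no server types): FirstMatch-style semi-join
    // on plain vectors. Once an inner match is found for an outer row we stop
    // scanning the inner table, mirroring the do_firstmatch return point.
    #include <cstdio>
    #include <vector>

    int main()
    {
      std::vector<int> outer= {1, 2, 3};
      std::vector<int> inner= {2, 2, 3, 3, 3};   // duplicates on purpose

      for (int o : outer)
      {
        for (int i : inner)
        {
          if (i == o)                 // the semi-join condition
          {
            std::printf("%d matches\n", o);
            break;                    // FirstMatch: back to the outer table
          }
        }
      }
      return 0;
    }

Without the break, outer value 2 would be reported twice and 3 three times; the break plays the role of moving return_tab to do_firstmatch in the hunk above.
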
/**
@details
@@ -12049,103 +17244,32 @@ evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab)
/*
The row complemented by nulls satisfies all conditions
attached to inner tables.
- Send the row complemented by nulls to be joined with the
- remaining tables.
*/
- return (*join_tab->next_select)(join, join_tab+1, 0);
-}
-
-
-static enum_nested_loop_state
-flush_cached_records(JOIN *join,JOIN_TAB *join_tab,bool skip_last)
-{
- enum_nested_loop_state rc= NESTED_LOOP_OK;
- int error;
- READ_RECORD *info;
-
- join_tab->table->null_row= 0;
- if (!join_tab->cache.records)
- return NESTED_LOOP_OK; /* Nothing to do */
- if (skip_last)
- (void) store_record_in_cache(&join_tab->cache); // Must save this for later
- if (join_tab->use_quick == 2)
- {
- if (join_tab->select->quick)
- { /* Used quick select last. reset it */
- delete join_tab->select->quick;
- join_tab->select->quick=0;
- }
- }
- /* read through all records */
- if ((error=join_init_read_record(join_tab)))
+ if (join_tab->check_weed_out_table)
{
- reset_cache_write(&join_tab->cache);
- return error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
+ int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd);
+ if (res == -1)
+ return NESTED_LOOP_ERROR;
+ else if (res == 1)
+ return NESTED_LOOP_OK;
}
-
- for (JOIN_TAB *tmp=join->join_tab; tmp != join_tab ; tmp++)
+ else if (join_tab->do_firstmatch)
{
- tmp->status=tmp->table->status;
- tmp->table->status=0;
+ /*
+      We should return to join_tab->do_firstmatch after we have
+      enumerated all the suffixes for the current prefix row combination.
+ */
+ if (join_tab->do_firstmatch < join->return_tab)
+ join->return_tab= join_tab->do_firstmatch;
}
- info= &join_tab->read_record;
- do
- {
- if (join->thd->killed)
- {
- join->thd->send_kill_message();
- return NESTED_LOOP_KILLED; // Aborted by user /* purecov: inspected */
- }
- SQL_SELECT *select=join_tab->select;
- if (rc == NESTED_LOOP_OK)
- {
- bool skip_record= FALSE;
- if (join_tab->cache.select &&
- join_tab->cache.select->skip_record(join->thd, &skip_record))
- {
- reset_cache_write(&join_tab->cache);
- return NESTED_LOOP_ERROR;
- }
-
- if (!skip_record)
- {
- uint i;
- reset_cache_read(&join_tab->cache);
- for (i=(join_tab->cache.records- (skip_last ? 1 : 0)) ; i-- > 0 ;)
- {
- read_cached_record(join_tab);
- skip_record= FALSE;
- if (select && select->skip_record(join->thd, &skip_record))
- {
- reset_cache_write(&join_tab->cache);
- return NESTED_LOOP_ERROR;
- }
- if (!skip_record)
- {
- rc= (join_tab->next_select)(join,join_tab+1,0);
- if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
- {
- reset_cache_write(&join_tab->cache);
- return rc;
- }
- }
- }
- }
- }
- } while (!(error=info->read_record(info)));
-
- if (skip_last)
- read_cached_record(join_tab); // Restore current record
- reset_cache_write(&join_tab->cache);
- if (error > 0) // Fatal error
- return NESTED_LOOP_ERROR; /* purecov: inspected */
- for (JOIN_TAB *tmp2=join->join_tab; tmp2 != join_tab ; tmp2++)
- tmp2->table->status=tmp2->status;
- return NESTED_LOOP_OK;
+ /*
+ Send the row complemented by nulls to be joined with the
+ remaining tables.
+ */
+ return (*join_tab->next_select)(join, join_tab+1, 0);
}
-
/*****************************************************************************
The different ways to read a record
Returns -1 if row was not found, 0 if row was found and 1 on errors
@@ -12165,9 +17289,14 @@ int report_error(TABLE *table, int error)
print them to the .err log
*/
if (error != HA_ERR_LOCK_DEADLOCK && error != HA_ERR_LOCK_WAIT_TIMEOUT
- && !table->in_use->killed)
+ && error != HA_ERR_TABLE_DEF_CHANGED && !table->in_use->killed)
+ {
+ push_warning_printf(table->in_use, MYSQL_ERROR::WARN_LEVEL_WARN, error,
+ "Got error %d when reading table %`s.%`s",
+ error, table->s->db.str, table->s->table_name.str);
sql_print_error("Got error %d when reading table '%s'",
error, table->s->path.str);
+ }
table->file->print_error(error,MYF(0));
return 1;
}
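
report_error() above is extended to also skip HA_ERR_TABLE_DEF_CHANGED and to push a client-visible warning in addition to the server log line. A minimal sketch of that "log only unexpected errors" filter, with stand-in error codes rather than the real handler constants:

    // Illustrative sketch, not server code: log only errors that are not
    // expected during normal operation; the enum values are stand-ins for
    // the real HA_ERR_* constants.
    #include <cstdio>

    enum sketch_error
    {
      ERR_LOCK_DEADLOCK= 1,
      ERR_LOCK_WAIT_TIMEOUT,
      ERR_TABLE_DEF_CHANGED,
      ERR_IO_FAILURE
    };

    static int report_read_error(sketch_error err, bool killed)
    {
      if (err != ERR_LOCK_DEADLOCK && err != ERR_LOCK_WAIT_TIMEOUT &&
          err != ERR_TABLE_DEF_CHANGED && !killed)
        std::fprintf(stderr, "Got unexpected error %d when reading table\n",
                     (int) err);
      return 1;                                  // caller always sees a failure
    }

    int main()
    {
      report_read_error(ERR_LOCK_DEADLOCK, false); // silent: expected error
      report_read_error(ERR_IO_FAILURE, false);    // logged: unexpected error
      return 0;
    }
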
@@ -12177,26 +17306,56 @@ int safe_index_read(JOIN_TAB *tab)
{
int error;
TABLE *table= tab->table;
- if ((error=table->file->index_read_map(table->record[0],
- tab->ref.key_buff,
- make_prev_keypart_map(tab->ref.key_parts),
- HA_READ_KEY_EXACT)))
+ if ((error= table->file->ha_index_read_map(table->record[0],
+ tab->ref.key_buff,
+ make_prev_keypart_map(tab->ref.key_parts),
+ HA_READ_KEY_EXACT)))
return report_error(table, error);
return 0;
}
+/**
+ Reads content of constant table
+
+ @param tab table
+ @param pos position of table in query plan
+
+ @retval 0 ok, one row was found or one NULL-complemented row was created
+ @retval -1 ok, no row was found and no NULL-complemented row was created
+ @retval 1 error
+*/
+
static int
join_read_const_table(JOIN_TAB *tab, POSITION *pos)
{
int error;
+ TABLE_LIST *tbl;
DBUG_ENTER("join_read_const_table");
TABLE *table=tab->table;
table->const_table=1;
table->null_row=0;
table->status=STATUS_NO_RECORD;
- if (tab->type == JT_SYSTEM)
+ if (tab->table->pos_in_table_list->is_materialized_derived() &&
+ !tab->table->pos_in_table_list->fill_me)
+ {
+ //TODO: don't get here at all
+ /* Skip materialized derived tables/views. */
+ DBUG_RETURN(0);
+ }
+ else if (tab->table->pos_in_table_list->jtbm_subselect &&
+ tab->table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
+ {
+ /* Row will not be found */
+ int res;
+ if (tab->table->pos_in_table_list->jtbm_subselect->jtbm_const_row_found)
+ res= 0;
+ else
+ res= -1;
+ DBUG_RETURN(res);
+ }
+ else if (tab->type == JT_SYSTEM)
{
if ((error=join_read_system(tab)))
{ // Info for DESCRIBE
@@ -12207,6 +17366,11 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
if (!table->pos_in_table_list->outer_join || error > 0)
DBUG_RETURN(error);
}
+ /*
+      The optimizer trusts the engine: when stats.records is 0, no
+      matching rows were found.
+ */
+ DBUG_ASSERT(table->file->stats.records > 0 || error);
}
else
{
@@ -12214,11 +17378,11 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
!table->no_keyread &&
(int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY)
{
- table->set_keyread(TRUE);
+ table->enable_keyread();
tab->index= tab->ref.key;
}
error=join_read_const(tab);
- table->set_keyread(FALSE);
+ table->disable_keyread();
if (error)
{
tab->info="unique row not found";
@@ -12229,38 +17393,67 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
DBUG_RETURN(error);
}
}
- if (*tab->on_expr_ref && !table->null_row)
+ /*
+ Evaluate an on-expression only if it is not considered expensive.
+    This mainly prevents executing subqueries in the optimization phase.
+ This is necessary since proper setup for such execution has not been
+ done at this stage.
+ */
+ if (*tab->on_expr_ref && !table->null_row &&
+ !(*tab->on_expr_ref)->is_expensive())
{
+#if !defined(DBUG_OFF) && defined(NOT_USING_ITEM_EQUAL)
+ /*
+ This test could be very useful to find bugs in the optimizer
+ where we would call this function with an expression that can't be
+    evaluated yet. We can't have this enabled by default as long as we
+    have items like Item_equal, which don't report that they are const
+    but can still be evaluated even if they contain non-const items.
+ */
+ (*tab->on_expr_ref)->update_used_tables();
+ DBUG_ASSERT((*tab->on_expr_ref)->const_item());
+#endif
if ((table->null_row= test((*tab->on_expr_ref)->val_int() == 0)))
mark_as_null_row(table);
}
if (!table->null_row)
table->maybe_null=0;
- /* Check appearance of new constant items in Item_equal objects */
- JOIN *join= tab->join;
- if (join->conds)
- update_const_equal_items(join->conds, tab);
- TABLE_LIST *tbl;
- for (tbl= join->select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
{
- TABLE_LIST *embedded;
- TABLE_LIST *embedding= tbl;
- do
- {
- embedded= embedding;
- if (embedded->on_expr)
- update_const_equal_items(embedded->on_expr, tab);
- embedding= embedded->embedding;
+ JOIN *join= tab->join;
+ List_iterator<TABLE_LIST> ti(join->select_lex->leaf_tables);
+ /* Check appearance of new constant items in Item_equal objects */
+ if (join->conds)
+ update_const_equal_items(join->conds, tab, TRUE);
+ while ((tbl= ti++))
+ {
+ TABLE_LIST *embedded;
+ TABLE_LIST *embedding= tbl;
+ do
+ {
+ embedded= embedding;
+ if (embedded->on_expr)
+ update_const_equal_items(embedded->on_expr, tab, TRUE);
+ embedding= embedded->embedding;
+ }
+ while (embedding &&
+ embedding->nested_join->join_list.head() == embedded);
}
- while (embedding &&
- embedding->nested_join->join_list.head() == embedded);
}
-
DBUG_RETURN(0);
}
+/**
+ Read a constant table when there is at most one matching row, using a table
+ scan.
+
+ @param tab Table to read
+
+ @retval 0 Row was found
+ @retval -1 Row was not found
+ @retval 1 Got an error (other than row not found) during read
+*/
static int
join_read_system(JOIN_TAB *tab)
{
@@ -12268,8 +17461,8 @@ join_read_system(JOIN_TAB *tab)
int error;
if (table->status & STATUS_GARBAGE) // If first read
{
- if ((error=table->file->read_first_row(table->record[0],
- table->s->primary_key)))
+ if ((error= table->file->ha_read_first_row(table->record[0],
+ table->s->primary_key)))
{
if (error != HA_ERR_END_OF_FILE)
return report_error(table, error);
@@ -12277,6 +17470,8 @@ join_read_system(JOIN_TAB *tab)
empty_record(table); // Make empty record
return -1;
}
+ if (table->vfield)
+ update_virtual_fields(tab->join->thd, table);
store_record(table,record[1]);
}
else if (!table->status) // Only happens with left join
@@ -12291,12 +17486,9 @@ join_read_system(JOIN_TAB *tab)
@param tab Table to read
- @retval
- 0 Row was found
- @retval
- -1 Row was not found
- @retval
- 1 Got an error (other than row not found) during read
+ @retval 0 Row was found
+ @retval -1 Row was not found
+ @retval 1 Got an error (other than row not found) during read
*/
static int
@@ -12311,10 +17503,10 @@ join_read_const(JOIN_TAB *tab)
error=HA_ERR_KEY_NOT_FOUND;
else
{
- error=table->file->index_read_idx_map(table->record[0],tab->ref.key,
- (uchar*) tab->ref.key_buff,
- make_prev_keypart_map(tab->ref.key_parts),
- HA_READ_KEY_EXACT);
+ error= table->file->ha_index_read_idx_map(table->record[0],tab->ref.key,
+ (uchar*) tab->ref.key_buff,
+ make_prev_keypart_map(tab->ref.key_parts),
+ HA_READ_KEY_EXACT);
}
if (error)
{
@@ -12325,6 +17517,8 @@ join_read_const(JOIN_TAB *tab)
return report_error(table, error);
return -1;
}
+ if (table->vfield)
+ update_virtual_fields(tab->join->thd, table);
store_record(table,record[1]);
}
else if (!(table->status & ~STATUS_NULL_ROW)) // Only happens with left join
@@ -12336,26 +17530,54 @@ join_read_const(JOIN_TAB *tab)
return table->status ? -1 : 0;
}
+/*
+ eq_ref access method implementation: "read_first" function
+
+ SYNOPSIS
+ join_read_key()
+ tab JOIN_TAB of the accessed table
+
+ DESCRIPTION
+    This is the "read_first" function for the eq_ref access method. The
+    difference from the ref access function is that it has a one-element
+    lookup cache (see cmp_buffer_with_ref).
+
+ RETURN
+ 0 - Ok
+ -1 - Row not found
+ 1 - Error
+*/
+
static int
join_read_key(JOIN_TAB *tab)
{
- int error;
- TABLE *table= tab->table;
+ return join_read_key2(tab->join->thd, tab, tab->table, &tab->ref);
+}
+
+/*
+ eq_ref access handler but generalized a bit to support TABLE and TABLE_REF
+ not from the join_tab. See join_read_key for detailed synopsis.
+*/
+int join_read_key2(THD *thd, JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref)
+{
+ int error;
if (!table->file->inited)
{
- if ((error= table->file->ha_index_init(tab->ref.key, tab->sorted)))
+ error= table->file->ha_index_init(table_ref->key, tab ? tab->sorted : TRUE);
+ if (error)
{
(void) report_error(table, error);
return 1;
}
}
- if (cmp_buffer_with_ref(tab) ||
+ /* TODO: Why don't we do "Late NULLs Filtering" here? */
+ if (cmp_buffer_with_ref(thd, table, table_ref) ||
(table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW)))
{
- if (tab->ref.key_err)
+ if (table_ref->key_err)
{
table->status=STATUS_NOT_FOUND;
return -1;
@@ -12364,28 +17586,28 @@ join_read_key(JOIN_TAB *tab)
Moving away from the current record. Unlock the row
in the handler if it did not match the partial WHERE.
*/
- if (tab->ref.has_record && tab->ref.use_count == 0)
+ if (tab && tab->ref.has_record && tab->ref.use_count == 0)
{
- tab->read_record.file->unlock_row();
- tab->ref.has_record= FALSE;
+ tab->read_record.table->file->unlock_row();
+ table_ref->has_record= FALSE;
}
- error=table->file->index_read_map(table->record[0],
- tab->ref.key_buff,
- make_prev_keypart_map(tab->ref.key_parts),
- HA_READ_KEY_EXACT);
+ error=table->file->ha_index_read_map(table->record[0],
+ table_ref->key_buff,
+ make_prev_keypart_map(table_ref->key_parts),
+ HA_READ_KEY_EXACT);
if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
return report_error(table, error);
if (! error)
{
- tab->ref.has_record= TRUE;
- tab->ref.use_count= 1;
+ table_ref->has_record= TRUE;
+ table_ref->use_count= 1;
}
}
else if (table->status == 0)
{
- DBUG_ASSERT(tab->ref.has_record);
- tab->ref.use_count++;
+ DBUG_ASSERT(table_ref->has_record);
+ table_ref->use_count++;
}
table->null_row=0;
return table->status ? -1 : 0;
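
join_read_key2() above keeps a one-element lookup cache for eq_ref: if the key buffer is unchanged since the previous call (cmp_buffer_with_ref) and the previously fetched row is still valid, it reuses that row instead of repeating the index read. A toy sketch of the same caching idea, with purely illustrative names:

    // Illustrative sketch: a one-entry lookup cache in front of an index
    // lookup, mirroring the has_record / buffered-key idea in join_read_key2().
    #include <cstdio>
    #include <map>
    #include <optional>
    #include <string>

    struct OneRowCache
    {
      std::optional<int> last_key;        // key used for the previous lookup
      std::string last_row;               // row found for last_key (if any)
      bool has_record= false;

      const std::string *lookup(const std::map<int, std::string> &index, int key)
      {
        if (last_key && *last_key == key)          // "cmp_buffer_with_ref": equal
          return has_record ? &last_row : nullptr; // reuse the cached result
        last_key= key;
        auto it= index.find(key);
        has_record= (it != index.end());
        if (has_record)
          last_row= it->second;
        return has_record ? &last_row : nullptr;
      }
    };

    int main()
    {
      std::map<int, std::string> index= {{1, "one"}, {2, "two"}};
      OneRowCache cache;
      cache.lookup(index, 1);                         // real index lookup
      const std::string *row= cache.lookup(index, 1); // served from the cache
      std::printf("%s\n", row ? row->c_str() : "not found");
      return 0;
    }

A second lookup with the same key never touches the index, which is exactly what makes eq_ref cheaper than plain ref here.
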
@@ -12435,27 +17657,26 @@ join_read_always_key(JOIN_TAB *tab)
TABLE *table= tab->table;
/* Initialize the index first */
- if (!table->file->inited &&
- (error= table->file->ha_index_init(tab->ref.key, tab->sorted)))
- {
- (void) report_error(table, error);
- return 1;
- }
-
- /* Perform "Late NULLs Filtering" (see internals manual for explanations) */
- for (uint i= 0 ; i < tab->ref.key_parts ; i++)
+ if (!table->file->inited)
{
- if ((tab->ref.null_rejecting & ((key_part_map)1 << i)) &&
- tab->ref.items[i]->is_null())
- return -1;
+ if ((error= table->file->ha_index_init(tab->ref.key, tab->sorted)))
+ {
+ (void) report_error(table, error);
+ return 1;
+ }
}
if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref))
return -1;
- if ((error=table->file->index_read_map(table->record[0],
- tab->ref.key_buff,
- make_prev_keypart_map(tab->ref.key_parts),
- HA_READ_KEY_EXACT)))
+ if ((error= table->file->prepare_index_key_scan_map(tab->ref.key_buff, make_prev_keypart_map(tab->ref.key_parts))))
+ {
+ report_error(table,error);
+ return -1;
+ }
+ if ((error= table->file->ha_index_read_map(table->record[0],
+ tab->ref.key_buff,
+ make_prev_keypart_map(tab->ref.key_parts),
+ HA_READ_KEY_EXACT)))
{
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
return report_error(table, error);
@@ -12485,9 +17706,15 @@ join_read_last_key(JOIN_TAB *tab)
if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref))
return -1;
- if ((error=table->file->index_read_last_map(table->record[0],
- tab->ref.key_buff,
- make_prev_keypart_map(tab->ref.key_parts))))
+ if ((error= table->file->prepare_index_key_scan_map(tab->ref.key_buff, make_prev_keypart_map(tab->ref.key_parts))))
+ {
+ report_error(table,error);
+ return -1;
+ }
+ if ((error= table->file->ha_index_read_map(table->record[0],
+ tab->ref.key_buff,
+ make_prev_keypart_map(tab->ref.key_parts),
+ HA_READ_PREFIX_LAST)))
{
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
return report_error(table, error);
@@ -12512,9 +17739,9 @@ join_read_next_same(READ_RECORD *info)
TABLE *table= info->table;
JOIN_TAB *tab=table->reginfo.join_tab;
- if ((error=table->file->index_next_same(table->record[0],
- tab->ref.key_buff,
- tab->ref.key_length)))
+ if ((error= table->file->ha_index_next_same(table->record[0],
+ tab->ref.key_buff,
+ tab->ref.key_length)))
{
if (error != HA_ERR_END_OF_FILE)
return report_error(table, error);
@@ -12532,7 +17759,7 @@ join_read_prev_same(READ_RECORD *info)
TABLE *table= info->table;
JOIN_TAB *tab=table->reginfo.join_tab;
- if ((error=table->file->index_prev(table->record[0])))
+ if ((error= table->file->ha_index_prev(table->record[0])))
return report_error(table, error);
if (key_cmp_if_same(table, tab->ref.key_buff, tab->ref.key,
tab->ref.key_length))
@@ -12555,7 +17782,7 @@ join_init_quick_read_record(JOIN_TAB *tab)
int read_first_record_seq(JOIN_TAB *tab)
{
- if (tab->read_record.file->ha_rnd_init(1))
+ if (tab->read_record.table->file->ha_rnd_init_with_error(1))
return 1;
return (*tab->read_record.read_record)(&tab->read_record);
}
@@ -12566,49 +17793,77 @@ test_if_quick_select(JOIN_TAB *tab)
delete tab->select->quick;
tab->select->quick=0;
return tab->select->test_quick_select(tab->join->thd, tab->keys,
- (table_map) 0, HA_POS_ERROR, 0);
+ (table_map) 0, HA_POS_ERROR, 0,
+ FALSE);
}
-static int
-join_init_read_record(JOIN_TAB *tab)
+static
+bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
+{
+ return (join_tab->use_quick == 2 && test_if_quick_select(join_tab) > 0);
+}
+
+int join_init_read_record(JOIN_TAB *tab)
{
if (tab->select && tab->select->quick && tab->select->quick->reset())
return 1;
+ if (!tab->preread_init_done && tab->preread_init())
+ return 1;
+ if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
+ tab->select,1,1, FALSE))
+ return 1;
+ return (*tab->read_record.read_record)(&tab->read_record);
+}
+
+int
+join_read_record_no_init(JOIN_TAB *tab)
+{
+ Copy_field *save_copy, *save_copy_end;
+
+ /*
+ init_read_record resets all elements of tab->read_record().
+ Remember things that we don't want to have reset.
+ */
+ save_copy= tab->read_record.copy_field;
+ save_copy_end= tab->read_record.copy_field_end;
+
init_read_record(&tab->read_record, tab->join->thd, tab->table,
tab->select,1,1, FALSE);
+
+ tab->read_record.copy_field= save_copy;
+ tab->read_record.copy_field_end= save_copy_end;
+ tab->read_record.read_record= rr_sequential_and_unpack;
+
return (*tab->read_record.read_record)(&tab->read_record);
}
-
static int
join_read_first(JOIN_TAB *tab)
{
- int error;
+ int error= 0;
TABLE *table=tab->table;
- if (table->covering_keys.is_set(tab->index) && !table->no_keyread)
- table->set_keyread(TRUE);
+ DBUG_ENTER("join_read_first");
+
+ if (table->covering_keys.is_set(tab->index) && !table->no_keyread &&
+ !table->key_read)
+ table->enable_keyread();
tab->table->status=0;
tab->read_record.read_record=join_read_next;
tab->read_record.table=table;
- tab->read_record.file=table->file;
tab->read_record.index=tab->index;
tab->read_record.record=table->record[0];
-
- if (!table->file->inited &&
- (error= table->file->ha_index_init(tab->index, tab->sorted)))
- {
- (void) report_error(table, error);
- return 1;
- }
-
- if ((error=tab->table->file->index_first(tab->table->record[0])))
+ if (!table->file->inited)
+ error= table->file->ha_index_init(tab->index, tab->sorted);
+ if (!error)
+ error= table->file->prepare_index_scan();
+ if (error || (error=tab->table->file->ha_index_first(tab->table->record[0])))
{
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
report_error(table, error);
- return -1;
+ DBUG_RETURN(-1);
}
- return 0;
+ DBUG_RETURN(0);
}
@@ -12616,8 +17871,9 @@ static int
join_read_next(READ_RECORD *info)
{
int error;
- if ((error=info->file->index_next(info->record)))
+ if ((error= info->table->file->ha_index_next(info->record)))
return report_error(info->table, error);
+
return 0;
}
@@ -12626,25 +17882,25 @@ static int
join_read_last(JOIN_TAB *tab)
{
TABLE *table=tab->table;
- int error;
- if (table->covering_keys.is_set(tab->index) && !table->no_keyread)
- table->set_keyread(TRUE);
+ int error= 0;
+  DBUG_ENTER("join_read_last");
+
+ if (table->covering_keys.is_set(tab->index) && !table->no_keyread &&
+ !table->key_read)
+ table->enable_keyread();
tab->table->status=0;
tab->read_record.read_record=join_read_prev;
tab->read_record.table=table;
- tab->read_record.file=table->file;
tab->read_record.index=tab->index;
tab->read_record.record=table->record[0];
- if (!table->file->inited &&
- (error= table->file->ha_index_init(tab->index, 1)))
- {
- (void) report_error(table, error);
- return 1;
- }
+ if (!table->file->inited)
+ error= table->file->ha_index_init(tab->index, 1);
+ if (!error)
+ error= table->file->prepare_index_scan();
+ if (error || (error= tab->table->file->ha_index_last(tab->table->record[0])))
+ DBUG_RETURN(report_error(table, error));
- if ((error= tab->table->file->index_last(tab->table->record[0])))
- return report_error(table, error);
- return 0;
+ DBUG_RETURN(0);
}
@@ -12652,7 +17908,7 @@ static int
join_read_prev(READ_RECORD *info)
{
int error;
- if ((error= info->file->index_prev(info->record)))
+ if ((error= info->table->file->ha_index_prev(info->record)))
return report_error(info->table, error);
return 0;
}
@@ -12673,7 +17929,7 @@ join_ft_read_first(JOIN_TAB *tab)
table->file->ft_init();
- if ((error= table->file->ft_read(table->record[0])))
+ if ((error= table->file->ha_ft_read(table->record[0])))
return report_error(table, error);
return 0;
}
@@ -12682,7 +17938,7 @@ static int
join_ft_read_next(READ_RECORD *info)
{
int error;
- if ((error= info->file->ft_read(info->table->record[0])))
+ if ((error= info->table->file->ha_ft_read(info->table->record[0])))
return report_error(info->table, error);
return 0;
}
@@ -12757,9 +18013,14 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
DBUG_ENTER("end_send");
if (!end_of_records)
{
- int error;
- if (join->tables &&
- join->join_tab->is_using_loose_index_scan())
+ if (join->table_count &&
+ (join->join_tab->is_using_loose_index_scan() ||
+ /*
+          When ORDER BY used a loose scan as its input, the quick select may
+ be attached to pre_sort_join_tab.
+ */
+ (join->pre_sort_join_tab &&
+ join->pre_sort_join_tab->is_using_loose_index_scan())))
{
/* Copy non-aggregated fields when loose index scan is used. */
copy_fields(&join->tmp_table_param);
@@ -12772,18 +18033,20 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
DBUG_RETURN(NESTED_LOOP_ERROR);
DBUG_RETURN(NESTED_LOOP_OK);
}
- error=0;
if (join->do_send_rows)
- error=join->result->send_data(*join->fields);
- if (error)
- DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
+ {
+ int error;
+ /* result < 0 if row was not accepted and should not be counted */
+ if ((error= join->result->send_data(*join->fields)))
+ DBUG_RETURN(error < 0 ? NESTED_LOOP_OK : NESTED_LOOP_ERROR);
+ }
if (++join->send_records >= join->unit->select_limit_cnt &&
join->do_send_rows)
{
if (join->select_options & OPTION_FOUND_ROWS)
{
JOIN_TAB *jt=join->join_tab;
- if ((join->tables == 1) && !join->tmp_table && !join->sort_and_group
+ if ((join->table_count == 1) && !join->tmp_table && !join->sort_and_group
&& !join->send_group_parts && !join->having && !jt->select_cond &&
!(jt->select && jt->select->quick) &&
(jt->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
@@ -12834,7 +18097,7 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
/* ARGSUSED */
-static enum_nested_loop_state
+enum_nested_loop_state
end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
bool end_of_records)
{
@@ -12872,8 +18135,11 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
{
List_iterator_fast<Item> it(*join->fields);
Item *item;
+ DBUG_PRINT("info", ("no matching rows"));
+
/* No matching rows for group function */
join->clear();
+ join->no_rows_in_result_called= 1;
while ((item= it++))
item->no_rows_in_result();
@@ -12883,7 +18149,15 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
else
{
if (join->do_send_rows)
- error=join->result->send_data(*join->fields) ? 1 : 0;
+ {
+ error= join->result->send_data(*join->fields);
+ if (error < 0)
+ {
+ /* Duplicate row, don't count */
+ join->send_records--;
+ error= 0;
+ }
+ }
join->send_records++;
}
if (join->rollup.state != ROLLUP::STATE_NONE && error <= 0)
@@ -12955,11 +18229,6 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
TABLE *table=join->tmp_table;
DBUG_ENTER("end_write");
- if (join->thd->killed) // Aborted by user
- {
- join->thd->send_kill_message();
- DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
- }
if (!end_of_records)
{
copy_fields(&join->tmp_table_param);
@@ -12970,13 +18239,18 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
{
int error;
join->found_records++;
- if ((error=table->file->ha_write_row(table->record[0])))
+ if ((error= table->file->ha_write_tmp_row(table->record[0])))
{
if (!table->file->is_fatal_error(error, HA_CHECK_DUP))
goto end;
- if (create_myisam_from_heap(join->thd, table, &join->tmp_table_param,
- error,1))
+ bool is_duplicate;
+ if (create_internal_tmp_table_from_heap(join->thd, table,
+ join->tmp_table_param.start_recinfo,
+ &join->tmp_table_param.recinfo,
+ error, 1, &is_duplicate))
DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error
+ if (is_duplicate)
+ goto end;
table->s->uniques=0; // To ensure rows are the same
}
if (++join->send_records >= join->tmp_table_param.end_write_records &&
@@ -12986,11 +18260,15 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT);
join->do_send_rows=0;
join->unit->select_limit_cnt = HA_POS_ERROR;
- DBUG_RETURN(NESTED_LOOP_OK);
}
}
}
end:
+ if (join->thd->killed)
+ {
+ join->thd->send_kill_message();
+ DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
+ }
DBUG_RETURN(NESTED_LOOP_OK);
}
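
In end_write() above, when ha_write_tmp_row() fails with something other than an ignorable duplicate, create_internal_tmp_table_from_heap() converts the in-memory temporary table to an on-disk one (the table-full case) and re-inserts the row; the new is_duplicate flag lets a duplicate found during conversion be skipped. The sketch below shows the general "fill memory first, spill to disk when full" pattern under invented names:

    // Illustrative sketch: buffer rows in memory until a size limit is hit,
    // then convert the buffer to "disk" storage and keep appending.
    #include <cstdio>
    #include <string>
    #include <vector>

    struct TmpTable
    {
      size_t mem_limit;
      bool on_disk= false;
      std::vector<std::string> rows;     // stands in for HEAP vs. disk engine

      void write_row(const std::string &row)
      {
        if (!on_disk && rows.size() >= mem_limit)
        {
          // "create_internal_tmp_table_from_heap": switch engines, keep data
          on_disk= true;
          std::puts("converted in-memory tmp table to disk");
        }
        rows.push_back(row);
      }
    };

    int main()
    {
      TmpTable t{2};
      for (const char *r : {"a", "b", "c", "d"})
        t.write_row(r);
      std::printf("%zu rows, on_disk=%d\n", t.rows.size(), (int) t.on_disk);
      return 0;
    }
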
@@ -13008,11 +18286,6 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
if (end_of_records)
DBUG_RETURN(NESTED_LOOP_OK);
- if (join->thd->killed) // Aborted by user
- {
- join->thd->send_kill_message();
- DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
- }
join->found_records++;
copy_fields(&join->tmp_table_param); // Groups are copied twice.
@@ -13025,42 +18298,31 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
if (item->maybe_null)
group->buff[-1]= (char) group->field->is_null();
}
- if (!table->file->index_read_map(table->record[1],
- join->tmp_table_param.group_buff,
- HA_WHOLE_KEY,
- HA_READ_KEY_EXACT))
+ if (!table->file->ha_index_read_map(table->record[1],
+ join->tmp_table_param.group_buff,
+ HA_WHOLE_KEY,
+ HA_READ_KEY_EXACT))
{ /* Update old record */
restore_record(table,record[1]);
update_tmptable_sum_func(join->sum_funcs,table);
- if ((error=table->file->ha_update_row(table->record[1],
- table->record[0])))
+ if ((error= table->file->ha_update_tmp_row(table->record[1],
+ table->record[0])))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
}
- DBUG_RETURN(NESTED_LOOP_OK);
+ goto end;
}
- /*
- Copy null bits from group key to table
- We can't copy all data as the key may have different format
- as the row data (for example as with VARCHAR keys)
- */
- KEY_PART_INFO *key_part;
- for (group=table->group,key_part=table->key_info[0].key_part;
- group ;
- group=group->next,key_part++)
- {
- if (key_part->null_bit)
- memcpy(table->record[0]+key_part->offset, group->buff, 1);
- }
init_tmptable_sum_functions(join->sum_funcs);
if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd))
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
- if ((error=table->file->ha_write_row(table->record[0])))
+ if ((error= table->file->ha_write_tmp_row(table->record[0])))
{
- if (create_myisam_from_heap(join->thd, table, &join->tmp_table_param,
- error, 0))
+ if (create_internal_tmp_table_from_heap(join->thd, table,
+ join->tmp_table_param.start_recinfo,
+ &join->tmp_table_param.recinfo,
+ error, 0, NULL))
DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error
/* Change method to update rows */
if ((error= table->file->ha_index_init(0, 0)))
@@ -13069,9 +18331,15 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
DBUG_RETURN(NESTED_LOOP_ERROR);
}
- join->join_tab[join->tables-1].next_select=end_unique_update;
+ join->join_tab[join->top_join_tab_count-1].next_select=end_unique_update;
}
join->send_records++;
+end:
+ if (join->thd->killed)
+ {
+ join->thd->send_kill_message();
+ DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
+ }
DBUG_RETURN(NESTED_LOOP_OK);
}
@@ -13088,18 +18356,13 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
if (end_of_records)
DBUG_RETURN(NESTED_LOOP_OK);
- if (join->thd->killed) // Aborted by user
- {
- join->thd->send_kill_message();
- DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
- }
init_tmptable_sum_functions(join->sum_funcs);
copy_fields(&join->tmp_table_param); // Groups are copied twice.
if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd))
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
- if (!(error=table->file->ha_write_row(table->record[0])))
+ if (!(error= table->file->ha_write_tmp_row(table->record[0])))
join->send_records++; // New group
else
{
@@ -13108,26 +18371,31 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
table->file->print_error(error,MYF(0)); /* purecov: inspected */
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
}
- if (table->file->rnd_pos(table->record[1],table->file->dup_ref))
+ if (table->file->ha_rnd_pos(table->record[1],table->file->dup_ref))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
}
restore_record(table,record[1]);
update_tmptable_sum_func(join->sum_funcs,table);
- if ((error=table->file->ha_update_row(table->record[1],
- table->record[0])))
+ if ((error= table->file->ha_update_tmp_row(table->record[1],
+ table->record[0])))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
}
}
+ if (join->thd->killed)
+ {
+ join->thd->send_kill_message();
+ DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
+ }
DBUG_RETURN(NESTED_LOOP_OK);
}
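
end_unique_update() above implements grouping through a unique key on the temporary table: it tries to write the new group row and, on a duplicate-key error, positions on the existing row (ha_rnd_pos with dup_ref) and updates its aggregate columns instead. The same insert-or-update-on-duplicate pattern, sketched with a hash map (names illustrative):

    // Illustrative sketch: GROUP BY key with SUM(value), done as
    // "insert a new group, or update the existing one on duplicate key".
    #include <cstdio>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    int main()
    {
      std::vector<std::pair<int, int>> input= {{1, 10}, {2, 5}, {1, 7}};
      std::unordered_map<int, long> groups;       // group key -> running SUM

      for (auto [key, value] : input)
      {
        auto [it, inserted]= groups.emplace(key, value); // try the "write_row"
        if (!inserted)                                   // duplicate key found
          it->second+= value;                            // update the old record
      }
      for (auto &g : groups)
        std::printf("key=%d sum=%ld\n", g.first, g.second);
      return 0;
    }
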
/* ARGSUSED */
-static enum_nested_loop_state
+enum_nested_loop_state
end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
bool end_of_records)
{
@@ -13135,11 +18403,6 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
int idx= -1;
DBUG_ENTER("end_write_group");
- if (join->thd->killed)
- { // Aborted by user
- join->thd->send_kill_message();
- DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
- }
if (!join->first_record || end_of_records ||
(idx=test_if_group_changed(join->group_fields)) >= 0)
{
@@ -13159,10 +18422,12 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
join->sum_funcs_end[send_group_parts]);
if (!join->having || join->having->val_int())
{
- int error= table->file->ha_write_row(table->record[0]);
- if (error && create_myisam_from_heap(join->thd, table,
- &join->tmp_table_param,
- error, 0))
+ int error= table->file->ha_write_tmp_row(table->record[0]);
+ if (error &&
+ create_internal_tmp_table_from_heap(join->thd, table,
+ join->tmp_table_param.start_recinfo,
+ &join->tmp_table_param.recinfo,
+ error, 0, NULL))
DBUG_RETURN(NESTED_LOOP_ERROR);
}
if (join->rollup.state != ROLLUP::STATE_NONE)
@@ -13171,13 +18436,13 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
DBUG_RETURN(NESTED_LOOP_ERROR);
}
if (end_of_records)
- DBUG_RETURN(NESTED_LOOP_OK);
+ goto end;
}
}
else
{
if (end_of_records)
- DBUG_RETURN(NESTED_LOOP_OK);
+ goto end;
join->first_record=1;
(void) test_if_group_changed(join->group_fields);
}
@@ -13190,13 +18455,19 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
DBUG_RETURN(NESTED_LOOP_ERROR);
if (join->procedure)
join->procedure->add();
- DBUG_RETURN(NESTED_LOOP_OK);
+ goto end;
}
}
if (update_sum_func(join->sum_funcs))
DBUG_RETURN(NESTED_LOOP_ERROR);
if (join->procedure)
join->procedure->add();
+end:
+ if (join->thd->killed)
+ {
+ join->thd->send_kill_message();
+ DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
+ }
DBUG_RETURN(NESTED_LOOP_OK);
}
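
The end_write/end_update/end_unique_update/end_write_group hunks above all move the thd->killed check behind an end: label, so a kill request is acted on only after the current row or group has been completely written to the temporary table. A sketch of that "finish the unit of work, then check for cancellation" pattern with a plain atomic flag (illustrative, not the THD mechanism):

    // Illustrative sketch: honor a cancellation flag only after the current
    // unit of work has been completed, so no half-written group is left behind.
    #include <atomic>
    #include <cstdio>
    #include <vector>

    static std::atomic<bool> killed{false};

    static bool write_group(std::vector<int> &sink, int group_value)
    {
      sink.push_back(group_value);   // complete the write first
      if (killed.load())             // then check for cancellation ("end:" label)
      {
        std::puts("aborting after finishing the current group");
        return false;
      }
      return true;
    }

    int main()
    {
      std::vector<int> sink;
      write_group(sink, 1);
      killed.store(true);            // e.g. KILL QUERY arrives here
      write_group(sink, 2);
      std::printf("%zu groups written\n", sink.size());   // both groups complete
      return 0;
    }
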
@@ -13210,18 +18481,49 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
*****************************************************************************/
/**
- @return
- 1 if right_item is used removable reference key on left_item
+ Check if "left_item=right_item" equality is guaranteed to be true by use of
+ [eq]ref access on left_item->field->table.
+
+ SYNOPSIS
+ test_if_ref()
+ root_cond
+ left_item
+ right_item
+
+ DESCRIPTION
+ Check if the given "left_item = right_item" equality is guaranteed to be
+ true by use of [eq_]ref access method.
+
+    We need root_cond as we can't remove ON expressions even if the employed
+    ref access guarantees that they are true. This is because TODO
+
+ RETURN
+    TRUE   if right_item is used as a removable reference key on left_item
+ FALSE Otherwise
+
*/
-static bool test_if_ref(Item_field *left_item,Item *right_item)
+bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item)
{
Field *field=left_item->field;
- // No need to change const test. We also have to keep tests on LEFT JOIN
- if (!field->table->const_table && !field->table->maybe_null)
+ JOIN_TAB *join_tab= field->table->reginfo.join_tab;
+ // No need to change const test
+ if (!field->table->const_table && join_tab &&
+ !join_tab->is_ref_for_hash_join() &&
+ (!join_tab->first_inner ||
+ *join_tab->first_inner->on_expr_ref == root_cond))
{
+ /*
+ If ref access uses "Full scan on NULL key" (i.e. it actually alternates
+ between ref access and full table scan), then no equality can be
+ guaranteed to be true.
+ */
+ if (join_tab->ref.is_access_triggered())
+ return FALSE;
+
Item *ref_item=part_of_refkey(field->table,field);
- if (ref_item && ref_item->eq(right_item,1))
+ if (ref_item && (ref_item->eq(right_item,1) ||
+ ref_item->real_item()->eq(right_item,1)))
{
right_item= right_item->real_item();
if (right_item->type() == Item::FIELD_ITEM)
@@ -13233,7 +18535,7 @@ static bool test_if_ref(Item_field *left_item,Item *right_item)
{
/*
We can remove binary fields and numerical fields except float,
- as float comparison isn't 100 % secure
+ as float comparison isn't 100 % safe
We have to keep normal strings to be able to check for end spaces
*/
if (field->binary() &&
@@ -13241,7 +18543,7 @@ static bool test_if_ref(Item_field *left_item,Item *right_item)
field->real_type() != MYSQL_TYPE_VARCHAR &&
(field->type() != MYSQL_TYPE_FLOAT || field->decimals() == 0))
{
- return !store_val_in_field(field, right_item, CHECK_FIELD_WARN);
+ return !right_item->save_in_field_no_warnings(field, 1);
}
}
}
@@ -13249,14 +18551,24 @@ static bool test_if_ref(Item_field *left_item,Item *right_item)
return 0; // keep test
}
+
/**
Extract a condition that can be checked after reading given table
+ @fn make_cond_for_table()
@param cond Condition to analyze
@param tables Tables for which "current field values" are available
- @param used_table Table that we're extracting the condition for (may
- also include PSEUDO_TABLE_BITS, and may be zero)
- @param exclude_expensive_cond Do not push expensive conditions
+  @param used_table Table that we're extracting the condition for (may also
+                    include PSEUDO_TABLE_BITS, and may be zero); "tables"
+                    always includes used_table
+ @param join_tab_idx_arg
+ The index of the JOIN_TAB this Item is being extracted
+ for. MAX_TABLES if there is no corresponding JOIN_TAB.
+ @param exclude_expensive_cond
+ Do not push expensive conditions
+ @param retain_ref_cond
+ Retain ref conditions
@retval <>NULL Generated condition
@retval =NULL Already checked, OR error
@@ -13286,11 +18598,32 @@ static bool test_if_ref(Item_field *left_item,Item *right_item)
make_cond_for_info_schema() uses similar algorithm as well.
*/
-static COND *
-make_cond_for_table(COND *cond, table_map tables, table_map used_table)
+static Item *
+make_cond_for_table(THD *thd, Item *cond, table_map tables,
+ table_map used_table,
+ int join_tab_idx_arg,
+ bool exclude_expensive_cond __attribute__((unused)),
+ bool retain_ref_cond)
+{
+ return make_cond_for_table_from_pred(thd, cond, cond, tables, used_table,
+ join_tab_idx_arg,
+ exclude_expensive_cond,
+ retain_ref_cond);
+}
+
+
+static Item *
+make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond,
+ table_map tables, table_map used_table,
+ int join_tab_idx_arg,
+ bool exclude_expensive_cond __attribute__
+ ((unused)),
+ bool retain_ref_cond)
+
{
if (used_table && !(cond->used_tables() & used_table))
return (COND*) 0; // Already checked
+
if (cond->type() == Item::COND_ITEM)
{
if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
@@ -13303,7 +18636,138 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
Item *item;
while ((item=li++))
{
- Item *fix=make_cond_for_table(item,tables,used_table);
+ Item *fix=make_cond_for_table_from_pred(thd, root_cond, item,
+ tables, used_table,
+ join_tab_idx_arg,
+ exclude_expensive_cond,
+ retain_ref_cond);
+ if (fix)
+ new_cond->argument_list()->push_back(fix);
+ }
+ switch (new_cond->argument_list()->elements) {
+ case 0:
+ return (COND*) 0; // Always true
+ case 1:
+ return new_cond->argument_list()->head();
+ default:
+ /*
+ Call fix_fields to propagate all properties of the children to
+ the new parent Item. This should not be expensive because all
+ children of Item_cond_and should be fixed by now.
+ */
+ new_cond->fix_fields(thd, 0);
+ new_cond->used_tables_cache=
+ ((Item_cond_and*) cond)->used_tables_cache &
+ tables;
+ return new_cond;
+ }
+ }
+ else
+ { // Or list
+ Item_cond_or *new_cond=new Item_cond_or;
+ if (!new_cond)
+ return (COND*) 0; // OOM /* purecov: inspected */
+ List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+ Item *item;
+ while ((item=li++))
+ {
+ Item *fix=make_cond_for_table_from_pred(thd, root_cond, item,
+ tables, 0L,
+ join_tab_idx_arg,
+ exclude_expensive_cond,
+ retain_ref_cond);
+ if (!fix)
+ return (COND*) 0; // Always true
+ new_cond->argument_list()->push_back(fix);
+ }
+ /*
+ Call fix_fields to propagate all properties of the children to
+ the new parent Item. This should not be expensive because all
+        children of Item_cond_or should be fixed by now.
+ */
+ new_cond->fix_fields(thd, 0);
+ new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
+ new_cond->top_level_item();
+ return new_cond;
+ }
+ }
+
+ /*
+ Because the following test takes a while and it can be done
+ table_count times, we mark each item that we have examined with the result
+ of the test
+ */
+ if ((cond->marker == 3 && !retain_ref_cond) ||
+ (cond->used_tables() & ~tables))
+ return (COND*) 0; // Can't check this yet
+
+ if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK)
+ {
+ cond->set_join_tab_idx(join_tab_idx_arg);
+ return cond; // Not boolean op
+ }
+
+ if (cond->type() == Item::FUNC_ITEM &&
+ ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
+ {
+ Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
+ Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
+ if (left_item->type() == Item::FIELD_ITEM && !retain_ref_cond &&
+ test_if_ref(root_cond, (Item_field*) left_item,right_item))
+ {
+ cond->marker=3; // Checked when read
+ return (COND*) 0;
+ }
+ if (right_item->type() == Item::FIELD_ITEM && !retain_ref_cond &&
+ test_if_ref(root_cond, (Item_field*) right_item,left_item))
+ {
+ cond->marker=3; // Checked when read
+ return (COND*) 0;
+ }
+ }
+ cond->marker=2;
+ cond->set_join_tab_idx(join_tab_idx_arg);
+ return cond;
+}
+
+
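
make_cond_for_table_from_pred() above is a recursive descent over the AND/OR tree: from an AND it keeps the conjuncts whose used tables are all available, while an OR is kept only if every one of its disjuncts is. The sketch below reproduces that descent on a toy condition tree whose leaves carry a table bitmap; the types and names are illustrative, not the Item hierarchy:

    // Illustrative sketch: extract the part of an AND/OR condition tree that
    // only refers to "available" tables (a bitmap), as in make_cond_for_table.
    #include <cstdio>
    #include <memory>
    #include <vector>

    struct Cond
    {
      enum Kind { LEAF, AND, OR } kind;
      unsigned long used_tables= 0;               // bitmap, leaves only
      std::vector<std::shared_ptr<Cond>> args;    // AND/OR children
    };

    static std::shared_ptr<Cond> extract(const std::shared_ptr<Cond> &c,
                                         unsigned long tables)
    {
      if (c->kind == Cond::LEAF)
        return (c->used_tables & ~tables) ? nullptr : c;  // checkable yet?

      auto out= std::make_shared<Cond>(Cond{c->kind});
      for (auto &arg : c->args)
      {
        auto fix= extract(arg, tables);
        if (fix)
          out->args.push_back(fix);
        else if (c->kind == Cond::OR)
          return nullptr;             // one unusable disjunct kills the whole OR
      }
      if (out->args.empty())
        return nullptr;               // nothing left of an AND: already checked
      return out->args.size() == 1 ? out->args[0] : out;
    }

    int main()
    {
      auto t1= std::make_shared<Cond>(Cond{Cond::LEAF, 0x1});
      auto t2= std::make_shared<Cond>(Cond{Cond::LEAF, 0x2});
      auto both= std::make_shared<Cond>(Cond{Cond::AND, 0, {t1, t2}});
      auto part= extract(both, 0x1);  // only table 1 has been read so far
      if (part)
        std::printf("kept a condition using table bitmap 0x%lx\n",
                    part->used_tables);
      return 0;
    }

Running extract() with only table 1 available keeps the t1 conjunct and defers the t2 one, which is the behaviour the AND branch above implements with argument_list() and push_back().
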
+/*
+  The difference between this and make_cond_for_table() is that we're in the
+ following state:
+ 1. conditions referring to 'tables' have been checked
+ 2. conditions referring to sjm_tables have been checked, too
+    3. We need a condition that couldn't be checked in #1 or #2 but
+ can be checked when we get both (tables | sjm_tables).
+
+*/
+static COND *
+make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables,
+ table_map sjm_tables, bool inside_or_clause)
+{
+ /*
+ We assume that conditions that refer to only join prefix tables or
+ sjm_tables have already been checked.
+ */
+ if (!inside_or_clause &&
+ (!(cond->used_tables() & ~tables) ||
+ !(cond->used_tables() & ~sjm_tables)))
+ return (COND*) 0; // Already checked
+
+ /* AND/OR recursive descent */
+ if (cond->type() == Item::COND_ITEM)
+ {
+ if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+ {
+ /* Create new top level AND item */
+ Item_cond_and *new_cond=new Item_cond_and;
+ if (!new_cond)
+ return (COND*) 0; // OOM /* purecov: inspected */
+ List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+ Item *item;
+ while ((item=li++))
+ {
+ Item *fix=make_cond_after_sjm(root_cond, item, tables, sjm_tables,
+ inside_or_clause);
if (fix)
new_cond->argument_list()->push_back(fix);
}
@@ -13333,7 +18797,8 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
Item *item;
while ((item=li++))
{
- Item *fix=make_cond_for_table(item,tables,0L);
+ Item *fix= make_cond_after_sjm(root_cond, item, tables, sjm_tables,
+ /*inside_or_clause= */TRUE);
if (!fix)
return (COND*) 0; // Always true
new_cond->argument_list()->push_back(fix);
@@ -13355,23 +18820,27 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
of the test
*/
- if (cond->marker == 3 || (cond->used_tables() & ~tables))
+ if (cond->marker == 3 || (cond->used_tables() & ~(tables | sjm_tables)))
return (COND*) 0; // Can't check this yet
if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK)
return cond; // Not boolean op
+ /*
+ Remove equalities that are guaranteed to be true by use of 'ref' access
+ method
+ */
if (((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
{
- Item *left_item= ((Item_func*) cond)->arguments()[0];
- Item *right_item= ((Item_func*) cond)->arguments()[1];
+ Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
+ Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
if (left_item->type() == Item::FIELD_ITEM &&
- test_if_ref((Item_field*) left_item,right_item))
+ test_if_ref(root_cond, (Item_field*) left_item,right_item))
{
cond->marker=3; // Checked when read
return (COND*) 0;
}
if (right_item->type() == Item::FIELD_ITEM &&
- test_if_ref((Item_field*) right_item,left_item))
+ test_if_ref(root_cond, (Item_field*) right_item,left_item))
{
cond->marker=3; // Checked when read
return (COND*) 0;
@@ -13381,22 +18850,62 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
return cond;
}
+
+/*
+ @brief
+
+ Check if
+ - @table uses "ref"-like access
+ - it is based on "@field=certain_item" equality
+ - the equality will be true for any record returned by the access method
+ and return the certain_item if yes.
+
+ @detail
+
+ Equality won't necessarily hold if:
+ - the used index covers only part of the @field.
+     Suppose we have a CHAR(5) field and INDEX(field(3)). If you make a lookup
+     for 'abc', you will get both the record with 'abc' and the one with 'abcde'.
+ - The type of access is actually ref_or_null, and so @field can be either
+ a value or NULL.
+
+ @return
+ Item that the field will be equal to
+ NULL if no such item
+*/
+
static Item *
part_of_refkey(TABLE *table,Field *field)
{
- if (!table->reginfo.join_tab)
+ JOIN_TAB *join_tab= table->reginfo.join_tab;
+ if (!join_tab)
return (Item*) 0; // field from outer non-select (UPDATE,...)
- uint ref_parts=table->reginfo.join_tab->ref.key_parts;
- if (ref_parts)
+ uint ref_parts= join_tab->ref.key_parts;
+ if (ref_parts) /* if it's ref/eq_ref/ref_or_null */
{
- KEY_PART_INFO *key_part=
- table->key_info[table->reginfo.join_tab->ref.key].key_part;
+ uint key= join_tab->ref.key;
+ KEY *key_info= join_tab->get_keyinfo_by_key_no(key);
+ KEY_PART_INFO *key_part= key_info->key_part;
for (uint part=0 ; part < ref_parts ; part++,key_part++)
- if (field->eq(key_part->field) &&
- !(key_part->key_part_flag & (HA_PART_KEY_SEG | HA_NULL_PART)))
- return table->reginfo.join_tab->ref.items[part];
+ {
+ if (field->eq(key_part->field))
+ {
+ /*
+ Found the field in the key. Check that
+ 1. ref_or_null doesn't alternate this component between a value and
+ a NULL
+        2. the key part covers the whole field, not only a prefix of it
+ */
+ if (part != join_tab->ref.null_ref_part && // (1)
+ !(key_part->key_part_flag & HA_PART_KEY_SEG)) // (2)
+ {
+ return join_tab->ref.items[part];
+ }
+ break;
+ }
+ }
}
return (Item*) 0;
}
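
part_of_refkey() above now rejects a matching key part if it is the NULL-alternating component of ref_or_null or a prefix segment (HA_PART_KEY_SEG): as the @detail block explains, a lookup for 'abc' through INDEX(field(3)) also returns 'abcde', so the equality cannot be treated as guaranteed. A tiny standalone illustration of the prefix effect:

    // Illustrative sketch: a 3-character prefix "index" matches both 'abc'
    // and 'abcde', so field = 'abc' still has to be re-checked on the row.
    #include <cstdio>
    #include <string>
    #include <vector>

    int main()
    {
      std::vector<std::string> rows= {"abc", "abcde", "xyz"};
      std::string lookup= "abc";

      for (const std::string &row : rows)
      {
        bool prefix_match= row.compare(0, 3, lookup, 0, 3) == 0; // INDEX(field(3))
        bool full_match= (row == lookup);                        // field = 'abc'
        if (prefix_match)
          std::printf("index returns '%s'; equality actually %s\n",
                      row.c_str(), full_match ? "holds" : "does NOT hold");
      }
      return 0;
    }
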
@@ -13458,26 +18967,46 @@ static int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,
key as a suffix to the secondary keys. If it has continue to check
the primary key as a suffix.
*/
- if (!on_pk_suffix &&
+ if (!on_pk_suffix && (table->key_info[idx].ext_key_part_map & 1) &&
(table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
table->s->primary_key != MAX_KEY &&
table->s->primary_key != idx)
{
+ KEY_PART_INFO *start,*end;
+ uint pk_part_idx= 0;
on_pk_suffix= TRUE;
- key_part= table->key_info[table->s->primary_key].key_part;
- key_part_end=key_part+table->key_info[table->s->primary_key].key_parts;
+ start= key_part= table->key_info[table->s->primary_key].key_part;
const_key_parts=table->const_key_parts[table->s->primary_key];
+ /*
+ Calculate true key_part_end and const_key_parts
+        (we have to stop at the first non-contiguous primary key part)
+ */
+ for (key_part_end= key_part,
+ end= key_part+table->key_info[table->s->primary_key].key_parts;
+ key_part_end < end; key_part_end++, pk_part_idx++)
+ {
+ /* Found hole in the pk_parts; Abort */
+ if (!(table->key_info[idx].ext_key_part_map &
+ (((key_part_map) 1) << pk_part_idx)))
+ break;
+ }
+
+ /* Adjust const_key_parts */
+ const_key_parts&= (((key_part_map) 1) << pk_part_idx) -1;
+
for (; const_key_parts & 1 ; const_key_parts>>= 1)
- key_part++;
+ key_part++;
/*
- The primary and secondary key parts were all const (i.e. there's
- one row). The sorting doesn't matter.
+ Test if the primary key parts were all const (i.e. there's one row).
+ The sorting doesn't matter.
*/
- if (key_part == key_part_end && reverse == 0)
+ if (key_part ==
+ start+table->key_info[table->s->primary_key].key_parts &&
+ reverse == 0)
{
key_parts= 0;
- reverse= 1;
+ reverse= 1; // Key is ok to use
goto ok;
}
}
@@ -13494,7 +19023,8 @@ static int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,
if (reverse && flag != reverse)
DBUG_RETURN(0);
reverse=flag; // Remember if reverse
- key_part++;
+ if (key_part < key_part_end)
+ key_part++;
}
if (on_pk_suffix)
{
@@ -13530,23 +19060,6 @@ ok:
@param table Table to scan
@param usable_keys Allowed keys
- @note
- As far as
- 1) clustered primary key entry data set is a set of all record
- fields (key fields and not key fields) and
- 2) secondary index entry data is a union of its key fields and
- primary key fields (at least InnoDB and its derivatives don't
- duplicate primary key fields there, even if the primary and
- the secondary keys have a common subset of key fields),
- then secondary index entry data is always a subset of primary key entry.
- Unfortunately, key_info[nr].key_length doesn't show the length
- of key/pointer pair but a sum of key field lengths only, thus
- we can't estimate index IO volume comparing only this key_length
- value of secondary keys and clustered PK.
- So, try secondary keys first, and choose PK only if there are no
- usable secondary covering keys or found best secondary key include
- all table fields (i.e. same as PK):
-
@return
MAX_KEY no suitable key found
key index otherwise
@@ -13554,41 +19067,23 @@ ok:
uint find_shortest_key(TABLE *table, const key_map *usable_keys)
{
+ double min_cost= DBL_MAX;
uint best= MAX_KEY;
- uint usable_clustered_pk= (table->file->primary_key_is_clustered() &&
- table->s->primary_key != MAX_KEY &&
- usable_keys->is_set(table->s->primary_key)) ?
- table->s->primary_key : MAX_KEY;
if (!usable_keys->is_clear_all())
{
- uint min_length= (uint) ~0;
for (uint nr=0; nr < table->s->keys ; nr++)
{
- if (nr == usable_clustered_pk)
- continue;
if (usable_keys->is_set(nr))
{
- if (table->key_info[nr].key_length < min_length)
+ double cost= table->file->keyread_time(nr, 1, table->file->records());
+ if (cost < min_cost)
{
- min_length=table->key_info[nr].key_length;
+ min_cost= cost;
best=nr;
}
}
}
}
- if (usable_clustered_pk != MAX_KEY)
- {
- /*
- If the primary key is clustered and found shorter key covers all table
- fields then primary key scan normally would be faster because amount of
- data to scan is the same but PK is clustered.
- It's safe to compare key parts with table fields since duplicate key
- parts aren't allowed.
- */
- if (best == MAX_KEY ||
- table->key_info[best].key_parts >= table->s->fields)
- best= usable_clustered_pk;
- }
return best;
}
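
find_shortest_key() above switches from picking the covering key with the smallest key_length to picking the one with the lowest estimated keyread_time(), which also removes the clustered-primary-key special case. The sketch below shows the same "cheapest candidate by estimated cost" loop, with an invented cost model standing in for handler::keyread_time():

    // Illustrative sketch: choose the index whose estimated full-scan cost is
    // lowest; the cost numbers here are invented, not the server's model.
    #include <cfloat>
    #include <cstdio>
    #include <vector>

    struct IndexCandidate
    {
      const char *name;
      double rows;            // rows to read through this index
      double bytes_per_entry;
    };

    static double keyread_cost(const IndexCandidate &idx)   // stand-in cost model
    {
      return idx.rows * idx.bytes_per_entry / 4096.0;       // ~pages to read
    }

    int main()
    {
      std::vector<IndexCandidate> usable= {
        {"PRIMARY", 100000, 200},   // clustered: wide entries
        {"k_small", 100000, 12},    // narrow secondary index
      };
      double min_cost= DBL_MAX;
      const char *best= "none";
      for (const IndexCandidate &idx : usable)
      {
        double cost= keyread_cost(idx);
        if (cost < min_cost)
        {
          min_cost= cost;
          best= idx.name;
        }
      }
      std::printf("best index: %s (cost %.1f)\n", best, min_cost);
      return 0;
    }
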
@@ -13693,8 +19188,6 @@ static bool
list_contains_unique_index(TABLE *table,
bool (*find_func) (Field *, void *), void *data)
{
- if (table->pos_in_table_list->outer_join)
- return 0;
for (uint keynr= 0; keynr < table->s->keys; keynr++)
{
if (keynr == table->s->primary_key ||
@@ -13708,7 +19201,7 @@ list_contains_unique_index(TABLE *table,
key_part < key_part_end;
key_part++)
{
- if (key_part->field->real_maybe_null() ||
+ if (key_part->field->maybe_null() ||
!find_func(key_part->field, data))
break;
}
@@ -13777,8 +19270,8 @@ find_field_in_item_list (Field *field, void *data)
while ((item= li++))
{
- if (item->type() == Item::FIELD_ITEM &&
- ((Item_field*) item)->field->eq(field))
+ if (item->real_item()->type() == Item::FIELD_ITEM &&
+ ((Item_field*) (item->real_item()))->field->eq(field))
{
part_found= 1;
break;
@@ -13810,20 +19303,21 @@ find_field_in_item_list (Field *field, void *data)
static bool
test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
- bool no_changes, key_map *map)
+ bool no_changes, const key_map *map)
{
int ref_key;
- uint ref_key_parts;
+ uint UNINIT_VAR(ref_key_parts);
int order_direction= 0;
- uint used_key_parts;
+ uint used_key_parts= 0;
TABLE *table=tab->table;
SQL_SELECT *select=tab->select;
key_map usable_keys;
- QUICK_SELECT_I *save_quick= 0;
+ QUICK_SELECT_I *save_quick= select ? select->quick : 0;
+ Item *orig_cond= 0;
+ bool orig_cond_saved= false;
int best_key= -1;
-
+ bool changed_key= false;
DBUG_ENTER("test_if_skip_sort_order");
- LINT_INIT(ref_key_parts);
/* Check that we are always called with first non-const table */
DBUG_ASSERT(tab == tab->join->join_tab + tab->join->const_tables);
@@ -13844,7 +19338,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
}
usable_keys.intersect(((Item_field*) item)->field->part_of_sortkey);
if (usable_keys.is_clear_all())
- DBUG_RETURN(0); // No usable keys
+ goto use_filesort; // No usable keys
}
ref_key= -1;
@@ -13854,27 +19348,30 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
ref_key= tab->ref.key;
ref_key_parts= tab->ref.key_parts;
if (tab->type == JT_REF_OR_NULL || tab->type == JT_FT)
- DBUG_RETURN(0);
+ goto use_filesort;
}
else if (select && select->quick) // Range found by opt_range
{
int quick_type= select->quick->get_type();
- save_quick= select->quick;
/*
assume results are not ordered when index merge is used
TODO: sergeyp: Results of all index merge selects actually are ordered
by clustered PK values.
*/
- if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
+ if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
+ quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
- DBUG_RETURN(0);
- ref_key= select->quick->index;
- ref_key_parts= select->quick->used_key_parts;
+ ref_key= MAX_KEY;
+ else
+ {
+ ref_key= select->quick->index;
+ ref_key_parts= select->quick->used_key_parts;
+ }
}
- if (ref_key >= 0)
+ if (ref_key >= 0 && ref_key != MAX_KEY)
{
/*
We come here when there is a REF key.
@@ -13891,10 +19388,15 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
*/
if (table->covering_keys.is_set(ref_key))
usable_keys.intersect(table->covering_keys);
+ if (tab->pre_idx_push_select_cond)
+ {
+ orig_cond= tab->set_cond(tab->pre_idx_push_select_cond);
+ orig_cond_saved= true;
+ }
+
if ((new_ref_key= test_if_subkey(order, table, ref_key, ref_key_parts,
&usable_keys)) < MAX_KEY)
{
- /* Found key that can be used to retrieve data in sorted order */
if (tab->ref.key >= 0)
{
/*
@@ -13908,10 +19410,10 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
KEYUSE *keyuse= tab->keyuse;
while (keyuse->key != new_ref_key && keyuse->table == tab->table)
keyuse++;
- if (create_ref_for_key(tab->join, tab, keyuse,
+ if (create_ref_for_key(tab->join, tab, keyuse, FALSE,
(tab->join->const_table_map |
OUTER_REF_TABLE_BIT)))
- DBUG_RETURN(0);
+ goto use_filesort;
pick_table_access_method(tab);
}
@@ -13921,25 +19423,41 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
The range optimizer constructed QUICK_RANGE for ref_key, and
we want to use instead new_ref_key as the index. We can't
just change the index of the quick select, because this may
- result in an incosistent QUICK_SELECT object. Below we
+ result in an inconsistent QUICK_SELECT object. Below we
create a new QUICK_SELECT from scratch so that all its
- parameres are set correctly by the range optimizer.
+ parameters are set correctly by the range optimizer.
*/
key_map new_ref_key_map;
+ COND *save_cond;
+ bool res;
new_ref_key_map.clear_all(); // Force the creation of quick select
new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key.
+ /* Reset quick; This will be restored in 'use_filesort' if needed */
select->quick= 0;
- if (select->test_quick_select(tab->join->thd, new_ref_key_map, 0,
- (tab->join->select_options &
- OPTION_FOUND_ROWS) ?
- HA_POS_ERROR :
- tab->join->unit->select_limit_cnt,0) <=
- 0)
+ save_cond= select->cond;
+ if (select->pre_idx_push_select_cond)
+ select->cond= select->pre_idx_push_select_cond;
+ res= select->test_quick_select(tab->join->thd, new_ref_key_map, 0,
+ (tab->join->select_options &
+ OPTION_FOUND_ROWS) ?
+ HA_POS_ERROR :
+ tab->join->unit->select_limit_cnt,0,
+ TRUE) <= 0;
+ if (res)
+ {
+ select->cond= save_cond;
goto use_filesort;
+ }
+ /*
+        We don't restore select->cond: we want to use the original
+        condition, since index condition pushdown is not active for
+        the new index.
+ */
}
ref_key= new_ref_key;
- }
+ changed_key= true;
+ }
}
/* Check if we get the rows in requested sorted order by using the key */
if (usable_keys.is_set(ref_key) &&
@@ -13948,7 +19466,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
goto check_reverse_order;
}
{
- uint best_key_parts= 0;
+ uint UNINIT_VAR(best_key_parts);
uint saved_best_key_parts= 0;
int best_key_direction= 0;
JOIN *join= tab->join;
@@ -13963,49 +19481,46 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
/*
filesort() and join cache are usually faster than reading in
index order and not using join cache, except in case that chosen
- index is clustered primary key.
+      index is a clustered key.
*/
- if ((select_limit >= table_records) &&
- (tab->type == JT_ALL &&
- tab->join->tables > tab->join->const_tables + 1) &&
- ((unsigned) best_key != table->s->primary_key ||
- !table->file->primary_key_is_clustered()))
+ if (best_key < 0 ||
+ ((select_limit >= table_records) &&
+ (tab->type == JT_ALL &&
+ tab->join->table_count > tab->join->const_tables + 1) &&
+ !(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX)))
goto use_filesort;
- if (best_key >= 0)
+ if (select &&
+ table->quick_keys.is_set(best_key) && best_key != ref_key)
{
- if (select &&
- table->quick_keys.is_set(best_key) && best_key != ref_key)
- {
- key_map map;
- map.clear_all(); // Force the creation of quick select
- map.set_bit(best_key); // only best_key.
- select->quick= 0;
- select->test_quick_select(join->thd, map, 0,
- join->select_options & OPTION_FOUND_ROWS ?
- HA_POS_ERROR :
- join->unit->select_limit_cnt,
- 0);
- }
- order_direction= best_key_direction;
- /*
- saved_best_key_parts is actual number of used keyparts found by the
- test_if_order_by_key function. It could differ from keyinfo->key_parts,
- thus we have to restore it in case of desc order as it affects
- QUICK_SELECT_DESC behaviour.
- */
- used_key_parts= (order_direction == -1) ?
- saved_best_key_parts : best_key_parts;
+ key_map map;
+ map.clear_all(); // Force the creation of quick select
+ map.set_bit(best_key); // only best_key.
+ select->quick= 0;
+ select->test_quick_select(join->thd, map, 0,
+ join->select_options & OPTION_FOUND_ROWS ?
+ HA_POS_ERROR :
+ join->unit->select_limit_cnt,
+ TRUE, FALSE);
}
- else
- goto use_filesort;
- }
+ order_direction= best_key_direction;
+ /*
+      saved_best_key_parts is the actual number of used keyparts found by the
+ test_if_order_by_key function. It could differ from keyinfo->key_parts,
+ thus we have to restore it in case of desc order as it affects
+ QUICK_SELECT_DESC behaviour.
+ */
+ used_key_parts= (order_direction == -1) ?
+ saved_best_key_parts : best_key_parts;
+ changed_key= true;
+ }
check_reverse_order:
DBUG_ASSERT(order_direction != 0);
if (order_direction == -1) // If ORDER BY ... DESC
{
+ int quick_type;
if (select && select->quick)
{
/*
@@ -14014,25 +19529,23 @@ check_reverse_order:
*/
if (select->quick->reverse_sorted())
goto skipped_filesort;
- else
+
+ quick_type= select->quick->get_type();
+ if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
+ quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
+ quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
+ quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
+ quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
{
- int quick_type= select->quick->get_type();
- if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
- quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
- quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
- quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
- {
- tab->limit= 0;
- goto use_filesort; // Use filesort
- }
+ tab->limit= 0;
+ goto use_filesort; // Use filesort
}
}
}
/*
- Update query plan with access pattern for doing
- ordered access according to what we have decided
- above.
+ Update query plan with access pattern for doing ordered access
+ according to what we have decided above.
*/
if (!no_changes) // We are allowed to update QEP
{
@@ -14046,7 +19559,7 @@ check_reverse_order:
and best_key doesn't, then revert the decision.
*/
if (!table->covering_keys.is_set(best_key))
- table->set_keyread(FALSE);
+ table->disable_keyread();
if (!quick_created)
{
if (select) // Throw any existing quick select
@@ -14057,16 +19570,28 @@ check_reverse_order:
join_read_first:join_read_last;
tab->type=JT_NEXT; // Read with index_first(), index_next()
+ if (tab->pre_idx_push_select_cond)
+ {
+ tab->set_cond(tab->pre_idx_push_select_cond);
+ /*
+ orig_cond is a part of pre_idx_push_cond,
+ no need to restore it.
+ */
+ orig_cond= 0;
+ orig_cond_saved= false;
+ }
+
table->file->ha_index_or_rnd_end();
if (tab->join->select_options & SELECT_DESCRIBE)
{
tab->ref.key= -1;
tab->ref.key_parts= 0;
- if (select_limit < table->file->stats.records)
+ if (select_limit < table->file->stats.records)
tab->limit= select_limit;
+ table->disable_keyread();
}
}
- else if (tab->type != JT_ALL)
+ else if (tab->type != JT_ALL || tab->select->quick)
{
/*
We're about to use a quick access to the table.
@@ -14081,6 +19606,16 @@ check_reverse_order:
tab->read_first_record= join_init_read_record;
if (tab->is_using_loose_index_scan())
tab->join->tmp_table_param.precomputed_group_by= TRUE;
+
+ /*
+ Restore the original condition as changes done by pushdown
+ condition are not relevant anymore
+ */
+ if (tab->select && tab->select->pre_idx_push_select_cond)
+ {
+ tab->set_cond(tab->select->pre_idx_push_select_cond);
+ tab->table->file->cancel_pushed_idx_cond();
+ }
/*
TODO: update the number of records in join->best_positions[tablenr]
*/
@@ -14098,6 +19633,14 @@ check_reverse_order:
tab->limit= 0;
goto use_filesort; // Reverse sort failed -> filesort
}
+ /*
+ Cancel Pushed Index Condition, as it doesn't work for reverse scans.
+ */
+ if (tab->select && tab->select->pre_idx_push_select_cond)
+ {
+ tab->set_cond(tab->select->pre_idx_push_select_cond);
+ tab->table->file->cancel_pushed_idx_cond();
+ }
if (select->quick == save_quick)
save_quick= 0; // make_reverse() consumed it
select->set_quick(tmp);
@@ -14113,10 +19656,18 @@ check_reverse_order:
*/
tab->read_first_record= join_read_last_key;
tab->read_record.read_record= join_read_prev_same;
+ /*
+ Cancel Pushed Index Condition, as it doesn't work for reverse scans.
+ */
+ if (tab->select && tab->select->pre_idx_push_select_cond)
+ {
+ tab->set_cond(tab->select->pre_idx_push_select_cond);
+ tab->table->file->cancel_pushed_idx_cond();
+ }
}
}
else if (select && select->quick)
- select->quick->sorted= 1;
+ select->quick->need_sorted_output();
} // QEP has been modified
@@ -14133,6 +19684,11 @@ skipped_filesort:
delete save_quick;
save_quick= NULL;
}
+ if (orig_cond_saved && !changed_key)
+ tab->set_cond(orig_cond);
+ if (!no_changes && changed_key && table->file->pushed_idx_cond)
+ table->file->cancel_pushed_idx_cond();
+
DBUG_RETURN(1);
use_filesort:
@@ -14142,6 +19698,9 @@ use_filesort:
delete select->quick;
select->quick= save_quick;
}
+ if (orig_cond_saved)
+ tab->set_cond(orig_cond);
+
DBUG_RETURN(0);
}
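
The orig_cond / save_quick bookkeeping above is a save-then-maybe-restore pattern: stash the current condition and quick select, try the alternative ordered-access plan, and roll the changes back if that plan is abandoned. A minimal standalone sketch of the pattern, using only standard C++ and made-up Plan/condition names (nothing here is server code):

#include <cstdio>
#include <functional>
#include <utility>

/* Illustrative only: undoes a tentative change unless commit() is called. */
class RollbackGuard {
  std::function<void()> undo_;
  bool armed_= true;
public:
  explicit RollbackGuard(std::function<void()> undo) : undo_(std::move(undo)) {}
  void commit() { armed_= false; }            // keep the new plan
  ~RollbackGuard() { if (armed_) undo_(); }   // otherwise roll back
};

struct Plan { const char *cond; };            // hypothetical stand-in

static bool try_ordered_access(Plan &plan)
{
  const char *saved_cond= plan.cond;          // like orig_cond above
  RollbackGuard guard([&]{ plan.cond= saved_cond; });
  plan.cond= "condition rewritten for index access";   // tentative change
  bool new_plan_usable= false;                // pretend the new plan failed
  if (!new_plan_usable)
    return false;                             // guard restores plan.cond
  guard.commit();                             // accept the new plan
  return true;
}

int main()
{
  Plan plan= { "original condition" };
  try_ordered_access(plan);
  std::printf("%s\n", plan.cond);             // prints: original condition
  return 0;
}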
@@ -14185,21 +19744,69 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
TABLE *table;
SQL_SELECT *select;
JOIN_TAB *tab;
+ int err= 0;
+ bool quick_created= FALSE;
DBUG_ENTER("create_sort_index");
- if (join->tables == join->const_tables)
+ if (join->table_count == join->const_tables)
DBUG_RETURN(0); // One row, no need to sort
tab= join->join_tab + join->const_tables;
table= tab->table;
select= tab->select;
- tab->saved_select= NULL;
- /*
- If we have a select->quick object that is created outside of
- create_sort_index() and this is part of a subquery that
- potentially can be executed multiple times then we should not
- delete the quick object on exit from this function.
- */
- bool keep_quick= select && select->quick && join->join_tab_save;
+
+ JOIN_TAB *save_pre_sort_join_tab= NULL;
+ if (join->pre_sort_join_tab)
+ {
+ /*
+      We've already been in this function and stashed away the original
+      access method in join->pre_sort_join_tab; restore it now.
+ */
+
+ /* First, restore state of the handler */
+ if (join->pre_sort_index != MAX_KEY)
+ {
+ if (table->file->ha_index_or_rnd_end())
+ goto err;
+ if (join->pre_sort_idx_pushed_cond)
+ {
+ table->file->idx_cond_push(join->pre_sort_index,
+ join->pre_sort_idx_pushed_cond);
+ }
+ }
+ else
+ {
+ if (table->file->ha_index_or_rnd_end() ||
+ table->file->ha_rnd_init(TRUE))
+ goto err;
+ }
+
+ /* Second, restore access method parameters */
+ tab->records= join->pre_sort_join_tab->records;
+ tab->select= join->pre_sort_join_tab->select;
+ tab->select_cond= join->pre_sort_join_tab->select_cond;
+ tab->type= join->pre_sort_join_tab->type;
+ tab->read_first_record= join->pre_sort_join_tab->read_first_record;
+
+ save_pre_sort_join_tab= join->pre_sort_join_tab;
+ join->pre_sort_join_tab= NULL;
+ }
+ else
+ {
+ /*
+      Save index #, save index condition. Do it right now, because MRR may
+      change the handler state before we get another chance to save it.
+ */
+ if (table->file->inited == handler::INDEX)
+ {
+ join->pre_sort_index= table->file->active_index;
+ join->pre_sort_idx_pushed_cond= table->file->pushed_idx_cond;
+ // no need to save key_read
+ }
+ else
+ join->pre_sort_index= MAX_KEY;
+ }
+
+ /* Currently ORDER BY ... LIMIT is not supported in subqueries. */
+ DBUG_ASSERT(join->group_list || !join->is_in_subquery());
/*
When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
@@ -14225,6 +19832,9 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
MYF(MY_WME | MY_ZEROFILL));
table->status=0; // May be wrong if quick_select
+ if (!tab->preread_init_done && tab->preread_init())
+ goto err;
+
// If table has a range, move it to select
if (select && !select->quick && tab->ref.key >= 0)
{
@@ -14237,7 +19847,7 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
and in index_merge 'Only index' cannot be used
*/
if (((uint) tab->ref.key != select->quick->index))
- table->set_keyread(FALSE);
+ table->disable_keyread();
}
else
{
@@ -14248,11 +19858,11 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
field, quick will contain an empty record set.
*/
if (!(select->quick= (tab->type == JT_FT ?
- new FT_SELECT(thd, table, tab->ref.key) :
+ get_ft_select(thd, table, tab->ref.key) :
get_quick_select_for_ref(thd, table, &tab->ref,
tab->found_records))))
goto err;
- DBUG_ASSERT(!keep_quick);
+ quick_created= TRUE;
}
}
@@ -14266,7 +19876,27 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
table->sort.found_records=filesort(thd, table,join->sortorder, length,
select, filesort_limit, 0,
&examined_rows);
+
+ if (quick_created)
+ {
+ /* This will delete the quick select. */
+ select->cleanup();
+ }
+
+ if (!join->pre_sort_join_tab)
+ {
+ if (save_pre_sort_join_tab)
+ join->pre_sort_join_tab= save_pre_sort_join_tab;
+ else if (!(join->pre_sort_join_tab= (JOIN_TAB*)thd->alloc(sizeof(JOIN_TAB))))
+ goto err;
+ }
+
+ *(join->pre_sort_join_tab)= *tab;
+
+ /*TODO: here, close the index scan, cancel index-only read. */
tab->records= table->sort.found_records; // For SQL_CALC_ROWS
+#if 0
+ /* MariaDB doesn't need the following: */
if (select)
{
/*
@@ -14305,6 +19935,7 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
// If we deleted the quick object we need to clear quick_keys
table->quick_keys.clear_all();
+ table->intersect_keys.clear_all();
}
else
{
@@ -14327,32 +19958,79 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
// Restore the output resultset
table->sort.io_cache= tablesort_result_cache;
}
- tab->select_cond=0;
- tab->last_inner= 0;
- tab->first_unmatched= 0;
+#endif
+ tab->select=NULL;
+ tab->set_select_cond(NULL, __LINE__);
tab->type=JT_ALL; // Read with normal read_record
tab->read_first_record= join_init_read_record;
+ tab->table->file->ha_index_or_rnd_end();
+
+ if (err)
+ goto err;
+
tab->join->examined_rows+=examined_rows;
- table->set_keyread(FALSE); // Restore if we used indexes
DBUG_RETURN(table->sort.found_records == HA_POS_ERROR);
err:
DBUG_RETURN(-1);
}
-/*****************************************************************************
- Remove duplicates from tmp table
- This should be recoded to add a unique index to the table and remove
- duplicates
- Table is a locked single thread table
- fields is the number of fields to check (from the end)
-*****************************************************************************/
+void JOIN::clean_pre_sort_join_tab()
+{
+ //TABLE *table= pre_sort_join_tab->table;
+ /*
+    Note: we can come here for the fake_select_lex object. That object will
+    already have had its table deleted by st_select_lex_unit::cleanup().
+    We rely on the fact that fake_select_lex did not have a quick select.
+ */
+#if 0
+ if (pre_sort_join_tab->select && pre_sort_join_tab->select->quick)
+ {
+ /*
+ We need to preserve tablesort's output resultset here, because
+ QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT (called by
+ SQL_SELECT::cleanup()) may free it assuming it's the result of the quick
+ select operation that we no longer need. Note that all the other parts of
+ this data structure are cleaned up when
+ QUICK_INDEX_MERGE_SELECT::get_next encounters end of data, so the next
+ SQL_SELECT::cleanup() call changes sort.io_cache alone.
+ */
+ IO_CACHE *tablesort_result_cache;
+ tablesort_result_cache= table->sort.io_cache;
+ table->sort.io_cache= NULL;
+ pre_sort_join_tab->select->cleanup();
+ table->quick_keys.clear_all(); // as far as we cleanup select->quick
+ table->intersect_keys.clear_all();
+ table->sort.io_cache= tablesort_result_cache;
+ }
+#endif
+ //table->disable_keyread(); // Restore if we used indexes
+ if (pre_sort_join_tab->select && pre_sort_join_tab->select->quick)
+ {
+ pre_sort_join_tab->select->cleanup();
+ }
+}
+
+
+/**
+ Compare fields from table->record[0] and table->record[1],
+  possibly skipping the first few fields.
+
+ @param table
+ @param ptr field to start the comparison from,
+ somewhere in the table->field[] array
+
+ @retval 1 different
+ @retval 0 identical
+*/
static bool compare_record(TABLE *table, Field **ptr)
{
for (; *ptr ; ptr++)
{
- if ((*ptr)->cmp_offset(table->s->rec_buff_length))
+ Field *f= *ptr;
+ if (f->is_null() != f->is_null(table->s->rec_buff_length) ||
+ (!f->is_null() && f->cmp_offset(table->s->rec_buff_length)))
return 1;
}
return 0;
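
compare_record() above treats a field as changed when its NULL flag differs between the two record buffers, or when both values are non-NULL and their bytes differ. A small self-contained sketch of the same rule over std::optional values (illustrative types only, not the server's Field API):

#include <cassert>
#include <optional>
#include <vector>

/*
  Rows are vectors of nullable ints; a field differs if its NULL-ness
  differs, or if both values are non-NULL and unequal.
*/
static bool rows_differ(const std::vector<std::optional<int>> &a,
                        const std::vector<std::optional<int>> &b)
{
  for (size_t i= 0; i < a.size(); i++)
  {
    if (a[i].has_value() != b[i].has_value())
      return true;                          // NULL vs non-NULL
    if (a[i].has_value() && *a[i] != *b[i])
      return true;                          // both non-NULL, values differ
  }
  return false;                             // identical (NULL matches NULL)
}

int main()
{
  assert(!rows_differ({std::nullopt, 1}, {std::nullopt, 1}));  // equal
  assert(rows_differ({std::nullopt, 1}, {0, 1}));              // NULL vs 0
  return 0;
}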
@@ -14380,15 +20058,16 @@ static void free_blobs(Field **ptr)
static int
-remove_duplicates(JOIN *join, TABLE *entry,List<Item> &fields, Item *having)
+remove_duplicates(JOIN *join, TABLE *table, List<Item> &fields, Item *having)
{
int error;
- ulong reclength,offset;
+ ulong keylength= 0;
uint field_count;
THD *thd= join->thd;
+
DBUG_ENTER("remove_duplicates");
- entry->reginfo.lock_type=TL_WRITE;
+ table->reginfo.lock_type=TL_WRITE;
/* Calculate how many saved fields there is in list */
field_count=0;
@@ -14405,45 +20084,49 @@ remove_duplicates(JOIN *join, TABLE *entry,List<Item> &fields, Item *having)
join->unit->select_limit_cnt= 1; // Only send first row
DBUG_RETURN(0);
}
- Field **first_field=entry->field+entry->s->fields - field_count;
- offset= (field_count ?
- entry->field[entry->s->fields - field_count]->
- offset(entry->record[0]) : 0);
- reclength=entry->s->reclength-offset;
-
- free_io_cache(entry); // Safety
- entry->file->info(HA_STATUS_VARIABLE);
- if (entry->s->db_type() == heap_hton ||
- (!entry->s->blob_fields &&
- ((ALIGN_SIZE(reclength) + HASH_OVERHEAD) * entry->file->stats.records <
+
+ Field **first_field=table->field+table->s->fields - field_count;
+ for (Field **ptr=first_field; *ptr; ptr++)
+ keylength+= (*ptr)->sort_length() + (*ptr)->maybe_null();
+
+ /*
+    Disable LIMIT ROWS EXAMINED in order to avoid prematurely interrupting
+    duplicate removal, which would produce a possibly incomplete query result.
+ */
+ thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX;
+ if (thd->killed == ABORT_QUERY)
+ thd->reset_killed();
+
+ free_io_cache(table); // Safety
+ table->file->info(HA_STATUS_VARIABLE);
+ if (table->s->db_type() == heap_hton ||
+ (!table->s->blob_fields &&
+ ((ALIGN_SIZE(keylength) + HASH_OVERHEAD) * table->file->stats.records <
thd->variables.sortbuff_size)))
- error=remove_dup_with_hash_index(join->thd, entry,
- field_count, first_field,
- reclength, having);
+ error=remove_dup_with_hash_index(join->thd, table, field_count, first_field,
+ keylength, having);
else
- error=remove_dup_with_compare(join->thd, entry, first_field, offset,
- having);
+ error=remove_dup_with_compare(join->thd, table, first_field, having);
+ if (join->select_lex != join->select_lex->master_unit()->fake_select_lex)
+ thd->lex->set_limit_rows_examined();
free_blobs(first_field);
DBUG_RETURN(error);
}
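
remove_duplicates() picks the hash-based strategy only when the estimated per-row key storage fits into the sort buffer, and otherwise falls back to the compare-and-rescan method. A rough standalone sketch of that decision; the overhead constant and the numbers in main() are invented for illustration:

#include <cstdint>
#include <cstdio>

/*
  Pick a duplicate-removal strategy the way remove_duplicates() does:
  use a hash table if (key size + overhead) * rows fits in the sort buffer.
*/
enum class DedupStrategy { HASH_INDEX, COMPARE_SCAN };

static DedupStrategy choose_dedup(uint64_t rows, uint64_t key_length,
                                  uint64_t sortbuff_size, bool has_blobs)
{
  const uint64_t HASH_OVERHEAD= 32;         // illustrative per-entry overhead
  if (!has_blobs && (key_length + HASH_OVERHEAD) * rows < sortbuff_size)
    return DedupStrategy::HASH_INDEX;       // one pass over the table
  return DedupStrategy::COMPARE_SCAN;       // fallback: repeated rescans
}

int main()
{
  DedupStrategy s= choose_dedup(/*rows=*/100000, /*key_length=*/24,
                                /*sortbuff_size=*/2u << 20,
                                /*has_blobs=*/false);
  std::printf("%s\n", s == DedupStrategy::HASH_INDEX ? "hash" : "compare");
  return 0;
}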
static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
- ulong offset, Item *having)
+ Item *having)
{
handler *file=table->file;
- char *org_record,*new_record;
- uchar *record;
+ uchar *record=table->record[0];
int error;
- ulong reclength= table->s->reclength-offset;
DBUG_ENTER("remove_dup_with_compare");
- org_record=(char*) (record=table->record[0])+offset;
- new_record=(char*) table->record[1]+offset;
+ if (file->ha_rnd_init_with_error(1))
+ DBUG_RETURN(1);
- file->ha_rnd_init(1);
- error=file->rnd_next(record);
+ error= file->ha_rnd_next(record);
for (;;)
{
if (thd->killed)
@@ -14456,7 +20139,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
{
if (error == HA_ERR_RECORD_DELETED)
{
- error= file->rnd_next(record);
+ error= file->ha_rnd_next(record);
continue;
}
if (error == HA_ERR_END_OF_FILE)
@@ -14465,9 +20148,9 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
}
if (having && !having->val_int())
{
- if ((error=file->ha_delete_row(record)))
+ if ((error= file->ha_delete_row(record)))
goto err;
- error=file->rnd_next(record);
+ error= file->ha_rnd_next(record);
continue;
}
if (copy_blobs(first_field))
@@ -14476,13 +20159,13 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
error=0;
goto err;
}
- memcpy(new_record,org_record,reclength);
+ store_record(table,record[1]);
/* Read through rest of file and mark duplicated rows deleted */
bool found=0;
for (;;)
{
- if ((error=file->rnd_next(record)))
+ if ((error= file->ha_rnd_next(record)))
{
if (error == HA_ERR_RECORD_DELETED)
continue;
@@ -14492,19 +20175,21 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
}
if (compare_record(table, first_field) == 0)
{
- if ((error=file->ha_delete_row(record)))
+ if ((error= file->ha_delete_row(record)))
goto err;
}
else if (!found)
{
found=1;
- file->position(record); // Remember position
+ if ((error= file->remember_rnd_pos()))
+ goto err;
}
}
if (!found)
break; // End of file
- /* Restart search on next row */
- error=file->restart_rnd_next(record,file->ref);
+ /* Restart search on saved row */
+ if ((error= file->restart_rnd_next(record)))
+ goto err;
}
file->extra(HA_EXTRA_NO_CACHE);
@@ -14534,8 +20219,9 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
int error;
handler *file= table->file;
ulong extra_length= ALIGN_SIZE(key_length)-key_length;
- uint *field_lengths,*field_length;
+ uint *field_lengths, *field_length;
HASH hash;
+ Field **ptr;
DBUG_ENTER("remove_dup_with_hash_index");
if (!my_multi_malloc(MYF(MY_WME),
@@ -14547,21 +20233,8 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
NullS))
DBUG_RETURN(1);
- {
- Field **ptr;
- ulong total_length= 0;
- for (ptr= first_field, field_length=field_lengths ; *ptr ; ptr++)
- {
- uint length= (*ptr)->sort_length();
- (*field_length++)= length;
- total_length+= length;
- }
- DBUG_PRINT("info",("field_count: %u key_length: %lu total_length: %lu",
- field_count, key_length, total_length));
- DBUG_ASSERT(total_length <= key_length);
- key_length= total_length;
- extra_length= ALIGN_SIZE(key_length)-key_length;
- }
+ for (ptr= first_field, field_length=field_lengths ; *ptr ; ptr++)
+ (*field_length++)= (*ptr)->sort_length();
if (my_hash_init(&hash, &my_charset_bin, (uint) file->stats.records, 0,
key_length, (my_hash_get_key) 0, 0, 0))
@@ -14570,7 +20243,9 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
DBUG_RETURN(1);
}
- file->ha_rnd_init(1);
+ if ((error= file->ha_rnd_init(1)))
+ goto err;
+
key_pos=key_buffer;
for (;;)
{
@@ -14581,7 +20256,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
error=0;
goto err;
}
- if ((error=file->rnd_next(record)))
+ if ((error= file->ha_rnd_next(record)))
{
if (error == HA_ERR_RECORD_DELETED)
continue;
@@ -14591,7 +20266,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
}
if (having && !having->val_int())
{
- if ((error=file->ha_delete_row(record)))
+ if ((error= file->ha_delete_row(record)))
goto err;
continue;
}
@@ -14599,16 +20274,16 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
/* copy fields to key buffer */
org_key_pos= key_pos;
field_length=field_lengths;
- for (Field **ptr= first_field ; *ptr ; ptr++)
+ for (ptr= first_field ; *ptr ; ptr++)
{
- (*ptr)->sort_string(key_pos,*field_length);
- key_pos+= *field_length++;
+ (*ptr)->make_sort_key(key_pos, *field_length);
+ key_pos+= (*ptr)->maybe_null() + *field_length++;
}
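+      /*
+        Each nullable field contributes a NULL-indicator byte in addition to
+        its sort image, so the hash key for a row is the concatenation of
+        these images; two rows hash and compare equal exactly when all their
+        key bytes match.
+      */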
/* Check if it exists before */
if (my_hash_search(&hash, org_key_pos, key_length))
{
/* Duplicated found ; Remove the row */
- if ((error=file->ha_delete_row(record)))
+ if ((error= file->ha_delete_row(record)))
goto err;
}
else
@@ -14689,274 +20364,52 @@ SORT_FIELD *make_unireg_sortorder(ORDER *order, uint *length,
}
-/*****************************************************************************
- Fill join cache with packed records
- Records are stored in tab->cache.buffer and last record in
- last record is stored with pointers to blobs to support very big
- records
-******************************************************************************/
+/*
+ eq_ref: Create the lookup key and check if it is the same as saved key
-static int
-join_init_cache(THD *thd,JOIN_TAB *tables,uint table_count)
-{
- reg1 uint i;
- uint length, blobs;
- size_t size;
- CACHE_FIELD *copy,**blob_ptr;
- JOIN_CACHE *cache;
- JOIN_TAB *join_tab;
- DBUG_ENTER("join_init_cache");
-
- cache= &tables[table_count].cache;
- cache->fields=blobs=0;
-
- join_tab=tables;
- for (i=0 ; i < table_count ; i++,join_tab++)
- {
- if (!join_tab->used_fieldlength) /* Not calced yet */
- calc_used_field_length(thd, join_tab);
- cache->fields+=join_tab->used_fields;
- blobs+=join_tab->used_blobs;
- }
- if (!(cache->field=(CACHE_FIELD*)
- sql_alloc(sizeof(CACHE_FIELD)*(cache->fields+table_count*2)+(blobs+1)*
-
- sizeof(CACHE_FIELD*))))
- {
- my_free(cache->buff); /* purecov: inspected */
- cache->buff=0; /* purecov: inspected */
- DBUG_RETURN(1); /* purecov: inspected */
- }
- copy=cache->field;
- blob_ptr=cache->blob_ptr=(CACHE_FIELD**)
- (cache->field+cache->fields+table_count*2);
-
- length=0;
- for (i=0 ; i < table_count ; i++)
- {
- bool have_bit_fields= FALSE;
- uint null_fields=0,used_fields;
- Field **f_ptr,*field;
- MY_BITMAP *read_set= tables[i].table->read_set;
- for (f_ptr=tables[i].table->field,used_fields=tables[i].used_fields ;
- used_fields ;
- f_ptr++)
- {
- field= *f_ptr;
- if (bitmap_is_set(read_set, field->field_index))
- {
- used_fields--;
- length+=field->fill_cache_field(copy);
- if (copy->type == CACHE_BLOB)
- (*blob_ptr++)=copy;
- if (field->real_maybe_null())
- null_fields++;
- if (field->type() == MYSQL_TYPE_BIT &&
- ((Field_bit*)field)->bit_len)
- have_bit_fields= TRUE;
- copy++;
- }
- }
- /* Copy null bits from table */
- if (null_fields || have_bit_fields)
- { /* must copy null bits */
- copy->str= tables[i].table->null_flags;
- copy->length= tables[i].table->s->null_bytes;
- copy->type=0;
- copy->field=0;
- length+=copy->length;
- copy++;
- cache->fields++;
- }
- /* If outer join table, copy null_row flag */
- if (tables[i].table->maybe_null)
- {
- copy->str= (uchar*) &tables[i].table->null_row;
- copy->length=sizeof(tables[i].table->null_row);
- copy->type=0;
- copy->field=0;
- length+=copy->length;
- copy++;
- cache->fields++;
- }
- }
- cache->length=length+blobs*sizeof(char*);
- cache->blobs=blobs;
- *blob_ptr=0; /* End sequentel */
- size=max(thd->variables.join_buff_size, cache->length);
- if (!(cache->buff=(uchar*) my_malloc(size,MYF(0))))
- DBUG_RETURN(1); /* Don't use cache */ /* purecov: inspected */
- cache->end=cache->buff+size;
- reset_cache_write(cache);
- DBUG_RETURN(0);
-}
-static ulong
-used_blob_length(CACHE_FIELD **ptr)
-{
- uint length,blob_length;
- for (length=0 ; *ptr ; ptr++)
- {
- Field_blob *field_blob= (Field_blob *) (*ptr)->field;
- (*ptr)->blob_length=blob_length= field_blob->get_length();
- length+=blob_length;
- field_blob->get_ptr(&(*ptr)->str);
- }
- return length;
-}
+ SYNOPSIS
+ cmp_buffer_with_ref()
+ tab Join tab of the accessed table
+ table The table to read. This is usually tab->table, except for
+ semi-join when we might need to make a lookup in a temptable
+ instead.
+ tab_ref The structure with methods to collect index lookup tuple.
+ This is usually table->ref, except for the case of when we're
+ doing lookup into semi-join materialization table.
+
+ DESCRIPTION
+    Used by the eq_ref access method: create the index lookup key and check
+    whether we used this key for the previous lookup (if yes, we don't need
+    to repeat the lookup - the record has already been fetched).
+ RETURN
+ TRUE No cached record for the key, or failed to create the key (due to
+ out-of-domain error)
+ FALSE The created key is the same as the previous one (and the record
+ is already in table->record)
+*/
static bool
-store_record_in_cache(JOIN_CACHE *cache)
+cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref)
{
- uint length;
- uchar *pos;
- CACHE_FIELD *copy,*end_field;
- bool last_record;
-
- pos=cache->pos;
- end_field=cache->field+cache->fields;
-
- length=cache->length;
- if (cache->blobs)
- length+=used_blob_length(cache->blob_ptr);
- if ((last_record= (length + cache->length > (size_t) (cache->end - pos))))
- cache->ptr_record=cache->records;
-
- /*
- There is room in cache. Put record there
- */
- cache->records++;
- for (copy=cache->field ; copy < end_field; copy++)
+ bool no_prev_key;
+ if (!tab_ref->disable_cache)
{
- if (copy->type == CACHE_BLOB)
+ if (!(no_prev_key= tab_ref->key_err))
{
- Field_blob *blob_field= (Field_blob *) copy->field;
- if (last_record)
- {
- blob_field->get_image(pos, copy->length+sizeof(char*),
- blob_field->charset());
- pos+=copy->length+sizeof(char*);
- }
- else
- {
- blob_field->get_image(pos, copy->length, // blob length
- blob_field->charset());
- memcpy(pos+copy->length,copy->str,copy->blob_length); // Blob data
- pos+=copy->length+copy->blob_length;
- }
- }
- else
- {
- if (copy->type == CACHE_STRIPPED)
- {
- uchar *str,*end;
- Field *field= copy->field;
- if (field && field->maybe_null() && field->is_null())
- end= str= copy->str;
- else
- for (str=copy->str,end= str+copy->length;
- end > str && end[-1] == ' ' ;
- end--) ;
- length=(uint) (end-str);
- memcpy(pos+2, str, length);
- int2store(pos, length);
- pos+= length+2;
- }
- else
- {
- memcpy(pos,copy->str,copy->length);
- pos+=copy->length;
- }
+ /* Previous access found a row. Copy its key */
+ memcpy(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length);
}
}
- cache->pos=pos;
- return last_record || (size_t) (cache->end - pos) < cache->length;
-}
-
-
-static void
-reset_cache_read(JOIN_CACHE *cache)
-{
- cache->record_nr=0;
- cache->pos=cache->buff;
-}
-
-
-static void reset_cache_write(JOIN_CACHE *cache)
-{
- reset_cache_read(cache);
- cache->records= 0;
- cache->ptr_record= (uint) ~0;
-}
-
-
-static void
-read_cached_record(JOIN_TAB *tab)
-{
- uchar *pos;
- uint length;
- bool last_record;
- CACHE_FIELD *copy,*end_field;
-
- last_record=tab->cache.record_nr++ == tab->cache.ptr_record;
- pos=tab->cache.pos;
-
- for (copy=tab->cache.field,end_field=copy+tab->cache.fields ;
- copy < end_field;
- copy++)
- {
- if (copy->type == CACHE_BLOB)
- {
- Field_blob *blob_field= (Field_blob *) copy->field;
- if (last_record)
- {
- blob_field->set_image(pos, copy->length+sizeof(char*),
- blob_field->charset());
- pos+=copy->length+sizeof(char*);
- }
- else
- {
- blob_field->set_ptr(pos, pos+copy->length);
- pos+=copy->length + blob_field->get_length();
- }
- }
- else
- {
- if (copy->type == CACHE_STRIPPED)
- {
- length= uint2korr(pos);
- memcpy(copy->str, pos+2, length);
- memset(copy->str+length, ' ', copy->length-length);
- pos+= 2 + length;
- }
- else
- {
- memcpy(copy->str,pos,copy->length);
- pos+=copy->length;
- }
- }
- }
- tab->cache.pos=pos;
- return;
-}
-
-
-static bool
-cmp_buffer_with_ref(JOIN_TAB *tab)
-{
- bool diff;
- if (!(diff=tab->ref.key_err))
- {
- memcpy(tab->ref.key_buff2, tab->ref.key_buff, tab->ref.key_length);
- }
- if ((tab->ref.key_err= cp_buffer_from_ref(tab->join->thd, tab->table,
- &tab->ref)) ||
- diff)
+ else
+ no_prev_key= TRUE;
+ if ((tab_ref->key_err= cp_buffer_from_ref(thd, table, tab_ref)) ||
+ no_prev_key)
return 1;
- return memcmp(tab->ref.key_buff2, tab->ref.key_buff, tab->ref.key_length)
+ return memcmp(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length)
!= 0;
}
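
cmp_buffer_with_ref() boils down to memoizing the last lookup key: rebuild the key for the current row and only perform the index lookup when it differs from the previous one. A tiny self-contained illustration of that idea (hypothetical EqRefCache type, not the server's TABLE_REF):

#include <array>
#include <cstdio>
#include <cstring>

/* Caches the last lookup key; returns true when a new index lookup is needed. */
struct EqRefCache {
  std::array<char, 16> prev_key{};
  bool have_prev= false;

  bool need_lookup(const std::array<char, 16> &key)
  {
    if (have_prev &&
        std::memcmp(prev_key.data(), key.data(), key.size()) == 0)
      return false;               // same key as last time: reuse cached row
    prev_key= key;                // remember the key we are about to use
    have_prev= true;
    return true;                  // different key: do the index lookup
  }
};

int main()
{
  EqRefCache cache;
  std::array<char, 16> k1{'a'}, k2{'b'};
  bool first=  cache.need_lookup(k1);   // true: first lookup
  bool second= cache.need_lookup(k1);   // false: same key, row already cached
  bool third=  cache.need_lookup(k2);   // true: key changed
  std::printf("%d %d %d\n", first, second, third);
  return 0;
}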
@@ -15045,11 +20498,11 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
order_item->full_name(), thd->where);
return TRUE;
}
- order->item= ref_pointer_array + count - 1;
+ thd->change_item_tree((Item**)&order->item, (Item*)(ref_pointer_array + count - 1));
order->in_field_list= 1;
order->counter= count;
order->counter_used= 1;
- return FALSE;
+ return FALSE;
}
/* Lookup the current GROUP/ORDER field in the SELECT clause. */
select_item= find_item_in_list(order_item, fields, &counter,
@@ -15074,12 +20527,11 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
/* Lookup the current GROUP field in the FROM clause. */
order_item_type= order_item->type();
from_field= (Field*) not_found_field;
- if ((is_group_field &&
- order_item_type == Item::FIELD_ITEM) ||
+ if ((is_group_field && order_item_type == Item::FIELD_ITEM) ||
order_item_type == Item::REF_ITEM)
{
from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables,
- NULL, &view_ref, IGNORE_ERRORS, TRUE,
+ NULL, &view_ref, IGNORE_ERRORS, FALSE,
FALSE);
if (!from_field)
from_field= (Field*) not_found_field;
@@ -15135,30 +20587,12 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
time.
We check order_item->fixed because Item_func_group_concat can put
- arguments for which fix_fields already was called.
-
- group_fix_field= TRUE is to resolve aliases from the SELECT list
- without creating of Item_ref-s: JOIN::exec() wraps aliased items
- in SELECT list with Item_copy items. To re-evaluate such a tree
- that includes Item_copy items we have to refresh Item_copy caches,
- but:
- - filesort() never refresh Item_copy items,
- - end_send_group() checks every record for group boundary by the
- test_if_group_changed function that obtain data from these
- Item_copy items, but the copy_fields function that
- refreshes Item copy items is called after group boundaries only -
- that is a vicious circle.
- So we prevent inclusion of Item_copy items.
+ arguments for which fix_fields already was called.
*/
- bool save_group_fix_field= thd->lex->current_select->group_fix_field;
- if (is_group_field)
- thd->lex->current_select->group_fix_field= TRUE;
- bool ret= (!order_item->fixed &&
+ if (!order_item->fixed &&
(order_item->fix_fields(thd, order->item) ||
(order_item= *order->item)->check_cols(1) ||
- thd->is_fatal_error));
- thd->lex->current_select->group_fix_field= save_group_fix_field;
- if (ret)
+ thd->is_error()))
return TRUE; /* Wrong field. */
uint el= all_fields.elements;
@@ -15241,6 +20675,8 @@ setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
uint org_fields=all_fields.elements;
thd->where="group statement";
+ enum_parsing_place save_place= thd->lex->current_select->parsing_place;
+ thd->lex->current_select->parsing_place= IN_GROUP_BY;
for (ord= order; ord; ord= ord->next)
{
if (find_order_in_list(thd, ref_pointer_array, tables, ord, fields,
@@ -15253,6 +20689,8 @@ setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
return 1;
}
}
+ thd->lex->current_select->parsing_place= save_place;
+
if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY)
{
/*
@@ -15274,7 +20712,7 @@ setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
Item_field *field;
int cur_pos_in_select_list= 0;
List_iterator<Item> li(fields);
- List_iterator<Item_field> naf_it(thd->lex->current_select->non_agg_fields);
+ List_iterator<Item_field> naf_it(thd->lex->current_select->join->non_agg_fields);
field= naf_it++;
while (field && (item=li++))
@@ -15520,8 +20958,11 @@ test_if_subpart(ORDER *a,ORDER *b)
*/
static TABLE *
-get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
+get_sort_by_table(ORDER *a,ORDER *b, List<TABLE_LIST> &tables,
+ table_map const_tables)
{
+ TABLE_LIST *table;
+ List_iterator<TABLE_LIST> ti(tables);
table_map map= (table_map) 0;
DBUG_ENTER("get_sort_by_table");
@@ -15532,6 +20973,23 @@ get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
for (; a && b; a=a->next,b=b->next)
{
+ /* Skip elements of a that are constant */
+ while (!((*a->item)->used_tables() & ~const_tables))
+ {
+ if (!(a= a->next))
+ break;
+ }
+
+ /* Skip elements of b that are constant */
+ while (!((*b->item)->used_tables() & ~const_tables))
+ {
+ if (!(b= b->next))
+ break;
+ }
+
+ if (!a || !b)
+ break;
+
if (!(*a->item)->eq(*b->item,1))
DBUG_RETURN(0);
map|=a->item[0]->used_tables();
@@ -15539,11 +20997,11 @@ get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
DBUG_RETURN(0);
- for (; !(map & tables->table->map); tables= tables->next_leaf) ;
- if (map != tables->table->map)
+ while ((table= ti++) && !(map & table->table->map)) ;
+ if (map != table->table->map)
DBUG_RETURN(0); // More than one table
- DBUG_PRINT("exit",("sort by table: %d",tables->table->tablenr));
- DBUG_RETURN(tables->table);
+ DBUG_PRINT("exit",("sort by table: %d",table->table->tablenr));
+ DBUG_RETURN(table->table);
}
@@ -15679,7 +21137,7 @@ alloc_group_fields(JOIN *join,ORDER *group)
{
for (; group ; group=group->next)
{
- Cached_item *tmp=new_Cached_item(join->thd, *group->item);
+ Cached_item *tmp=new_Cached_item(join->thd, *group->item, TRUE);
if (!tmp || join->group_fields.push_front(tmp))
return TRUE;
}
@@ -15689,6 +21147,38 @@ alloc_group_fields(JOIN *join,ORDER *group)
}
+
+/*
+ Test if a single-row cache of items changed, and update the cache.
+
+ @details Test if a list of items that typically represents a result
+ row has changed. If the value of some item changed, update the cached
+ value for this item.
+
+ @param list list of <item, cached_value> pairs stored as Cached_item.
+
+ @return -1 if no item changed
+ @return index of the first item that changed
+*/
+
+int test_if_item_cache_changed(List<Cached_item> &list)
+{
+ DBUG_ENTER("test_if_item_cache_changed");
+ List_iterator<Cached_item> li(list);
+ int idx= -1,i;
+ Cached_item *buff;
+
+ for (i=(int) list.elements-1 ; (buff=li++) ; i--)
+ {
+ if (buff->cmp())
+ idx=i;
+ }
+ DBUG_PRINT("info", ("idx: %d", idx));
+ DBUG_RETURN(idx);
+}
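+
+test_if_item_cache_changed() compares each item against its cached value, refreshes the cache as a side effect, and reports whether anything changed. A minimal standalone sketch of the same contract over plain integers (illustrative names only):
+
+#include <cstdio>
+#include <vector>
+
+/*
+  Compare new values against a cache, refresh the cache, and return the
+  index of the first changed slot, or -1 when nothing changed.
+*/
+static int cache_changed(std::vector<int> &cache, const std::vector<int> &row)
+{
+  int first_changed= -1;
+  for (size_t i= 0; i < cache.size(); i++)
+  {
+    if (cache[i] != row[i])
+    {
+      cache[i]= row[i];                        // refresh the cached value
+      if (first_changed < 0)
+        first_changed= static_cast<int>(i);
+    }
+  }
+  return first_changed;
+}
+
+int main()
+{
+  std::vector<int> cache= {1, 2, 3};
+  std::printf("%d\n", cache_changed(cache, {1, 2, 3}));  // -1: no change
+  std::printf("%d\n", cache_changed(cache, {1, 9, 3}));  //  1: slot 1 changed
+  return 0;
+}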
+
+
+
static int
test_if_group_changed(List<Cached_item> &list)
{
@@ -15786,7 +21276,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
pos= item;
if (item->field->flags & BLOB_FLAG)
{
- if (!(pos= Item_copy::create(pos)))
+ if (!(pos= new Item_copy_string(pos)))
goto err;
/*
Item_copy_string::copy for function can call
@@ -15820,7 +21310,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
DBUG_ASSERT (param->field_count > (uint) (copy - copy_start));
copy->set(tmp, item->result_field);
item->result_field->move_field(copy->to_ptr,copy->to_null_ptr,1);
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
copy->to_ptr[copy->from_length]= 0;
#endif
copy++;
@@ -15828,7 +21318,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
}
}
else if ((real_pos->type() == Item::FUNC_ITEM ||
- real_pos->type() == Item::SUBSELECT_ITEM ||
+ real_pos->real_type() == Item::SUBSELECT_ITEM ||
real_pos->type() == Item::CACHE_ITEM ||
real_pos->type() == Item::COND_ITEM) &&
!real_pos->with_sum_func)
@@ -15840,7 +21330,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
on how the value is to be used: In some cases this may be an
argument in a group function, like: IF(ISNULL(col),0,COUNT(*))
*/
- if (!(pos= Item_copy::create(pos)))
+ if (!(pos=new Item_copy_string(pos)))
goto err;
if (i < border) // HAVING, ORDER and GROUP BY
{
@@ -15889,12 +21379,14 @@ copy_fields(TMP_TABLE_PARAM *param)
Copy_field *ptr=param->copy_field;
Copy_field *end=param->copy_field_end;
+ DBUG_ASSERT((ptr != NULL && end >= ptr) || (ptr == NULL && end == NULL));
+
for (; ptr != end; ptr++)
(*ptr->do_copy)(ptr);
List_iterator_fast<Item> it(param->copy_funcs);
- Item_copy *item;
- while ((item = (Item_copy*) it++))
+ Item_copy_string *item;
+ while ((item = (Item_copy_string*) it++))
item->copy();
}
@@ -16043,7 +21535,7 @@ change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array,
((Item_func*)item)->functype() == Item_func::SUSERVAR_FUNC)
{
field= item->get_tmp_table_field();
- if( field != NULL)
+ if (field != NULL)
{
/*
Replace "@:=<expression>" with "@:=<tmp table column>". Otherwise, we
@@ -16095,6 +21587,7 @@ change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array,
char buff[256];
String str(buff,sizeof(buff),&my_charset_bin);
str.length(0);
+ str.extra_allocation(1024);
item->print(&str, QT_ORDINARY);
item_field->name= sql_strmake(str.ptr(),str.length());
}
@@ -16329,18 +21822,34 @@ static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
}
if (thd->is_fatal_error)
DBUG_RETURN(TRUE);
-
if (!cond->fixed)
- cond->fix_fields(thd, (Item**)&cond);
+ {
+ Item *tmp_item= (Item*) cond;
+ cond->fix_fields(thd, &tmp_item);
+ DBUG_ASSERT(cond == tmp_item);
+ }
if (join_tab->select)
{
+ Item *cond_copy;
+ UNINIT_VAR(cond_copy); // used when pre_idx_push_select_cond!=NULL
+ if (join_tab->select->pre_idx_push_select_cond)
+ cond_copy= cond->copy_andor_structure(thd);
if (join_tab->select->cond)
error=(int) cond->add(join_tab->select->cond);
- join_tab->select_cond=join_tab->select->cond=cond;
+ join_tab->select->cond= cond;
+ if (join_tab->select->pre_idx_push_select_cond)
+ {
+ Item *new_cond= and_conds(cond_copy, join_tab->select->pre_idx_push_select_cond);
+ if (!new_cond->fixed && new_cond->fix_fields(thd, &new_cond))
+ error= 1;
+ join_tab->pre_idx_push_select_cond=
+ join_tab->select->pre_idx_push_select_cond= new_cond;
+ }
+ join_tab->set_select_cond(cond, __LINE__);
}
else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond, 0,
&error)))
- join_tab->select_cond=cond;
+ join_tab->set_select_cond(cond, __LINE__);
DBUG_RETURN(error ? TRUE : FALSE);
}
@@ -16443,6 +21952,7 @@ static bool change_group_ref(THD *thd, Item_func *expr, ORDER *group_list,
if (arg_changed)
{
expr->maybe_null= 1;
+ expr->in_rollup= 1;
*changed= TRUE;
}
}
@@ -16506,6 +22016,7 @@ bool JOIN::rollup_init()
if (*group_tmp->item == item)
{
item->maybe_null= 1;
+ item->in_rollup= 1;
found_in_group= 1;
break;
}
@@ -16736,6 +22247,7 @@ int JOIN::rollup_send_data(uint idx)
uint i;
for (i= send_group_parts ; i-- > idx ; )
{
+ int res= 0;
/* Get reference pointers to sum functions in place */
memcpy((char*) ref_pointer_array,
(char*) rollup.ref_pointer_arrays[i],
@@ -16743,9 +22255,10 @@ int JOIN::rollup_send_data(uint idx)
if ((!having || having->val_int()))
{
if (send_records < unit->select_limit_cnt && do_send_rows &&
- result->send_data(rollup.fields[i]))
+ (res= result->send_data(rollup.fields[i])) > 0)
return 1;
- send_records++;
+ if (!res)
+ send_records++;
}
}
/* Restore ref_pointer_array */
@@ -16793,10 +22306,12 @@ int JOIN::rollup_write_data(uint idx, TABLE *table_arg)
item->save_in_result_field(1);
}
copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]);
- if ((write_error= table_arg->file->ha_write_row(table_arg->record[0])))
+ if ((write_error= table_arg->file->ha_write_tmp_row(table_arg->record[0])))
{
- if (create_myisam_from_heap(thd, table_arg, &tmp_table_param,
- write_error, 0))
+ if (create_internal_tmp_table_from_heap(thd, table_arg,
+ tmp_table_param.start_recinfo,
+ &tmp_table_param.recinfo,
+ write_error, 0, NULL))
return 1;
}
}
@@ -16878,7 +22393,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
the UNION to provide precise EXPLAIN information will hardly be
appreciated :)
*/
- char table_name_buffer[NAME_LEN];
+ char table_name_buffer[SAFE_NAME_LEN];
item_list.empty();
/* id */
item_list.push_back(new Item_null);
@@ -16939,46 +22454,76 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
if (result->send_data(item_list))
join->error= 1;
}
- else
+ else if (!join->select_lex->master_unit()->derived ||
+ join->select_lex->master_unit()->derived->is_materialized_derived())
{
table_map used_tables=0;
- for (uint i=0 ; i < join->tables ; i++)
+
+ bool printing_materialize_nest= FALSE;
+ uint select_id= join->select_lex->select_number;
+
+ for (JOIN_TAB *tab= first_breadth_first_tab(join, WALK_OPTIMIZATION_TABS); tab;
+ tab= next_breadth_first_tab(join, WALK_OPTIMIZATION_TABS, tab))
{
- JOIN_TAB *tab=join->join_tab+i;
+ if (tab->bush_root_tab)
+ {
+ JOIN_TAB *first_sibling= tab->bush_root_tab->bush_children->start;
+ select_id= first_sibling->emb_sj_nest->sj_subq_pred->get_identifier();
+ printing_materialize_nest= TRUE;
+ }
+
TABLE *table=tab->table;
TABLE_LIST *table_list= tab->table->pos_in_table_list;
char buff[512];
- char buff1[512], buff2[512], buff3[512];
+ char buff1[512], buff2[512], buff3[512], buff4[512];
char keylen_str_buf[64];
+ my_bool key_read;
String extra(buff, sizeof(buff),cs);
- char table_name_buffer[NAME_LEN];
+ char table_name_buffer[SAFE_NAME_LEN];
String tmp1(buff1,sizeof(buff1),cs);
String tmp2(buff2,sizeof(buff2),cs);
String tmp3(buff3,sizeof(buff3),cs);
+ String tmp4(buff4,sizeof(buff4),cs);
+ char hash_key_prefix[]= "#hash#";
+ KEY *key_info= 0;
+ uint key_len= 0;
+ bool is_hj= tab->type == JT_HASH || tab->type ==JT_HASH_NEXT;
+
extra.length(0);
tmp1.length(0);
tmp2.length(0);
tmp3.length(0);
-
+ tmp4.length(0);
quick_type= -1;
+
+ /* Don't show eliminated tables */
+ if (table->map & join->eliminated_tables)
+ {
+ used_tables|=table->map;
+ continue;
+ }
+
item_list.empty();
/* id */
- item_list.push_back(new Item_uint((uint32)
- join->select_lex->select_number));
+ item_list.push_back(new Item_uint((uint32)select_id));
/* select_type */
- item_list.push_back(new Item_string(join->select_lex->type,
- strlen(join->select_lex->type),
- cs));
- if (tab->type == JT_ALL && tab->select && tab->select->quick)
+ const char* stype= printing_materialize_nest? "MATERIALIZED" :
+ join->select_lex->type;
+ item_list.push_back(new Item_string(stype, strlen(stype), cs));
+
+ if ((tab->type == JT_ALL || tab->type == JT_HASH) &&
+ tab->select && tab->select->quick)
{
quick_type= tab->select->quick->get_type();
if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) ||
+ (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT) ||
(quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) ||
(quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION))
- tab->type = JT_INDEX_MERGE;
+ tab->type= tab->type == JT_ALL ? JT_INDEX_MERGE : JT_HASH_INDEX_MERGE;
else
- tab->type = JT_RANGE;
+ tab->type= tab->type == JT_ALL ? JT_RANGE : JT_HASH_RANGE;
}
+
/* table */
if (table->derived_select_number)
{
@@ -16988,12 +22533,21 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
table->derived_select_number);
item_list.push_back(new Item_string(table_name_buffer, len, cs));
}
+ else if (tab->bush_children)
+ {
+ JOIN_TAB *ctab= tab->bush_children->start;
+ /* table */
+ int len= my_snprintf(table_name_buffer,
+ sizeof(table_name_buffer)-1,
+ "<subquery%d>",
+ ctab->emb_sj_nest->sj_subq_pred->get_identifier());
+ item_list.push_back(new Item_string(table_name_buffer, len, cs));
+ }
else
{
- TABLE_LIST *real_table= table->pos_in_table_list;
+ TABLE_LIST *real_table= table->pos_in_table_list;
item_list.push_back(new Item_string(real_table->alias,
- strlen(real_table->alias),
- cs));
+ strlen(real_table->alias), cs));
}
/* "partitions" column */
if (join->thd->lex->describe & DESCRIBE_PARTITIONS)
@@ -17040,49 +22594,71 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
item_list.push_back(item_null);
/* Build "key", "key_len", and "ref" values and add them to item_list */
- if (tab->ref.key_parts)
+ if (tab->type == JT_NEXT)
+ {
+ key_info= table->key_info+tab->index;
+ key_len= key_info->key_length;
+ }
+ else if (tab->ref.key_parts)
+ {
+ key_info= tab->get_keyinfo_by_key_no(tab->ref.key);
+ key_len= tab->ref.key_length;
+ }
+ if (key_info)
{
- KEY *key_info=table->key_info+ tab->ref.key;
register uint length;
- item_list.push_back(new Item_string(key_info->name,
- strlen(key_info->name),
- system_charset_info));
- length= longlong2str(tab->ref.key_length, keylen_str_buf, 10) -
- keylen_str_buf;
- item_list.push_back(new Item_string(keylen_str_buf, length,
- system_charset_info));
- for (store_key **ref=tab->ref.key_copy ; *ref ; ref++)
+ if (is_hj)
+ tmp2.append(hash_key_prefix, strlen(hash_key_prefix), cs);
+ tmp2.append(key_info->name, strlen(key_info->name), cs);
+ length= (longlong10_to_str(key_len, keylen_str_buf, 10) -
+ keylen_str_buf);
+ tmp3.append(keylen_str_buf, length, cs);
+ if (tab->ref.key_parts)
{
- if (tmp2.length())
- tmp2.append(',');
- tmp2.append((*ref)->name(), strlen((*ref)->name()),
- system_charset_info);
- }
- item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
+ for (store_key **ref=tab->ref.key_copy ; *ref ; ref++)
+ {
+ if (tmp4.length())
+ tmp4.append(',');
+ tmp4.append((*ref)->name(), strlen((*ref)->name()), cs);
+ }
+ }
}
- else if (tab->type == JT_NEXT)
+ if (is_hj && tab->type != JT_HASH)
{
- KEY *key_info=table->key_info+ tab->index;
- register uint length;
- item_list.push_back(new Item_string(key_info->name,
- strlen(key_info->name),cs));
- length= longlong2str(key_info->key_length, keylen_str_buf, 10) -
- keylen_str_buf;
- item_list.push_back(new Item_string(keylen_str_buf,
- length,
- system_charset_info));
- item_list.push_back(item_null);
+ tmp2.append(':');
+ tmp3.append(':');
}
- else if (tab->select && tab->select->quick)
+ if (tab->type == JT_HASH_NEXT)
{
+ register uint length;
+ key_info= table->key_info+tab->index;
+ key_len= key_info->key_length;
+ tmp2.append(key_info->name, strlen(key_info->name), cs);
+ length= (longlong10_to_str(key_len, keylen_str_buf, 10) -
+ keylen_str_buf);
+ tmp3.append(keylen_str_buf, length, cs);
+ }
+ if (tab->type != JT_CONST && tab->select && tab->select->quick)
tab->select->quick->add_keys_and_lengths(&tmp2, &tmp3);
- item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
- item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs));
- item_list.push_back(item_null);
+ if (key_info || (tab->select && tab->select->quick))
+ {
+ if (tmp2.length())
+ item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
+ else
+ item_list.push_back(item_null);
+ if (tmp3.length())
+ item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs));
+ else
+ item_list.push_back(item_null);
+ if (key_info && tab->type != JT_NEXT)
+ item_list.push_back(new Item_string(tmp4.ptr(),tmp4.length(),cs));
+ else
+ item_list.push_back(item_null);
}
else
{
- if (table_list->schema_table &&
+ if (table_list && /* SJM bushes don't have table_list */
+ table_list->schema_table &&
table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE)
{
const char *tmp_buff;
@@ -17113,7 +22689,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
}
/* Add "rows" field to item_list. */
- if (table_list->schema_table)
+ if (table_list /* SJM bushes don't have table_list */ &&
+ table_list->schema_table)
{
/* in_rows */
if (join->thd->lex->describe & DESCRIBE_EXTENDED)
@@ -17123,22 +22700,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
}
else
{
- ha_rows examined_rows;
- if (tab->select && tab->select->quick)
- examined_rows= tab->select->quick->records;
- else if (tab->type == JT_NEXT || tab->type == JT_ALL)
- {
- if (tab->limit)
- examined_rows= tab->limit;
- else
- {
- tab->table->file->info(HA_STATUS_VARIABLE);
- examined_rows= tab->table->file->stats.records;
- }
- }
- else
- examined_rows=(ha_rows)join->best_positions[i].records_read;
-
+ ha_rows examined_rows= tab->get_examined_rows();
+
item_list.push_back(new Item_int((longlong) (ulonglong) examined_rows,
MY_INT64_NUM_DECIMAL_DIGITS));
@@ -17147,14 +22710,14 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
{
float f= 0.0;
if (examined_rows)
- f= (float) (100.0 * join->best_positions[i].records_read /
- examined_rows);
+ f= (float) (100.0 * tab->records_read / examined_rows);
+ set_if_smaller(f, 100.0);
item_list.push_back(new Item_float(f, 2));
}
}
/* Build "Extra" field and add it to item_list. */
- my_bool key_read=table->key_read;
+ key_read=table->key_read;
if ((tab->type == JT_NEXT || tab->type == JT_CONST) &&
table->covering_keys.is_set(tab->index))
key_read=1;
@@ -17184,8 +22747,21 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
}
else
{
+ uint keyno= MAX_KEY;
+ if (tab->ref.key_parts)
+ keyno= tab->ref.key;
+ else if (tab->select && tab->select->quick)
+ keyno = tab->select->quick->index;
+
+ if (keyno != MAX_KEY && keyno == table->file->pushed_idx_cond_keyno &&
+ table->file->pushed_idx_cond)
+ extra.append(STRING_WITH_LEN("; Using index condition"));
+ else if (tab->cache_idx_cond)
+ extra.append(STRING_WITH_LEN("; Using index condition(BKA)"));
+
if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
+ quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
{
extra.append(STRING_WITH_LEN("; Using "));
@@ -17206,8 +22782,11 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
{
const COND *pushed_cond= tab->table->file->pushed_cond;
- if ((thd->variables.optimizer_switch &
- OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) && pushed_cond)
+ if (((thd->variables.optimizer_switch &
+ OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) ||
+ (tab->table->file->ha_table_flags() &
+ HA_MUST_USE_TABLE_CONDITION_PUSHDOWN)) &&
+ pushed_cond)
{
extra.append(STRING_WITH_LEN("; Using where with pushed "
"condition"));
@@ -17221,7 +22800,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
extra.append(STRING_WITH_LEN("; Using where"));
}
}
- if (table_list->schema_table &&
+ if (table_list /* SJM bushes don't have table_list */ &&
+ table_list->schema_table &&
table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE)
{
if (!table_list->table_open_method)
@@ -17253,6 +22833,32 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
}
if (table->reginfo.not_exists_optimize)
extra.append(STRING_WITH_LEN("; Not exists"));
+
+ /*
+ if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE &&
+ !(((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_flags &
+ HA_MRR_USE_DEFAULT_IMPL))
+ {
+ extra.append(STRING_WITH_LEN("; Using MRR"));
+ }
+ */
+ if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE)
+ {
+ char mrr_str_buf[128];
+ mrr_str_buf[0]=0;
+ int len;
+ uint mrr_flags=
+ ((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_flags;
+ len= table->file->multi_range_read_explain_info(mrr_flags,
+ mrr_str_buf,
+ sizeof(mrr_str_buf));
+ if (len > 0)
+ {
+ extra.append(STRING_WITH_LEN("; "));
+ extra.append(mrr_str_buf, len);
+ }
+ }
+
if (need_tmp_table)
{
need_tmp_table=0;
@@ -17263,8 +22869,40 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
need_order=0;
extra.append(STRING_WITH_LEN("; Using filesort"));
}
- if (distinct & test_all_bits(used_tables, thd->lex->used_tables))
+ if (distinct & test_all_bits(used_tables,
+ join->select_list_used_tables))
extra.append(STRING_WITH_LEN("; Distinct"));
+ if (tab->loosescan_match_tab)
+ {
+ extra.append(STRING_WITH_LEN("; LooseScan"));
+ }
+
+ if (tab->first_weedout_table)
+ extra.append(STRING_WITH_LEN("; Start temporary"));
+ if (tab->check_weed_out_table)
+ extra.append(STRING_WITH_LEN("; End temporary"));
+ else if (tab->do_firstmatch)
+ {
+ if (tab->do_firstmatch == join->join_tab - 1)
+ extra.append(STRING_WITH_LEN("; FirstMatch"));
+ else
+ {
+ extra.append(STRING_WITH_LEN("; FirstMatch("));
+ TABLE *prev_table=tab->do_firstmatch->table;
+ if (prev_table->derived_select_number)
+ {
+ char namebuf[NAME_LEN];
+ /* Derived table name generation */
+ int len= my_snprintf(namebuf, sizeof(namebuf)-1,
+ "<derived%u>",
+ prev_table->derived_select_number);
+ extra.append(namebuf, len);
+ }
+ else
+ extra.append(prev_table->pos_in_table_list->alias);
+ extra.append(STRING_WITH_LEN(")"));
+ }
+ }
for (uint part= 0; part < tab->ref.key_parts; part++)
{
@@ -17274,8 +22912,12 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
break;
}
}
- if (i > 0 && tab[-1].next_select == sub_select_cache)
+
+ if (tab->cache)
+ {
extra.append(STRING_WITH_LEN("; Using join buffer"));
+ tab->cache->print_explain_comment(&extra);
+ }
/* Skip initial "; "*/
const char *str= extra.ptr();
@@ -17287,6 +22929,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
}
item_list.push_back(new Item_string(str, len, cs));
}
+
// For next iteration
used_tables|=table->map;
if (result->send_data(item_list))
@@ -17297,8 +22940,31 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
unit;
unit= unit->next_unit())
{
- if (mysql_explain_union(thd, unit, result))
- DBUG_VOID_RETURN;
+ /*
+ This fix_fields() call is to handle an edge case like this:
+
+ SELECT ... UNION SELECT ... ORDER BY (SELECT ...)
+
+      For such queries, we'll get here before having called
+      subquery_expr->fix_fields(), which would otherwise cause the EXPLAIN
+      to fail.
+ */
+ if (unit->item && !unit->item->fixed)
+ {
+ Item *ref= unit->item;
+ if (unit->item->fix_fields(thd, &ref))
+ DBUG_VOID_RETURN;
+ DBUG_ASSERT(ref == unit->item);
+ }
+
+ /*
+ Display subqueries only if they are not parts of eliminated WHERE/ON
+ clauses.
+ */
+ if (!(unit->item && unit->item->eliminated))
+ {
+ if (mysql_explain_union(thd, unit, result))
+ DBUG_VOID_RETURN;
+ }
}
DBUG_VOID_RETURN;
}
@@ -17310,28 +22976,12 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
bool res= 0;
SELECT_LEX *first= unit->first_select();
- for (SELECT_LEX *sl= first;
- sl;
- sl= sl->next_select())
- {
- // drop UNCACHEABLE_EXPLAIN, because it is for internal usage only
- uint8 uncacheable= (sl->uncacheable & ~UNCACHEABLE_EXPLAIN);
- sl->type= (((&thd->lex->select_lex)==sl)?
- (sl->first_inner_unit() || sl->next_select() ?
- "PRIMARY" : "SIMPLE"):
- ((sl == first)?
- ((sl->linkage == DERIVED_TABLE_TYPE) ?
- "DERIVED":
- ((uncacheable & UNCACHEABLE_DEPENDENT) ?
- "DEPENDENT SUBQUERY":
- (uncacheable?"UNCACHEABLE SUBQUERY":
- "SUBQUERY"))):
- ((uncacheable & UNCACHEABLE_DEPENDENT) ?
- "DEPENDENT UNION":
- uncacheable?"UNCACHEABLE UNION":
- "UNION")));
+ for (SELECT_LEX *sl= first; sl; sl= sl->next_select())
+ {
+ sl->set_explain_type();
sl->options|= SELECT_DESCRIBE;
}
+
if (unit->is_union())
{
    unit->fake_select_lex->select_number= UINT_MAX; // just for initialization
@@ -17339,12 +22989,6 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
unit->fake_select_lex->options|= SELECT_DESCRIBE;
if (!(res= unit->prepare(thd, result, SELECT_NO_UNLOCK | SELECT_DESCRIBE)))
res= unit->exec();
- /*
- Reset select option. Needed if fake_select_lex is used and not called
- from select describe.
- */
- unit->fake_select_lex->options&= ~SELECT_DESCRIBE;
- res|= unit->cleanup();
}
else
{
@@ -17367,6 +23011,70 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
}
+static void print_table_array(THD *thd,
+ table_map eliminated_tables,
+ String *str, TABLE_LIST **table,
+ TABLE_LIST **end,
+ enum_query_type query_type)
+{
+ (*table)->print(thd, eliminated_tables, str, query_type);
+
+ for (TABLE_LIST **tbl= table + 1; tbl < end; tbl++)
+ {
+ TABLE_LIST *curr= *tbl;
+
+ /*
+      The "eliminated_tables &&" check guards against the case of
+ printing the query for CREATE VIEW. We do that without having run
+ JOIN::optimize() and so will have nested_join->used_tables==0.
+ */
+ if (eliminated_tables &&
+ ((curr->table && (curr->table->map & eliminated_tables)) ||
+ (curr->nested_join && !(curr->nested_join->used_tables &
+ ~eliminated_tables))))
+ {
+      /* as of 5.5, print_join doesn't put eliminated elements into array */
+ DBUG_ASSERT(0);
+ continue;
+ }
+
+ if (curr->outer_join)
+ {
+ /* MySQL converts right to left joins */
+ str->append(STRING_WITH_LEN(" left join "));
+ }
+ else if (curr->straight)
+ str->append(STRING_WITH_LEN(" straight_join "));
+ else if (curr->sj_inner_tables)
+ str->append(STRING_WITH_LEN(" semi join "));
+ else
+ str->append(STRING_WITH_LEN(" join "));
+ curr->print(thd, eliminated_tables, str, query_type);
+ if (curr->on_expr)
+ {
+ str->append(STRING_WITH_LEN(" on("));
+ curr->on_expr->print(str, query_type);
+ str->append(')');
+ }
+ }
+}
+
+
+/*
+ Check if the passed table is
+ - a base table which was eliminated, or
+ - a join nest which only contained eliminated tables (and so was eliminated,
+ too)
+*/
+
+static bool is_eliminated_table(table_map eliminated_tables, TABLE_LIST *tbl)
+{
+ return eliminated_tables &&
+ ((tbl->table && (tbl->table->map & eliminated_tables)) ||
+ (tbl->nested_join && !(tbl->nested_join->used_tables &
+ ~eliminated_tables)));
+}
+
/**
Print joins from the FROM clause.
@@ -17377,6 +23085,7 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
*/
static void print_join(THD *thd,
+ table_map eliminated_tables,
String *str,
List<TABLE_LIST> *tables,
enum_query_type query_type)
@@ -17387,8 +23096,14 @@ static void print_join(THD *thd,
uint non_const_tables= 0;
for (TABLE_LIST *t= ti++; t ; t= ti++)
- if (!t->optimized_away)
+ {
+ /*
+ See comment in print_table_array() about the second part of the
+ condition
+ */
+ if (!t->optimized_away && !is_eliminated_table(eliminated_tables, t))
non_const_tables++;
+ }
if (!non_const_tables)
{
str->append(STRING_WITH_LEN("dual"));
@@ -17403,38 +23118,42 @@ static void print_join(THD *thd,
TABLE_LIST *tmp, **t= table + (non_const_tables - 1);
while ((tmp= ti++))
{
- if (tmp->optimized_away)
+ if (tmp->optimized_away || is_eliminated_table(eliminated_tables, tmp))
continue;
*t--= tmp;
}
DBUG_ASSERT(tables->elements >= 1);
- (*table)->print(thd, str, query_type);
-
- TABLE_LIST **end= table + non_const_tables;
- for (TABLE_LIST **tbl= table + 1; tbl < end; tbl++)
+ /*
+ Assert that the first table in the list isn't eliminated. This comes from
+ the fact that the first table can't be inner table of an outer join.
+ */
+ DBUG_ASSERT(!eliminated_tables ||
+ !(((*table)->table && ((*table)->table->map & eliminated_tables)) ||
+ ((*table)->nested_join && !((*table)->nested_join->used_tables &
+ ~eliminated_tables))));
+ /*
+ If the first table is a semi-join nest, swap it with something that is
+ not a semi-join nest.
+ */
+ if ((*table)->sj_inner_tables)
{
- TABLE_LIST *curr= *tbl;
- if (curr->outer_join)
- {
- /* MySQL converts right to left joins */
- str->append(STRING_WITH_LEN(" left join "));
- }
- else if (curr->straight)
- str->append(STRING_WITH_LEN(" straight_join "));
- else
- str->append(STRING_WITH_LEN(" join "));
- curr->print(thd, str, query_type);
- if (curr->on_expr)
+ TABLE_LIST **end= table + non_const_tables;
+ for (TABLE_LIST **t2= table; t2!=end; t2++)
{
- str->append(STRING_WITH_LEN(" on("));
- curr->on_expr->print(str, query_type);
- str->append(')');
+ if (!(*t2)->sj_inner_tables)
+ {
+ TABLE_LIST *tmp= *t2;
+ *t2= *table;
+ *table= tmp;
+ break;
+ }
}
}
+ print_table_array(thd, eliminated_tables, str, table,
+ table + non_const_tables, query_type);
}
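
(Annotation, not patch content: the semi-join handling above is simply "find the first array element that is not a semi-join nest and swap it into slot 0", so the printed FROM clause never starts with "semi join". A small self-contained sketch of that pattern over a plain array; the struct and names are invented for illustration:)

    #include <algorithm>
    #include <cstdio>

    struct PrintedTable { const char *name; bool sj_inner; };

    // Ensure the first printed element is not a semi-join nest.
    static void move_non_sj_first(PrintedTable *tab, size_t n)
    {
      if (n == 0 || !tab[0].sj_inner)
        return;
      for (size_t i= 1; i < n; i++)
      {
        if (!tab[i].sj_inner)
        {
          std::swap(tab[0], tab[i]);
          break;
        }
      }
    }

    int main()
    {
      PrintedTable t[]= { {"sj_nest", true}, {"t1", false}, {"t2", false} };
      move_non_sj_first(t, 3);
      printf("first printed table: %s\n", t[0].name);  // prints "t1"
      return 0;
    }
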
-
/**
@brief Print an index hint
@@ -17478,14 +23197,41 @@ Index_hint::print(THD *thd, String *str)
@param str string where table should be printed
*/
-void TABLE_LIST::print(THD *thd, String *str, enum_query_type query_type)
+void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str,
+ enum_query_type query_type)
{
if (nested_join)
{
str->append('(');
- print_join(thd, str, &nested_join->join_list, query_type);
+ print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type);
str->append(')');
}
+ else if (jtbm_subselect)
+ {
+ if (jtbm_subselect->engine->engine_type() ==
+ subselect_engine::SINGLE_SELECT_ENGINE)
+ {
+ /*
+      We get here when the conversion into materialization didn't finish
+      (this happens when:
+      - the subquery is a degenerate case which produces 0 or 1 record,
+      - the subquery's optimization didn't finish because of @@max_join_size
+        limits,
+      - ... maybe some other cases like this)
+ */
+ str->append(STRING_WITH_LEN(" <materialize> ("));
+ jtbm_subselect->engine->print(str, query_type);
+ str->append(')');
+ }
+ else
+ {
+ str->append(STRING_WITH_LEN(" <materialize> ("));
+ subselect_hash_sj_engine *hash_engine;
+ hash_engine= (subselect_hash_sj_engine*)jtbm_subselect->engine;
+ hash_engine->materialize_engine->print(str, query_type);
+ str->append(')');
+ }
+ }
else
{
const char *cmp_name; // Name to compare with alias
@@ -17568,12 +23314,21 @@ void TABLE_LIST::print(THD *thd, String *str, enum_query_type query_type)
void st_select_lex::print(THD *thd, String *str, enum_query_type query_type)
{
- /* QQ: thd may not be set for sub queries, but this should be fixed */
- if (!thd)
- thd= current_thd;
+ DBUG_ASSERT(thd);
str->append(STRING_WITH_LEN("select "));
+ if (join && join->cleaned)
+ {
+ /*
+      The JOIN has already been cleaned up, so it is dangerous to print items
+      because the temporary tables they pointed to may already have been freed.
+ */
+ str->append('#');
+ str->append(select_number);
+ return;
+ }
+
/* First add options */
if (options & SELECT_STRAIGHT_JOIN)
str->append(STRING_WITH_LEN("straight_join "));
@@ -17614,7 +23369,7 @@ void st_select_lex::print(THD *thd, String *str, enum_query_type query_type)
else
str->append(',');
- if (master_unit()->item && item->is_autogenerated_name)
+ if (is_subquery_function() && item->is_autogenerated_name)
{
/*
Do not print auto-generated aliases in subqueries. It has no purpose
@@ -17634,7 +23389,7 @@ void st_select_lex::print(THD *thd, String *str, enum_query_type query_type)
{
str->append(STRING_WITH_LEN(" from "));
/* go through join tree */
- print_join(thd, str, &top_join_list, query_type);
+ print_join(thd, join? join->eliminated_tables: 0, str, &top_join_list, query_type);
}
else if (where)
{
@@ -17718,6 +23473,8 @@ bool JOIN::change_result(select_result *res)
{
DBUG_ENTER("JOIN::change_result");
result= res;
+ if (tmp_join)
+ tmp_join->result= res;
if (!procedure && (result->prepare(fields_list, select_lex->master_unit()) ||
result->prepare2()))
{
@@ -17726,6 +23483,225 @@ bool JOIN::change_result(select_result *res)
DBUG_RETURN(FALSE);
}
+
+/**
+ @brief
+ Set allowed types of join caches that can be used for join operations
+
+ @details
+  The function sets a bitmap of allowed join buffer types in the field
+  allowed_join_cache_types of this JOIN structure:
+    bit 1 is set if the join buffers are allowed to be incremental,
+    bit 2 is set if the join buffers are allowed to be hashed,
+    bit 3 is set if the join buffers are allowed to be used for BKA
+    join algorithms.
+  The allowed types are read from system variables.
+  Besides that, the function sets the maximum allowed join cache level,
+  which is also read from a system variable.
+*/
+
+void JOIN::set_allowed_join_cache_types()
+{
+ allowed_join_cache_types= 0;
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL))
+ allowed_join_cache_types|= JOIN_CACHE_INCREMENTAL_BIT;
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_HASHED))
+ allowed_join_cache_types|= JOIN_CACHE_HASHED_BIT;
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_BKA))
+ allowed_join_cache_types|= JOIN_CACHE_BKA_BIT;
+ allowed_semijoin_with_cache=
+ optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE);
+ allowed_outer_join_with_cache=
+ optimizer_flag(thd, OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE);
+ max_allowed_join_cache_level= thd->variables.join_cache_level;
+}
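
(Annotation, not patch content: the JOIN_CACHE_*_BIT values are independent capability flags ORed into one integer and later tested with bitwise AND. A self-contained sketch of the same flag pattern; the enum values below are stand-ins, not the server's actual constants:)

    #include <cstdio>

    enum {
      CACHE_INCREMENTAL_BIT= 1,   // 1 << 0
      CACHE_HASHED_BIT     = 2,   // 1 << 1
      CACHE_BKA_BIT        = 4    // 1 << 2
    };

    int main()
    {
      // Pretend the incremental and BKA switches are ON, hashed is OFF.
      unsigned allowed= 0;
      allowed|= CACHE_INCREMENTAL_BIT;
      allowed|= CACHE_BKA_BIT;

      printf("incremental allowed: %d\n", (allowed & CACHE_INCREMENTAL_BIT) != 0); // 1
      printf("hashed allowed:      %d\n", (allowed & CACHE_HASHED_BIT) != 0);      // 0
      printf("BKA allowed:         %d\n", (allowed & CACHE_BKA_BIT) != 0);         // 1
      return 0;
    }
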
+
+
+/**
+ Save a query execution plan so that the caller can revert to it if needed,
+ and reset the current query plan so that it can be reoptimized.
+
+ @param save_to The object into which the current query plan state is saved
+*/
+
+void JOIN::save_query_plan(Join_plan_state *save_to)
+{
+ if (keyuse.elements)
+ {
+ DYNAMIC_ARRAY tmp_keyuse;
+ /* Swap the current and the backup keyuse internal arrays. */
+ tmp_keyuse= keyuse;
+ keyuse= save_to->keyuse; /* keyuse is reset to an empty array. */
+ save_to->keyuse= tmp_keyuse;
+
+ for (uint i= 0; i < table_count; i++)
+ {
+ save_to->join_tab_keyuse[i]= join_tab[i].keyuse;
+ join_tab[i].keyuse= NULL;
+ save_to->join_tab_checked_keys[i]= join_tab[i].checked_keys;
+ join_tab[i].checked_keys.clear_all();
+ }
+ }
+ memcpy((uchar*) save_to->best_positions, (uchar*) best_positions,
+ sizeof(POSITION) * (table_count + 1));
+ memset(best_positions, 0, sizeof(POSITION) * (table_count + 1));
+
+ /* Save SJM nests */
+ List_iterator<TABLE_LIST> it(select_lex->sj_nests);
+ TABLE_LIST *tlist;
+ SJ_MATERIALIZATION_INFO **p_info= save_to->sj_mat_info;
+ while ((tlist= it++))
+ {
+ *(p_info++)= tlist->sj_mat_info;
+ }
+}
+
+
+/**
+ Reset a query execution plan so that it can be reoptimized in-place.
+*/
+void JOIN::reset_query_plan()
+{
+ for (uint i= 0; i < table_count; i++)
+ {
+ join_tab[i].keyuse= NULL;
+ join_tab[i].checked_keys.clear_all();
+ }
+}
+
+
+/**
+ Restore a query execution plan previously saved by the caller.
+
+  @param restore_from The object from which the current query plan state is restored.
+*/
+
+void JOIN::restore_query_plan(Join_plan_state *restore_from)
+{
+ if (restore_from->keyuse.elements)
+ {
+ DYNAMIC_ARRAY tmp_keyuse;
+ tmp_keyuse= keyuse;
+ keyuse= restore_from->keyuse;
+ restore_from->keyuse= tmp_keyuse;
+
+ for (uint i= 0; i < table_count; i++)
+ {
+ join_tab[i].keyuse= restore_from->join_tab_keyuse[i];
+ join_tab[i].checked_keys= restore_from->join_tab_checked_keys[i];
+ }
+
+ }
+ memcpy((uchar*) best_positions, (uchar*) restore_from->best_positions,
+ sizeof(POSITION) * (table_count + 1));
+ /* Restore SJM nests */
+ List_iterator<TABLE_LIST> it(select_lex->sj_nests);
+ TABLE_LIST *tlist;
+ SJ_MATERIALIZATION_INFO **p_info= restore_from->sj_mat_info;
+ while ((tlist= it++))
+ {
+ tlist->sj_mat_info= *(p_info++);
+ }
+}
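
(Annotation, not patch content: save_query_plan() and restore_query_plan() avoid copying KEYUSE elements by swapping the DYNAMIC_ARRAY headers between the JOIN and the backup object, so a save followed by a restore brings the original buffer back untouched. A minimal sketch of that swap-based backup idiom, with std::vector standing in for DYNAMIC_ARRAY:)

    #include <cassert>
    #include <utility>
    #include <vector>

    struct PlanState { std::vector<int> keyuse; };  // stand-in for the saved arrays

    // Swap-based save: the live array moves into the backup object and the
    // (empty) backup array becomes the live one, with no element copying.
    static void save_plan(std::vector<int> &live, PlanState *backup)
    { std::swap(live, backup->keyuse); }

    static void restore_plan(std::vector<int> &live, PlanState *backup)
    { std::swap(live, backup->keyuse); }

    int main()
    {
      std::vector<int> keyuse= {1, 2, 3};
      PlanState backup;
      save_plan(keyuse, &backup);      // keyuse is now empty, backup holds 1,2,3
      assert(keyuse.empty());
      keyuse= {42};                    // reoptimization builds a new array
      restore_plan(keyuse, &backup);   // the original contents come back
      assert(keyuse.size() == 3);
      return 0;
    }
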
+
+
+/**
+ Reoptimize a query plan taking into account an additional conjunct to the
+ WHERE clause.
+
+ @param added_where An extra conjunct to the WHERE clause to reoptimize with
+ @param join_tables The set of tables to reoptimize
+ @param save_to If != NULL, save here the state of the current query plan,
+ otherwise reuse the existing query plan structures.
+
+ @notes
+ Given a query plan that was already optimized taking into account some WHERE
+ clause 'C', reoptimize this plan with a new WHERE clause 'C AND added_where'.
+ The reoptimization works as follows:
+
+ 1. Call update_ref_and_keys *only* for the new conditions 'added_where'
+ that are about to be injected into the query.
+ 2. Expand if necessary the original KEYUSE array JOIN::keyuse to
+ accommodate the new REF accesses computed for the 'added_where' condition.
+ 3. Add the new KEYUSEs into JOIN::keyuse.
+ 4. Re-sort and re-filter the JOIN::keyuse array with the newly added
+ KEYUSE elements.
+
+ @retval REOPT_NEW_PLAN there is a new plan.
+ @retval REOPT_OLD_PLAN no new improved plan was produced, use the old one.
+  @retval REOPT_ERROR an irrecoverable error occurred during reoptimization.
+*/
+
+JOIN::enum_reopt_result
+JOIN::reoptimize(Item *added_where, table_map join_tables,
+ Join_plan_state *save_to)
+{
+ DYNAMIC_ARRAY added_keyuse;
+ SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */
+ uint org_keyuse_elements;
+
+ /* Re-run the REF optimizer to take into account the new conditions. */
+ if (update_ref_and_keys(thd, &added_keyuse, join_tab, table_count, added_where,
+ ~outer_join, select_lex, &sargables))
+ {
+ delete_dynamic(&added_keyuse);
+ return REOPT_ERROR;
+ }
+
+ if (!added_keyuse.elements)
+ {
+ delete_dynamic(&added_keyuse);
+ return REOPT_OLD_PLAN;
+ }
+
+ if (save_to)
+ save_query_plan(save_to);
+ else
+ reset_query_plan();
+
+ if (!keyuse.buffer &&
+ my_init_dynamic_array(&keyuse, sizeof(KEYUSE), 20, 64))
+ {
+ delete_dynamic(&added_keyuse);
+ return REOPT_ERROR;
+ }
+
+ org_keyuse_elements= save_to ? save_to->keyuse.elements : keyuse.elements;
+ allocate_dynamic(&keyuse, org_keyuse_elements + added_keyuse.elements);
+
+ /* If needed, add the access methods from the original query plan. */
+ if (save_to)
+ {
+ DBUG_ASSERT(!keyuse.elements);
+ memcpy(keyuse.buffer,
+ save_to->keyuse.buffer,
+ (size_t) save_to->keyuse.elements * keyuse.size_of_element);
+ keyuse.elements= save_to->keyuse.elements;
+ }
+
+ /* Add the new access methods to the keyuse array. */
+ memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element,
+ added_keyuse.buffer,
+ (size_t) added_keyuse.elements * added_keyuse.size_of_element);
+ keyuse.elements+= added_keyuse.elements;
+  /* The contents of added_keyuse has been copied; it is no longer needed. */
+ delete_dynamic(&added_keyuse);
+
+ if (sort_and_filter_keyuse(thd, &keyuse, true))
+ return REOPT_ERROR;
+ optimize_keyuse(this, &keyuse);
+
+ if (optimize_semijoin_nests(this, join_tables))
+ return REOPT_ERROR;
+
+ /* Re-run the join optimizer to compute a new query plan. */
+ if (choose_plan(this, join_tables))
+ return REOPT_ERROR;
+
+ return REOPT_NEW_PLAN;
+}
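
(Annotation, not patch content: callers are expected to use reoptimize() in a save / reoptimize / compare / maybe-restore sequence, keeping the cheaper of the two plans. The following schematic sketch models that control flow with invented stand-in types and costs; it is not the server's actual calling code:)

    #include <cstdio>

    enum reopt_result { REOPT_NEW_PLAN, REOPT_OLD_PLAN, REOPT_ERROR };

    struct Plan { double cost; };

    // Stand-ins for JOIN::save_query_plan / reoptimize / restore_query_plan.
    static void save_plan(const Plan &cur, Plan *backup) { *backup= cur; }
    static void restore_plan(Plan &cur, const Plan &backup) { cur= backup; }
    static reopt_result reoptimize_with_extra_cond(Plan &cur)
    {
      cur.cost*= 0.8;               // pretend the added condition helps
      return REOPT_NEW_PLAN;
    }

    int main()
    {
      Plan current= {100.0}, backup= {0.0};

      save_plan(current, &backup);                       // keep the old plan around
      if (reoptimize_with_extra_cond(current) == REOPT_ERROR)
        return 1;
      if (current.cost >= backup.cost)                   // new plan is not better:
        restore_plan(current, backup);                   // fall back to the old one

      printf("chosen plan cost: %.1f\n", current.cost);  // 80.0 here
      return 0;
    }
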
+
+
/**
Cache constant expressions in WHERE, HAVING, ON conditions.
*/
@@ -17736,7 +23712,7 @@ void JOIN::cache_const_exprs()
bool *analyzer_arg= &cache_flag;
/* No need in cache if all tables are constant. */
- if (const_tables == tables)
+ if (const_tables == table_count)
return;
if (conds)
@@ -17747,7 +23723,8 @@ void JOIN::cache_const_exprs()
having->compile(&Item::cache_const_expr_analyzer, (uchar **)&analyzer_arg,
&Item::cache_const_expr_transformer, (uchar *)&cache_flag);
- for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables ; tab++)
+ for (JOIN_TAB *tab= first_depth_first_tab(this); tab;
+ tab= next_depth_first_tab(this, tab))
{
if (*tab->on_expr_ref)
{
@@ -17793,7 +23770,7 @@ void JOIN::cache_const_exprs()
static bool
test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
key_map usable_keys, int ref_key,
- ha_rows select_limit,
+ ha_rows select_limit_arg,
int *new_key, int *new_key_direction,
ha_rows *new_select_limit, uint *new_used_key_parts,
uint *saved_best_key_parts)
@@ -17819,13 +23796,14 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
ha_rows table_records= table->file->stats.records;
bool group= join && join->group && order == join->group_list;
ha_rows ref_key_quick_rows= HA_POS_ERROR;
+ const bool has_limit= (select_limit_arg != HA_POS_ERROR);
/*
If not used with LIMIT, only use keys if the whole query can be
resolved with a key; This is because filesort() is usually faster than
retrieving all rows through an index.
*/
- if (select_limit >= table_records)
+ if (select_limit_arg >= table_records)
{
keys= *table->file->keys_to_use_for_scanning();
keys.merge(table->covering_keys);
@@ -17843,14 +23821,15 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
else
keys= usable_keys;
- if (ref_key >= 0 && table->covering_keys.is_set(ref_key))
+ if (ref_key >= 0 && ref_key != MAX_KEY &&
+ table->covering_keys.is_set(ref_key))
ref_key_quick_rows= table->quick_rows[ref_key];
if (join)
{
uint tablenr= tab - join->join_tab;
read_time= join->best_positions[tablenr].read_time;
- for (uint i= tablenr+1; i < join->tables; i++)
+ for (uint i= tablenr+1; i < join->table_count; i++)
fanout*= join->best_positions[i].records_read; // fanout is always >= 1
}
else
@@ -17859,7 +23838,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
for (nr=0; nr < table->s->keys ; nr++)
{
int direction;
- uint used_key_parts;
+ ha_rows select_limit= select_limit_arg;
+ uint used_key_parts= 0;
if (keys.is_set(nr) &&
(direction= test_if_order_by_key(order, table, nr, &used_key_parts)))
@@ -17871,10 +23851,9 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
*/
DBUG_ASSERT (ref_key != (int) nr);
- bool is_covering= table->covering_keys.is_set(nr) ||
- (nr == table->s->primary_key &&
- table->file->primary_key_is_clustered());
-
+ bool is_covering= (table->covering_keys.is_set(nr) ||
+ (table->file->index_flags(nr, 0, 1) &
+ HA_CLUSTERED_INDEX));
/*
Don't use an index scan with ORDER BY without limit.
For GROUP BY without limit always use index scan
@@ -17901,9 +23880,49 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
key (e.g. as in Innodb).
See Bug #28591 for details.
*/
- rec_per_key= used_key_parts &&
- used_key_parts <= keyinfo->key_parts ?
+ uint used_index_parts= keyinfo->key_parts;
+ uint used_pk_parts= 0;
+ if (used_key_parts > used_index_parts)
+ used_pk_parts= used_key_parts-used_index_parts;
+ rec_per_key= used_key_parts ?
keyinfo->rec_per_key[used_key_parts-1] : 1;
+ /* Take into account the selectivity of the used pk prefix */
+ if (used_pk_parts)
+ {
+ KEY *pkinfo=tab->table->key_info+table->s->primary_key;
+ /*
+              If the values of records per key for the prefixes
+              of the primary key are unknown, we assume
+              they are equal to 1.
+ */
+ if (used_key_parts == pkinfo->key_parts ||
+ pkinfo->rec_per_key[0] == 0)
+ rec_per_key= 1;
+ if (rec_per_key > 1)
+ {
+ rec_per_key*= pkinfo->rec_per_key[used_pk_parts-1];
+ rec_per_key/= pkinfo->rec_per_key[0];
+ /*
+ The value of rec_per_key for the extended key has
+ to be adjusted accordingly if some components of
+ the secondary key are included in the primary key.
+ */
+ for(uint i= 0; i < used_pk_parts; i++)
+ {
+ if (pkinfo->key_part[i].field->key_start.is_set(nr))
+ {
+ /*
+ We presume here that for any index rec_per_key[i] != 0
+ if rec_per_key[0] != 0.
+ */
+ DBUG_ASSERT(pkinfo->rec_per_key[i]);
+ DBUG_ASSERT(i > 0);
+ rec_per_key*= pkinfo->rec_per_key[i-1];
+ rec_per_key/= pkinfo->rec_per_key[i];
+ }
+ }
+ }
+ }
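
(Annotation, not patch content: a worked example of the adjustment above, with invented statistics. If the secondary key prefix alone gives rec_per_key = 20, and the primary-key columns appended to it have pkinfo->rec_per_key[0] = 10 and pkinfo->rec_per_key[used_pk_parts-1] = 2, the extended-key estimate becomes 20 * 2 / 10 = 4 rows per distinct key value. The same arithmetic as a tiny standalone program:)

    #include <cstdio>

    int main()
    {
      // Invented statistics, mirroring the variables used in the block above.
      double rec_per_key= 20.0;        // from the secondary key's own prefix
      double pk_rec_per_key_0= 10.0;   // pkinfo->rec_per_key[0]
      double pk_rec_per_key_used= 2.0; // pkinfo->rec_per_key[used_pk_parts-1]

      rec_per_key*= pk_rec_per_key_used;
      rec_per_key/= pk_rec_per_key_0;
      printf("adjusted rec_per_key: %.1f\n", rec_per_key);  // 4.0
      return 0;
    }
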
set_if_bigger(rec_per_key, 1);
/*
With a grouping query each group containing on average
@@ -17911,10 +23930,11 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
be included into the result set.
*/
if (select_limit > table_records/rec_per_key)
- select_limit= table_records;
+ select_limit= table_records;
else
select_limit= (ha_rows) (select_limit*rec_per_key);
- }
+ } /* group */
+
/*
If tab=tk is not the last joined table tn then to get first
L records from the result set we can expect to retrieve
@@ -17958,8 +23978,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
*/
index_scan_time= select_limit/rec_per_key *
min(rec_per_key, table->file->scan_time());
- if ((ref_key < 0 && is_covering) ||
- (ref_key < 0 && (group || table->force_index)) ||
+ if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
index_scan_time < read_time)
{
ha_rows quick_records= table_records;
@@ -17971,7 +23990,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
if (best_key < 0 ||
(select_limit <= min(quick_records,best_records) ?
keyinfo->key_parts < best_key_parts :
- quick_records < best_records))
+ quick_records < best_records) ||
+ (!is_best_covering && is_covering))
{
best_key= nr;
best_key_parts= keyinfo->key_parts;
@@ -17992,7 +24012,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
*new_key= best_key;
*new_key_direction= best_key_direction;
- *new_select_limit= best_select_limit;
+ *new_select_limit= has_limit ? best_select_limit : table_records;
if (new_used_key_parts != NULL)
*new_used_key_parts= best_key_parts;
@@ -18026,16 +24046,6 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
ha_rows limit, bool *need_sort, bool *reverse)
{
- if (select && select->quick && select->quick->unique_key_range())
- { // Single row select (always "ordered"): Ok to use with key field UPDATE
- *need_sort= FALSE;
- /*
- Returning of MAX_KEY here prevents updating of used_key_is_modified
- in mysql_update(). Use quick select "as is".
- */
- return MAX_KEY;
- }
-
if (!order)
{
*need_sort= FALSE;