Diffstat (limited to 'sql/sql_select.h')
-rw-r--r-- | sql/sql_select.h | 1346
1 file changed, 1287 insertions, 59 deletions
diff --git a/sql/sql_select.h b/sql/sql_select.h index 93885e23f76..1c24ed37581 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -17,6 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#ifndef SQL_SELECT_INCLUDED +#define SQL_SELECT_INCLUDED /** @file @@ -35,6 +37,16 @@ #include "opt_range.h" /* SQL_SELECT, QUICK_SELECT_I */ +#if defined(WITH_ARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES) +#include "../storage/maria/ha_maria.h" +#define TMP_ENGINE_HTON maria_hton +#else +#define TMP_ENGINE_HTON myisam_hton +#endif +/* Values in optimize */ +#define KEY_OPTIMIZE_EXISTS 1 +#define KEY_OPTIMIZE_REF_OR_NULL 2 + typedef struct keyuse_t { TABLE *table; Item *val; /**< or value if no field */ @@ -58,6 +70,11 @@ typedef struct keyuse_t { NULL - Otherwise (the source equality can't be turned off) */ bool *cond_guard; + /* + 0..64 <=> This was created from semi-join IN-equality # sj_pred_no. + MAX_UINT Otherwise + */ + uint sj_pred_no; } KEYUSE; class store_key; @@ -99,32 +116,17 @@ typedef struct st_table_ref in the join. */ ha_rows use_count; -} TABLE_REF; - - - -#define CACHE_BLOB 1 /* blob field */ -#define CACHE_STRIPPED 2 /* field stripped of trailing spaces */ - -/** - CACHE_FIELD and JOIN_CACHE is used on full join to cache records in outer - table -*/ -typedef struct st_cache_field { - uchar *str; - uint length, blob_length; - Field *field; - uint type; /**< category of the of the copied field (CACHE_BLOB et al.) */ -} CACHE_FIELD; + /* + TRUE <=> disable the "cache" as doing lookup with the same key value may + produce different results (because of Index Condition Pushdown) + */ + bool disable_cache; -typedef struct st_join_cache { - uchar *buff,*pos,*end; - uint records,record_nr,ptr_record,fields,length,blobs; - CACHE_FIELD *field,**blob_ptr; - SQL_SELECT *select; -} JOIN_CACHE; + bool tmp_table_index_lookup_init(THD *thd, KEY *tmp_key, Item_iterator &it, + bool value); +} TABLE_REF; /* @@ -153,15 +155,27 @@ enum enum_nested_loop_state typedef enum_nested_loop_state (*Next_select_func)(JOIN *, struct st_join_table *, bool); Next_select_func setup_end_select_func(JOIN *join); +int rr_sequential(READ_RECORD *info); + +class JOIN_CACHE; +class SJ_TMP_TABLE; typedef struct st_join_table { st_join_table() {} /* Remove gcc warning */ TABLE *table; KEYUSE *keyuse; /**< pointer to first used key */ SQL_SELECT *select; - COND *select_cond; + COND *select_cond; QUICK_SELECT_I *quick; + /* + The value of select_cond before we've attempted to do Index Condition + Pushdown. We may need to restore everything back if we first choose one + index but then reconsider (see test_if_skip_sort_order() for such + scenarios). + NULL means no index condition pushdown was performed. 
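    A minimal sketch of the save/restore protocol this field enables (illustrative only; the pushdown itself is done by push_index_cond(), declared near the end of this file): tab->pre_idx_push_select_cond= tab->select_cond; // keep the full condition ... push the index-only part of the condition down to the handler ... if (tab->pre_idx_push_select_cond) // index choice reconsidered: tab->set_cond(tab->pre_idx_push_select_cond); // undo the pushdown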
+ */ + Item *pre_idx_push_select_cond; Item **on_expr_ref; /**< pointer to the associated on expression */ COND_EQUAL *cond_equal; /**< multiple equalities for the on expression */ st_join_table *first_inner; /**< first inner table for including outerjoin */ @@ -213,6 +227,9 @@ typedef struct st_join_table { uint use_quick,index; uint status; ///< Save status for cache uint used_fields,used_fieldlength,used_blobs; + uint used_null_fields; + uint used_rowid_fields; + uint used_uneven_bit_fields; enum join_type type; bool cached_eq_ref_table,eq_ref_table,not_used_in_distinct; bool sorted; @@ -223,11 +240,70 @@ typedef struct st_join_table { */ ha_rows limit; TABLE_REF ref; - JOIN_CACHE cache; + bool use_join_cache; + JOIN_CACHE *cache; + /* + Index condition for BKA access join + */ + Item *cache_idx_cond; + SQL_SELECT *cache_select; JOIN *join; - /** Bitmap of nested joins this table is part of */ + /* + Embedding SJ-nest (may not be the direct parent), or NULL if none. + This variable holds the result of table pullout. + */ + TABLE_LIST *emb_sj_nest; + + /* FirstMatch variables (final QEP) */ + struct st_join_table *first_sj_inner_tab; + struct st_join_table *last_sj_inner_tab; + + /* Variables for semi-join duplicate elimination */ + SJ_TMP_TABLE *flush_weedout_table; + SJ_TMP_TABLE *check_weed_out_table; + + /* + If set, means we should stop join enumeration after we've got the first + match and return to the specified join tab. May point to + join->join_tab[-1] which means stop join execution after the first + match. + */ + struct st_join_table *do_firstmatch; + + /* + ptr - We're doing a LooseScan, this join tab is the first (i.e. + "driving") join tab, and ptr points to the last join tab + handled by the strategy. loosescan_match_tab->found_match + should be checked to see if the current value group had a match. + NULL - Not doing a loose scan on this join tab. + */ + struct st_join_table *loosescan_match_tab; + + /* Buffer to save index tuple to be able to skip duplicates */ + uchar *loosescan_buf; + + /* Length of key tuple (depends on #keyparts used) to store in the above */ + uint loosescan_key_len; + + /* Used by LooseScan. TRUE<=> there has been a matching record combination */ + bool found_match; + + /* + Used by DuplicateElimination. tab->table->ref must have the rowid + whenever we have a current record. + */ + int keep_current_rowid; + + /* NestedOuterJoins: Bitmap of nested joins this table is part of */ nested_join_map embedding_map; + /* + Semi-join strategy to be used for this join table. This is a copy of + POSITION::sj_strategy field. This field is set up by + fix_semijoin_strategies_for_picked_join_order().
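    For illustration, a rough sketch of how code acting on the picked plan could branch on this field (the SJ_OPT_* values are defined later in this file; the actual dispatch logic is not shown here): switch (tab->sj_strategy) { case SJ_OPT_LOOSE_SCAN: /* fill loosescan_key_len, allocate loosescan_buf */ case SJ_OPT_FIRST_MATCH: /* set do_firstmatch on the last inner table */ case SJ_OPT_DUPS_WEEDOUT: /* create an SJ_TMP_TABLE for weedout */ default: /* SJ_OPT_NONE: nothing to set up */ break; }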
+ */ + uint sj_strategy; + void cleanup(); inline bool is_using_loose_index_scan() { @@ -240,12 +316,929 @@ typedef struct st_join_table { return (is_using_loose_index_scan() && ((QUICK_GROUP_MIN_MAX_SELECT *)select->quick)->is_agg_distinct()); } + bool check_rowid_field() + { + if (keep_current_rowid && !used_rowid_fields) + { + used_rowid_fields= 1; + used_fieldlength+= table->file->ref_length; + } + return test(used_rowid_fields); + } + bool is_inner_table_of_semi_join_with_first_match() + { + return first_sj_inner_tab != NULL; + } + bool is_inner_table_of_outer_join() + { + return first_inner != NULL; + } + bool is_single_inner_of_semi_join_with_first_match() + { + return first_sj_inner_tab == this && last_sj_inner_tab == this; + } + bool is_single_inner_of_outer_join() + { + return first_inner == this && first_inner->last_inner == this; + } + bool is_first_inner_for_outer_join() + { + return first_inner && first_inner == this; + } + bool use_match_flag() + { + return is_first_inner_for_outer_join() || first_sj_inner_tab == this; + } + bool check_only_first_match() + { + return is_inner_table_of_semi_join_with_first_match() || + (is_inner_table_of_outer_join() && + table->reginfo.not_exists_optimize); + } + bool is_last_inner_table() + { + return (first_inner && first_inner->last_inner == this) || + last_sj_inner_tab == this; + } + struct st_join_table *get_first_inner_table() + { + if (first_inner) + return first_inner; + return first_sj_inner_tab; + } + void set_select_cond(COND *to, uint line) + { + DBUG_PRINT("info", ("select_cond changes %p -> %p at line %u tab %p", + select_cond, to, line, this)); + select_cond= to; + } + COND *set_cond(COND *new_cond) + { + COND *tmp_select_cond= select_cond; + set_select_cond(new_cond, __LINE__); + if (select) + select->cond= new_cond; + return tmp_select_cond; + } } JOIN_TAB; + +/* + Categories of data fields of variable length written into join cache buffers. + The value of any of these fields is written into cache together with the + prepended length of the value. +*/ +#define CACHE_BLOB 1 /* blob field */ +#define CACHE_STRIPPED 2 /* field stripped of trailing spaces */ +#define CACHE_VARSTR1 3 /* short string value (length takes 1 byte) */ +#define CACHE_VARSTR2 4 /* long string value (length takes 2 bytes) */ + +/* + The CACHE_FIELD structure is used to describe fields of records that + are written into a join cache buffer from record buffers and backward. +*/ +typedef struct st_cache_field { + uchar *str; /**< buffer from/to where the field is to be copied */ + uint length; /**< maximal number of bytes to be copied from/to str */ + /* + Field object for the moved field + (0 - for a flag field, see JOIN_CACHE::create_flag_fields). + */ + Field *field; + uint type; /**< category of the copied field (CACHE_BLOB et al.) */ + /* + The number of the record offset value for the field in the sequence + of offsets placed after the last field of the record. These + offset values are used to access fields referred to from other caches. + If the value is 0 then no offset for the field is saved in the + trailing sequence of offsets.
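    For example, with referenced_fields == 2 and size_of_fld_ofs == 2, a record ends with two 2-byte offsets, and a field referenced from another cache can be located roughly like this (rec_start, rec_end and fld_no are illustrative names): uchar *ofs_area= rec_end - referenced_fields * size_of_fld_ofs; ulong fld_ofs= get_fld_offset(ofs_area + (fld_no - 1) * size_of_fld_ofs); uchar *fld= rec_start + fld_ofs; // start of the referenced field's value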
+ */ + uint referenced_field_no; + /* The remaining structure fields are used as containers for temp values */ + uint blob_length; /**< length of the blob to be copied */ + uint offset; /**< field offset to be saved in cache buffer */ +} CACHE_FIELD; + + +/* + JOIN_CACHE is the base class to support the implementations of both + Block-Based Nested Loops (BNL) Join Algorithm and Batched Key Access (BKA) + Join Algorithm. The first algorithm is supported by the derived class + JOIN_CACHE_BNL, while the second algorithm is supported by the derived + class JOIN_CACHE_BKA. + These two algorithms have a lot in common. Both algorithms first + accumulate the records of the left join operand in a join buffer and + then search for matching rows of the second operand for all accumulated + records. + For the first algorithm this strategy saves on logical I/O operations: + the entire set of records from the join buffer requires only one look-through of + the records provided by the second operand. + For the second algorithm the accumulation of records makes it possible to optimize + fetching rows of the second operand from disk for some engines (MyISAM, + InnoDB), or to minimize the number of round-trips between the Server and + the engine nodes (NDB Cluster). +*/ + +class JOIN_CACHE :public Sql_alloc +{ + +private: + + /* Size of the offset of a record from the cache */ + uint size_of_rec_ofs; + /* Size of the length of a record in the cache */ + uint size_of_rec_len; + /* Size of the offset of a field within a record in the cache */ + uint size_of_fld_ofs; + +protected: + + /* The 3 functions below do not actually use the hidden parameter 'this' */ + + /* Calculate the number of bytes used to store an offset value */ + uint offset_size(uint len) + { return (len < 256 ? 1 : len < 256*256 ? 2 : 4); } + + /* Get the offset value that takes ofs_sz bytes at the position ptr */ + ulong get_offset(uint ofs_sz, uchar *ptr) + { + switch (ofs_sz) { + case 1: return uint(*ptr); + case 2: return uint2korr(ptr); + case 4: return uint4korr(ptr); + } + return 0; + } + + /* Set the offset value ofs that takes ofs_sz bytes at the position ptr */ + void store_offset(uint ofs_sz, uchar *ptr, ulong ofs) + { + switch (ofs_sz) { + case 1: *ptr= (uchar) ofs; return; + case 2: int2store(ptr, (uint16) ofs); return; + case 4: int4store(ptr, (uint32) ofs); return; + } + } + + /* + The total maximal length of the fields stored for a record in the cache. + For blob fields only the sizes of the blob lengths are taken into account. + */ + uint length; + + /* + Representation of the executed multi-way join through which all needed + context can be accessed. + */ + JOIN *join; + + /* + Cardinality of the range of join tables whose fields can be put into the + cache. (A table from the range does not necessarily contribute to the cache.) + */ + uint tables; + + /* + The total number of flag and data fields that can appear in a record + written into the cache. Fields with null values are always skipped + to save space. + */ + uint fields; + + /* + The total number of flag fields in a record put into the cache. They are + used for table null bitmaps, table null row flags, and an optional match + flag. Flag fields go before other fields in a cache record with the match + flag field always placed at the very beginning of the record. + */ + uint flag_fields; + + /* The total number of blob fields that are written into the cache */ + uint blobs; + + /* + The total number of fields referenced from field descriptors for other join + caches.
These fields are used to construct key values to access matching + rows with index lookups. Currently the fields can be referenced only from + descriptors for BKA caches. However, they may belong to a cache of any type. + */ + uint referenced_fields; + + /* + The current number of already created data field descriptors. + This number can be useful for implementations of the init methods. + */ + uint data_field_count; + + /* + The current number of already created pointers to the data field + descriptors. This number can be useful for implementations of + the init methods. + */ + uint data_field_ptr_count; + /* + Array of the descriptors of fields containing 'fields' elements. + These are all fields that are stored for a record in the cache. + */ + CACHE_FIELD *field_descr; + + /* + Array of pointers to the blob descriptors that contains 'blobs' elements. + */ + CACHE_FIELD **blob_ptr; + + /* + This flag indicates that records written into the join buffer contain + a match flag field. The flag must be set by the init method. + */ + bool with_match_flag; + /* + This flag indicates that any record is prepended with the length of the + record which allows us to skip the record or part of it without reading. + */ + bool with_length; + + /* + The maximal number of bytes used for a record representation in + the cache excluding the space for blob data. + For future derived classes this representation may contain some + redundant info such as a key value associated with the record. + */ + uint pack_length; + /* + The value of pack_length incremented by the total size of all + pointers of a record in the cache to the blob data. + */ + uint pack_length_with_blob_ptrs; + + /* Pointer to the beginning of the join buffer */ + uchar *buff; + /* + Size of the entire memory allocated for the join buffer. + Part of this memory may be reserved for the auxiliary buffer. + */ + ulong buff_size; + /* Size of the auxiliary buffer. */ + ulong aux_buff_size; + + /* The number of records put into the join buffer */ + uint records; + + /* + Pointer to the current position in the join buffer. + This member is used both when writing to the buffer and + when reading from it. + */ + uchar *pos; + /* + Pointer to the first free position in the join buffer, + right after the last record in it. + */ + uchar *end_pos; + + /* + Pointer to the beginning of first field of the current read/write record + from the join buffer. The value is adjusted by the get_record/put_record + functions. + */ + uchar *curr_rec_pos; + /* + Pointer to the beginning of first field of the last record + from the join buffer. + */ + uchar *last_rec_pos; + + /* + This flag is set if the blob data for the last record in the join buffer + is in record buffers rather than in the join cache. + */ + bool last_rec_blob_data_is_in_rec_buff; + + /* + Pointer to the position of the current record link. + Record links are used only with linked caches. Record links make it possible to set + connections between parts of one join record that are stored in different + join buffers. + In the simplest case a record link is just a pointer to the beginning of + the record stored in the buffer. + In a more general case a link could be a reference to an array of pointers + to records in the buffer.
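    A sketch of how a put_record() implementation threads this link through a chain of linked caches (is_full is an illustrative flag; see write_record_data() below): uchar *link= 0; if (prev_cache) link= prev_cache->get_curr_rec_link(); // our other half in the previous buffer write_record_data(link, &is_full); // the link is stored with the record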
*/ + uchar *curr_rec_link; + + void calc_record_fields(); + int alloc_fields(uint external_fields); + void create_flag_fields(); + void create_remaining_fields(bool all_read_fields); + void set_constants(); + int alloc_buffer(); + + uint get_size_of_rec_offset() { return size_of_rec_ofs; } + uint get_size_of_rec_length() { return size_of_rec_len; } + uint get_size_of_fld_offset() { return size_of_fld_ofs; } + + uchar *get_rec_ref(uchar *ptr) + { + return buff+get_offset(size_of_rec_ofs, ptr-size_of_rec_ofs); + } + ulong get_rec_length(uchar *ptr) + { + return (ulong) get_offset(size_of_rec_len, ptr); + } + ulong get_fld_offset(uchar *ptr) + { + return (ulong) get_offset(size_of_fld_ofs, ptr); + } + + void store_rec_ref(uchar *ptr, uchar* ref) + { + store_offset(size_of_rec_ofs, ptr-size_of_rec_ofs, (ulong) (ref-buff)); + } + + void store_rec_length(uchar *ptr, ulong len) + { + store_offset(size_of_rec_len, ptr, len); + } + void store_fld_offset(uchar *ptr, ulong ofs) + { + store_offset(size_of_fld_ofs, ptr, ofs); + } + + /* Write record fields and their required offsets into the join buffer */ + uint write_record_data(uchar *link, bool *is_full); + + /* + This method must determine by how much the auxiliary buffer should be + incremented when a new record is added to the join buffer. + If no auxiliary buffer is needed the function should return 0. + */ + virtual uint aux_buffer_incr() { return 0; } + + /* Shall calculate how much space is remaining in the join buffer */ + virtual ulong rem_space() + { + return max(buff_size-(end_pos-buff)-aux_buff_size,0); + } + + /* Shall skip record from the join buffer if its match flag is on */ + virtual bool skip_record_if_match(); + + /* Read all flag and data fields of a record from the join buffer */ + uint read_all_record_fields(); + + /* Read all flag fields of a record from the join buffer */ + uint read_flag_fields(); + + /* Read a data record field from the join buffer */ + uint read_record_field(CACHE_FIELD *copy, bool last_record); + + /* Read a referenced field from the join buffer */ + bool read_referenced_field(CACHE_FIELD *copy, uchar *rec_ptr, uint *len); + + /* + True if rec_ptr points to the record whose blob data stays in + record buffers + */ + bool blob_data_is_in_rec_buff(uchar *rec_ptr) + { + return rec_ptr == last_rec_pos && last_rec_blob_data_is_in_rec_buff; + } + + /* Find matches from the next table for records from the join buffer */ + virtual enum_nested_loop_state join_matching_records(bool skip_last)=0; + + /* Add null complements for unmatched outer records from buffer */ + virtual enum_nested_loop_state join_null_complements(bool skip_last); + + /* Restore the fields of the last record from the join buffer */ + virtual void restore_last_record(); + + /* Set match flag for a record in join buffer if it has not been set yet */ + bool set_match_flag_if_none(JOIN_TAB *first_inner, uchar *rec_ptr); + + enum_nested_loop_state generate_full_extensions(uchar *rec_ptr); + + /* Check matching to a partial join record from the join buffer */ + bool check_match(uchar *rec_ptr); + +public: + + /* Table to be joined with the partial join records from the cache */ + JOIN_TAB *join_tab; + + /* Pointer to the previous join cache if there is any */ + JOIN_CACHE *prev_cache; + /* Pointer to the next join cache if there is any */ + JOIN_CACHE *next_cache; + + /* Shall initialize the join cache structure */ + virtual int init()=0; + + /* The function shall return TRUE only for BKA caches */ + virtual bool is_key_access() { return FALSE; } +
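+ /* A simplified sketch of the intended calling pattern, close to what sub_select_cache() does with a cache (error handling omitted; rc is illustrative): if (end_of_records) rc= cache->join_records(FALSE); // drain whatever has been buffered else if (cache->put_record()) // TRUE: this should be the last buffered record rc= cache->join_records(FALSE); // match all buffered rows at once */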
+ /* Shall reset the join buffer for reading/writing */ + virtual void reset(bool for_writing); + + /* + This function shall add a record into the join buffer and return TRUE + if it has been decided that it should be the last record in the buffer. + */ + virtual bool put_record(); + + /* + This function shall read the next record into the join buffer and return + TRUE if there are no more records. + */ + virtual bool get_record(); + + /* + This function shall read the record at the position rec_ptr + in the join buffer. + */ + virtual void get_record_by_pos(uchar *rec_ptr); + + /* Shall return the value of the match flag for the positioned record */ + virtual bool get_match_flag_by_pos(uchar *rec_ptr); + + /* Shall return the position of the current record */ + virtual uchar *get_curr_rec() { return curr_rec_pos; } + + /* Shall set the current record link */ + virtual void set_curr_rec_link(uchar *link) { curr_rec_link= link; } + + /* Shall return the current record link */ + virtual uchar *get_curr_rec_link() + { + return (curr_rec_link ? curr_rec_link : get_curr_rec()); + } + + /* Join records from the join buffer with records from the next join table */ + enum_nested_loop_state join_records(bool skip_last); + + virtual ~JOIN_CACHE() {} + void reset_join(JOIN *j) { join= j; } + void free() + { + x_free(buff); + buff= 0; + } + + friend class JOIN_CACHE_BNL; + friend class JOIN_CACHE_BKA; + friend class JOIN_CACHE_BKA_UNIQUE; +}; + + +class JOIN_CACHE_BNL :public JOIN_CACHE + { + +protected: + + /* Using BNL find matches from the next table for records from join buffer */ + enum_nested_loop_state join_matching_records(bool skip_last); + +public: + + /* + This constructor creates an unlinked BNL join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. + */ + JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab) + { + join= j; + join_tab= tab; + prev_cache= next_cache= 0; + } + + /* + This constructor creates a linked BNL join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. The parameter 'prev' specifies the previous + cache object to which this cache is linked. + */ + JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev) + { + join= j; + join_tab= tab; + prev_cache= prev; + next_cache= 0; + if (prev) + prev->next_cache= this; + } + + /* Initialize the BNL cache */ + int init(); + +}; + +class JOIN_CACHE_BKA :public JOIN_CACHE +{ +protected: + + /* Flag to be passed to the MRR interface */ + uint mrr_mode; + + /* MRR buffer associated with this join cache */ + HANDLER_BUFFER mrr_buff; + + /* Shall initialize the MRR buffer */ + virtual void init_mrr_buff() + { + mrr_buff.buffer= end_pos; + mrr_buff.buffer_end= buff+buff_size; + } + + /* + The number of the cache fields that are used in building keys to access + the table join_tab + */ + uint local_key_arg_fields; + /* + The total number of the fields in the previous caches that are used + in building keys to access the table join_tab + */ + uint external_key_arg_fields; + + /* + This flag indicates that the key values will be read directly from the join + buffer. This will save us from building key values in the key buffer.
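    A sketch of the effect inside a get_next_key()-style reader (emb_key_offset and key_buff are illustrative names): uchar *key; if (use_emb_key) key= get_curr_rec() + emb_key_offset; // key bytes are already contiguous in the buffer else key= key_buff; // otherwise the key is assembled field-by-field first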
+ */ + bool use_emb_key; + /* The length of an embedded key value */ + uint emb_key_length; + + /* Check the possibility to read the access keys directly from join buffer */ + bool check_emb_key_usage(); + + /* Calculate the increment of the MRR buffer for a record write */ + uint aux_buffer_incr(); + + /* Using BKA find matches from the next table for records from join buffer */ + enum_nested_loop_state join_matching_records(bool skip_last); + + /* Prepare to search for records that match records from the join buffer */ + enum_nested_loop_state init_join_matching_records(RANGE_SEQ_IF *seq_funcs, + uint ranges); + + /* Finish searching for records that match records from the join buffer */ + enum_nested_loop_state end_join_matching_records(enum_nested_loop_state rc); + +public: + + /* + This constructor creates an unlinked BKA join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. + The MRR mode is initially set to 'flags'. + */ + JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags) + { + join= j; + join_tab= tab; + prev_cache= next_cache= 0; + mrr_mode= flags; + } + + /* + This constructor creates a linked BKA join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. The parameter 'prev' specifies the cache + object to which this cache is linked. + The MRR mode is initially set to 'flags'. + */ + JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags, JOIN_CACHE* prev) + { + join= j; + join_tab= tab; + prev_cache= prev; + next_cache= 0; + if (prev) + prev->next_cache= this; + mrr_mode= flags; + } + + /* Initialize the BKA cache */ + int init(); + + bool is_key_access() { return TRUE; } + + /* Shall get the key built over the next record from the join buffer */ + virtual uint get_next_key(uchar **key); + + /* Check if the record combination matches the index condition */ + bool skip_index_tuple(range_seq_t rseq, char *range_info); +}; + +/* + The class JOIN_CACHE_BKA_UNIQUE supports the variant of the BKA join algorithm + that submits only distinct keys to the MRR interface. The records in the join + buffer of a cache of this class that have the same access key are linked into + a chain attached to a key entry structure that either itself contains the key + value, or, in the case when the keys are embedded, refers to its occurrence in + one of the records from the chain. + To build the chains with the same keys a hash table is employed. It is placed + at the very end of the join buffer. The array of hash entries is allocated + first at the very bottom of the join buffer, then come the key entries. A hash entry + contains a header of the list of the key entries with the same hash value. + Each key entry is a structure of the following type: + struct st_join_cache_key_entry { + union { + uchar[] value; + cache_ref *value_ref; // offset from the beginning of the buffer + } hash_table_key; + key_ref next_key; // offset backward from the beginning of hash table + cache_ref *last_rec // offset from the beginning of the buffer + } + The references linking the records in a chain are always placed at the very + beginning of the record info stored in the join buffer. The records are + linked in a circular list. A new record is always added to the end of this + list.
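    A plausible sketch of appending new_rec to such a chain, in terms of the helpers declared further below (anchor is the last_rec slot of the key entry; key_entry and new_rec are illustrative pointers): uchar *anchor= key_entry + key_entry_length - get_size_of_rec_offset(); uchar *last= get_next_rec_ref(anchor); // current tail of the chain store_next_rec_ref(new_rec, get_next_rec_ref(last)); // new -> chain head store_next_rec_ref(last, new_rec); // old tail -> new store_next_rec_ref(anchor, new_rec); // anchor -> new tail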
When a key is passed to the MRR interface it can be passed either with + an association link containing a reference to the header of the record chain + attached to the corresponding key entry in the hash table, or without any + association link. When the next record is returned by a call to the MRR + function multi_range_read_next without any association (because it was not + passed together with the key) then the key value is extracted from the + returned record and searched for in the hash table. If there are any records + with such a key, their chain will be yielded as the result of this search. + + The following picture represents a typical layout for the info stored in the + join buffer of a join cache object of the JOIN_CACHE_BKA_UNIQUE class. + + buff + V + +----------------------------------------------------------------------------+ + | |[*]record_1_1| | + | ^ | | + | | +--------------------------------------------------+ | + | | |[*]record_2_1| | | + | | ^ | V | + | | | +------------------+ |[*]record_1_2| | + | | +--------------------+-+ | | + |+--+ +---------------------+ | | +-------------+ | + || | | V | | | + |||[*]record_3_1| |[*]record_1_3| |[*]record_2_2| | | + ||^ ^ ^ | | + ||+----------+ | | | | + ||^ | |<---------------------------+-------------------+ | + |++ | | ... mrr | buffer ... ... | | | + | | | | | + | +-----+--------+ | +-----|-------+ | + | V | | | V | | | + ||key_3|[/]|[*]| | | |key_2|[/]|[*]| | | + | +-+---|-----------------------+ | | + | V | | | | | + | |key_1|[*]|[*]| | | ... |[*]| ... |[*]| ... | | + +----------------------------------------------------------------------------+ + ^ ^ ^ + | i-th entry j-th entry + hash table + + i-th hash entry: + circular record chain for key_1: + record_1_1 + record_1_2 + record_1_3 (points to record_1_1) + circular record chain for key_3: + record_3_1 (points to itself) + + j-th hash entry: + circular record chain for key_2: + record_2_1 + record_2_2 (points to record_2_1) + +*/ + +class JOIN_CACHE_BKA_UNIQUE :public JOIN_CACHE_BKA +{ + +private: + + /* Size of the offset of a key entry in the hash table */ + uint size_of_key_ofs; + + /* + Length of a key value. + It is assumed that all key values have the same length. + */ + uint key_length; + /* + Length of the key entry in the hash table. + A key entry either contains the key value, or it contains a reference + to the key value if the use_emb_key flag is set for the cache. + */ + uint key_entry_length; + + /* The beginning of the hash table in the join buffer */ + uchar *hash_table; + /* Number of hash entries in the hash table */ + uint hash_entries; + + /* Number of key entries in the hash table (number of distinct keys) */ + uint key_entries; + + /* The position of the last key entry in the hash table */ + uchar *last_key_entry; + + /* The position of the currently retrieved key entry in the hash table */ + uchar *curr_key_entry; + + /* + The offset of the record fields from the beginning of the record + representation. The record representation starts with a reference to + the next record in the key record chain followed by the length of + the trailing record data followed by a reference to the record segment + in the previous cache, if any, followed by the record fields.
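    Under that layout the offset works out roughly as follows (a sketch, not the literal implementation): rec_fields_offset= get_size_of_rec_offset() /* next-in-chain link */ + (with_length ? get_size_of_rec_length() : 0) + (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);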
+ */ + uint rec_fields_offset; + /* The offset of the data fields from the beginning of the record fields */ + uint data_fields_offset; + + uint get_hash_idx(uchar* key, uint key_len); + + void cleanup_hash_table(); + +protected: + + uint get_size_of_key_offset() { return size_of_key_ofs; } + + /* + Get the position of the next_key_ptr field pointed to by + a linking reference stored at the position key_ref_ptr. + This reference is actually the offset backward from the + beginning of hash table. + */ + uchar *get_next_key_ref(uchar *key_ref_ptr) + { + return hash_table-get_offset(size_of_key_ofs, key_ref_ptr); + } + + /* + Store the linking reference to the next_key_ptr field at + the position key_ref_ptr. The position of the next_key_ptr + field is pointed to by ref. The stored reference is actually + the offset backward from the beginning of the hash table. + */ + void store_next_key_ref(uchar *key_ref_ptr, uchar *ref) + { + store_offset(size_of_key_ofs, key_ref_ptr, (ulong) (hash_table-ref)); + } + + /* + Check whether the reference to the next_key_ptr field at the position + key_ref_ptr contains a nil value. + */ + bool is_null_key_ref(uchar *key_ref_ptr) + { + ulong nil= 0; + return memcmp(key_ref_ptr, &nil, size_of_key_ofs ) == 0; + } + + /* + Set the reference to the next_key_ptr field at the position + key_ref_ptr equal to nil. + */ + void store_null_key_ref(uchar *key_ref_ptr) + { + ulong nil= 0; + store_offset(size_of_key_ofs, key_ref_ptr, nil); + } + + uchar *get_next_rec_ref(uchar *ref_ptr) + { + return buff+get_offset(get_size_of_rec_offset(), ref_ptr); + } + + void store_next_rec_ref(uchar *ref_ptr, uchar *ref) + { + store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff)); + } + + /* + Get the position of the embedded key value for the current + record pointed to by get_curr_rec(). + */ + uchar *get_curr_emb_key() + { + return get_curr_rec()+data_fields_offset; + } + + /* + Get the position of the embedded key value pointed to by a reference + stored at ref_ptr. The stored reference is actually the offset from + the beginning of the join buffer. + */ + uchar *get_emb_key(uchar *ref_ptr) + { + return buff+get_offset(get_size_of_rec_offset(), ref_ptr); + } + + /* + Store the reference to an embedded key at the position key_ref_ptr. + The position of the embedded key is pointed to by ref. The stored + reference is actually the offset from the beginning of the join buffer. + */ + void store_emb_key_ref(uchar *ref_ptr, uchar *ref) + { + store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff)); + } + + /* + Calculate how much space in the buffer would not be occupied by + records, key entries and additional memory for the MRR buffer. + */ + ulong rem_space() + { + return max(last_key_entry-end_pos-aux_buff_size,0); + } + + /* + Initialize the MRR buffer, allocating some space within the join buffer. + The entire space between the last record put into the join buffer and the + last key entry added to the hash table is used for the MRR buffer.
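    The resulting split of the join buffer can be pictured like this (records grow up from buff, key entries grow down towards them from the hash table): buff end_pos last_key_entry buff+buff_size | records... | <- MRR buffer -> | key entries... | hash entries | which is also why rem_space() above is last_key_entry - end_pos - aux_buff_size.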
+ */ + void init_mrr_buff() + { + mrr_buff.buffer= end_pos; + mrr_buff.buffer_end= last_key_entry; + } + + /* Skip record from JOIN_CACHE_BKA_UNIQUE buffer if its match flag is on */ + bool skip_record_if_match(); + + /* Using BKA_UNIQUE find matches for records from join buffer */ + enum_nested_loop_state join_matching_records(bool skip_last); + + /* Search for a key in the hash table of the join buffer */ + bool key_search(uchar *key, uint key_len, uchar **key_ref_ptr); + +public: + + /* + This constructor creates an unlinked BKA_UNIQUE join cache. The cache is + to be used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. + The MRR mode initially is set to 'flags'. + */ + JOIN_CACHE_BKA_UNIQUE(JOIN *j, JOIN_TAB *tab, uint flags) + :JOIN_CACHE_BKA(j, tab, flags) {} + + /* + This constructor creates a linked BKA_UNIQUE join cache. The cache is + to be used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. The parameter 'prev' specifies the cache + object to which this cache is linked. + The MRR mode initially is set to 'flags'. + */ + JOIN_CACHE_BKA_UNIQUE(JOIN *j, JOIN_TAB *tab, uint flags, JOIN_CACHE* prev) + :JOIN_CACHE_BKA(j, tab, flags, prev) {} + + /* Initialize the BKA_UNIQUE cache */ + int init(); + + /* Reset the JOIN_CACHE_BKA_UNIQUE buffer for reading/writing */ + void reset(bool for_writing); + + /* Add a record into the JOIN_CACHE_BKA_UNIQUE buffer */ + bool put_record(); + + /* Read the next record from the JOIN_CACHE_BKA_UNIQUE buffer */ + bool get_record(); + + /* + Shall check whether all records in a key chain have + their match flags set on + */ + virtual bool check_all_match_flags_for_key(uchar *key_chain_ptr); + + uint get_next_key(uchar **key); + + /* Get the head of the record chain attached to the current key entry */ + uchar *get_curr_key_chain() + { + return get_next_rec_ref(curr_key_entry+key_entry_length- + get_size_of_rec_offset()); + } + + /* Check if the record combination matches the index condition */ + bool skip_index_tuple(range_seq_t rseq, char *range_info); +}; + + enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool end_of_records); +enum_nested_loop_state sub_select_sjm(JOIN *join, JOIN_TAB *join_tab, + bool end_of_records); + +enum_nested_loop_state +end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), + bool end_of_records); +enum_nested_loop_state +end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), + bool end_of_records); + /** Information about a position of table within a join order. Used in join @@ -276,6 +1269,89 @@ typedef struct st_position /* If ref-based access is used: bitmap of tables this table depends on */ table_map ref_depend_map; + + bool use_join_buffer; + + + /* These form a stack of partial join order costs and output sizes */ + COST_VECT prefix_cost; + double prefix_record_count; + + /* + Current optimization state: Semi-join strategy to be used for this + and preceding join tables. + + Join optimizer sets this for the *last* join_tab in the + duplicate-generating range. That is, in order to interpret this field, + one needs to traverse join->[best_]positions array from right to left. + When you see a join table with sj_strategy!= SJ_OPT_NONE, some other + field (depending on the strategy) tells how many preceding positions + this applies to. 
The values of covered_preceding_positions->sj_strategy + must be ignored. + */ + uint sj_strategy; + /* + Valid only after fix_semijoin_strategies_for_picked_join_order() call: + if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that + are covered by the specified semi-join strategy + */ + uint n_sj_tables; + +/* LooseScan strategy members */ + + /* The first (i.e. driving) table we're doing loose scan for */ + uint first_loosescan_table; + /* + Tables that need to be in the prefix before we can calculate the cost + of using the LooseScan strategy. + */ + table_map loosescan_need_tables; + + /* + keyno - Planning to do LooseScan on this key. If keyuse is NULL then + this is a full index scan, otherwise this is a ref+loosescan + scan (and keyno matches the KEYUSE's) + MAX_KEY - Not doing a LooseScan + */ + uint loosescan_key; // final (one for strategy instance) + uint loosescan_parts; /* Number of keyparts to be kept distinct */ + +/* FirstMatch strategy */ + /* + Index of the first inner table that we intend to handle with this + strategy + */ + uint first_firstmatch_table; + /* + Tables that were not in the join prefix when we started considering + the FirstMatch strategy. + */ + table_map first_firstmatch_rtbl; + /* + Tables that need to be in the prefix before we can calculate the cost + of using the FirstMatch strategy. + */ + table_map firstmatch_need_tables; + + +/* Duplicate Weedout strategy */ + /* The first table that the strategy will need to handle */ + uint first_dupsweedout_table; + /* + Tables that we will need to have in the prefix to do the weedout step + (all inner and all outer that the involved semi-joins are correlated with) + */ + table_map dupsweedout_tables; + +/* SJ-Materialization-Scan strategy */ + /* The last inner table (valid once we're after it) */ + uint sjm_scan_last_inner; + /* + Tables that we need to have in the prefix to calculate the correct cost. + Basically, we need all inner tables and outer tables mentioned in the + semi-join's ON expression so we can correctly account for fanout. + */ + table_map sjm_scan_need_tables; } POSITION; @@ -289,6 +1365,19 @@ typedef struct st_rollup } ROLLUP; +#define SJ_OPT_NONE 0 +#define SJ_OPT_DUPS_WEEDOUT 1 +#define SJ_OPT_LOOSE_SCAN 2 +#define SJ_OPT_FIRST_MATCH 3 +#define SJ_OPT_MATERIALIZE 4 +#define SJ_OPT_MATERIALIZE_SCAN 5 + +inline bool sj_is_materialize_strategy(uint strategy) +{ + return strategy >= SJ_OPT_MATERIALIZE; +} + + class JOIN :public Sql_alloc { JOIN(const JOIN &rhs); /**< not implemented */ @@ -297,9 +1386,18 @@ public: JOIN_TAB *join_tab,**best_ref; JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs JOIN_TAB *join_tab_save; ///< saved join_tab for subquery reexecution - TABLE **all_tables,*sort_by_table; - uint tables,const_tables; + TABLE **all_tables; + /** + The table which has an index that can be used to produce the required ordering. + A special value of 0x1 means that the ordering will be produced by + passing the 1st non-const table to filesort(). NULL means no such table exists. + */ + TABLE *sort_by_table; + uint tables; /**< Number of tables in the join */ + uint outer_tables; /**< Number of tables that are not inside semijoin */ + uint const_tables; uint send_group_parts; + bool group; /**< If the query contains a GROUP BY clause */ /** Indicates that grouping will be performed on the result set during query execution. This field belongs to query execution.
@@ -307,9 +1405,17 @@ public: @see make_group_fields, alloc_group_fields, JOIN::exec */ bool sort_and_group; - bool first_record,full_join,group, no_field_update; + bool first_record,full_join, no_field_update; bool do_send_rows; - table_map const_table_map,found_const_table_map; + table_map const_table_map; + /* + Constant tables for which we have found a row (as opposed to those for + which we didn't). + */ + table_map found_const_table_map; + + /* Tables removed by table elimination. Set to 0 before the elimination. */ + table_map eliminated_tables; /* Bitmap of all inner tables from outer joins */ @@ -325,14 +1431,47 @@ public: - on each fetch iteration we add num_rows to fetch to fetch_limit */ ha_rows fetch_limit; + /* Finally picked QEP. This is the result of join optimization */ + POSITION best_positions[MAX_TABLES+1]; + +/******* Join optimization state members start *******/ + /* + pointer - we're doing optimization for a semi-join materialization nest. + NULL - otherwise + */ + TABLE_LIST *emb_sjm_nest; - POSITION positions[MAX_TABLES+1],best_positions[MAX_TABLES+1]; - /* * + /* Current join optimization state */ + POSITION positions[MAX_TABLES+1]; + + /* Bitmap of nested joins embedding the position at the end of the current partial join (valid only during join optimizer run). */ nested_join_map cur_embedding_map; + + /* + Bitmap of inner tables of semi-join nests that have a proper subset of + their tables in the current join prefix. That is, of those semi-join + nests that have their tables both in and outside of the join prefix. + */ + table_map cur_sj_inner_tables; + + /* + Bitmap of semi-join inner tables that are in the join prefix and for + which there's no provision for how to eliminate semi-join duplicates + they produce. + */ + table_map cur_dups_producing_tables; + /* We also maintain a stack of join optimization states in * join->positions[] */ +/******* Join optimization state members end *******/ + Next_select_func first_select; + /* + The cost of the best complete join plan found so far during optimization; + after the optimization phase - the cost of the picked join order (not taking into + account the changes made by test_if_skip_sort_order()). + */ double best_read; List<Item> *fields; List<Cached_item> group_fields, group_fields_cache; @@ -363,24 +1502,31 @@ public: the number of rows in it may vary from one subquery execution to another. */ bool no_const_tables; + /* + This flag is set if we call no_rows_in_result() as part of end_group(). + This is used as a simple speed optimization to avoid calling + restore_no_rows_in_result() in ::reinit() + */ + bool no_rows_in_result_called; /** Copy of this JOIN to be used with temporary tables. - tmp_join is used when the JOIN needs to be "reusable" (e.g. in a subquery - that gets re-executed several times) and we know will use temporary tables - for materialization. The materialization to a temporary table overwrites the - JOIN structure to point to the temporary table after the materialization is - done. This is where tmp_join is used : it's a copy of the JOIN before the - materialization and is used in restoring before re-execution by overwriting - the current JOIN structure with the saved copy. - Because of this we should pay extra care of not freeing up helper structures - that are referenced by the original contents of the JOIN. We can check for - this by making sure the "current" join is not the temporary copy, e.g. - !tmp_join || tmp_join != join + tmp_join is used when the JOIN needs to be "reusable" (e.g.
in a + subquery that gets re-executed several times) and we know it will use + temporary tables for materialization. The materialization to a + temporary table overwrites the JOIN structure to point to the + temporary table after the materialization is done. This is where + tmp_join is used: it's a copy of the JOIN before the + materialization and is used in restoring before re-execution by + overwriting the current JOIN structure with the saved copy. + Because of this we should take extra care not to free helper + structures that are referenced by the original contents of the + JOIN. We can check for this by making sure the "current" join is + not the temporary copy, e.g. !tmp_join || tmp_join != join - We should free these sub-structures at JOIN::destroy() if the "current" join - has a copy is not that copy. + We should free these sub-structures at JOIN::destroy() if the + "current" join has a copy and is not that copy. */ JOIN *tmp_join; ROLLUP rollup; ///< Used with rollup @@ -429,6 +1575,7 @@ public: TABLE_LIST *tables_list; ///<hold 'tables' parameter of mysql_select List<TABLE_LIST> *join_list; ///< list of joined tables in reverse order COND_EQUAL *cond_equal; + COND_EQUAL *having_equal; SQL_SELECT *select; ///<created in optimisation phase JOIN_TAB *return_tab; ///<used only for outer joins Item **ref_pointer_array; ///<used pointer reference for this select @@ -440,6 +1587,13 @@ public: bool union_part; ///< this subselect is part of union bool optimized; ///< flag to avoid double optimization in EXPLAIN + Array<Item_in_subselect> sj_subselects; + + /* Temporary tables used to weed-out semi-join duplicates */ + List<TABLE> sj_tmp_tables; + /* SJM nests that are executed with SJ-Materialization strategy */ + List<SJ_MATERIALIZATION_INFO> sjm_info_list; + /* storage for caching buffers allocated during query execution. These buffers allocations need to be cached as the thread memory pool is @@ -457,7 +1611,7 @@ public: JOIN(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg, select_result *result_arg) - :fields_list(fields_arg) + :fields_list(fields_arg), sj_subselects(thd_arg->mem_root, 4) { init(thd_arg, fields_arg, select_options_arg, result_arg); } @@ -469,6 +1623,7 @@ public: all_tables= 0; tables= 0; const_tables= 0; + eliminated_tables= 0; join_list= 0; implicit_grouping= FALSE; sort_and_group= 0; @@ -507,7 +1662,9 @@ public: zero_result_cause= 0; optimized= 0; cond_equal= 0; + having_equal= 0; group_optimized_away= 0; + no_rows_in_result_called= 0; all_fields= fields_arg; if (&fields_list != &fields_arg) /* Avoid valgrind-warning */ @@ -518,6 +1675,7 @@ public: rollup.state= ROLLUP::STATE_NONE; no_const_tables= FALSE; + first_select= sub_select; } int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num, @@ -530,6 +1688,8 @@ public: int destroy(); void restore_tmp(); bool alloc_func_list(); + bool flatten_subqueries(); + bool setup_subquery_materialization(); bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields, bool before_group_by, bool recompute= FALSE); @@ -551,7 +1711,6 @@ public: Item_sum ***func); int rollup_send_data(uint idx); int rollup_write_data(uint idx, TABLE *table); - void remove_subq_pushed_predicates(Item **where); /** Release memory and, if possible, the open tables held by this execution plan (and nested plans).
It's used to release some tables before @@ -576,6 +1735,21 @@ public: select_lex == unit->fake_select_lex)); } void cache_const_exprs(); + inline table_map all_tables_map() + { + return (table_map(1) << tables) - 1; + } + /* + Return the table for which an index scan can be used to satisfy + the sort order needed by the ORDER BY/(implicit) GROUP BY clause + */ + JOIN_TAB *get_sort_by_join_tab() + { + return (need_tmp || !sort_by_table || skip_sort_order || + ((group || tmp_table_param.sum_func_count) && !group_list)) ? + NULL : join_tab+const_tables; + } + bool setup_subquery_caches(); private: /** TRUE if the query contains an aggregate function but has no GROUP @@ -595,11 +1769,6 @@ extern const char *join_type_str[]; /* Extern functions in sql_select.cc */ bool store_val_in_field(Field *field, Item *val, enum_check_fields check_flag); -TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, - ORDER *group, bool distinct, bool save_sum_fields, - ulonglong select_options, ha_rows rows_limit, - const char* alias); -void free_tmp_table(THD *thd, TABLE *entry); void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, List<Item> &fields, bool reset_with_sum_func); bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, @@ -608,8 +1777,6 @@ bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, uint elements, List<Item> &fields); void copy_fields(TMP_TABLE_PARAM *param); bool copy_funcs(Item **func_ptr, const THD *thd); -bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, - int error, bool ignore_last_dupp_error); uint find_shortest_key(TABLE *table, const key_map *usable_keys); Field* create_tmp_field_from_field(THD *thd, Field* org_field, const char *name, TABLE *table, @@ -723,12 +1890,17 @@ class store_key_item :public store_key { protected: Item *item; + /* + Flag that forces usage of the save_val() method, which saves the value of the + item, instead of the save_in_field() method, which saves the result. + */ + bool use_value; public: store_key_item(THD *thd, Field *to_field_arg, uchar *ptr, - uchar *null_ptr_arg, uint length, Item *item_arg) + uchar *null_ptr_arg, uint length, Item *item_arg, bool val) :store_key(thd, to_field_arg, ptr, null_ptr_arg ? null_ptr_arg : item_arg->maybe_null ? - &err : (uchar*) 0, length), item(item_arg) + &err : (uchar*) 0, length), item(item_arg), use_value(val) {} const char *name() const { return "func"; } @@ -738,7 +1910,11 @@ public: TABLE *table= to_field->table; my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); - int res= item->save_in_field(to_field, 1); + int res= FALSE; + if (use_value) + item->save_val(to_field); + else + res= item->save_in_field(to_field, 1); /* Item::save_in_field() may call Item::val_xxx(). And if this is a subquery we need to check for errors executing it and react accordingly @@ -762,7 +1938,7 @@ public: Item *item_arg) :store_key_item(thd, to_field_arg,ptr, null_ptr_arg ? null_ptr_arg : item_arg->maybe_null ?
- &err : (uchar*) 0, length, item_arg), inited(0) + &err : (uchar*) 0, length, item_arg, FALSE), inited(0) { } const char *name() const { return "const"; } @@ -805,7 +1981,9 @@ int setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, List<Item> &fields, List<Item> &all_fields, ORDER *order, bool *hidden_group_fields); bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, - Item **ref_pointer_array, ORDER *group_list= NULL); + Item **ref_pointer_array); +int join_read_key2(THD *thd, struct st_join_table *tab, TABLE *table, + struct st_table_ref *table_ref); bool handle_select(THD *thd, LEX *lex, select_result *result, ulong setup_tables_done_option); @@ -836,6 +2014,26 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type, TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list); +int test_if_item_cache_changed(List<Cached_item> &list); +void calc_used_field_length(THD *thd, JOIN_TAB *join_tab); +int join_init_read_record(JOIN_TAB *tab); +void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key); +inline Item * and_items(Item* cond, Item *item) +{ + return (cond? (new Item_cond_and(cond, item)) : item); +} +bool choose_plan(JOIN *join,table_map join_tables); +void get_partial_join_cost(JOIN *join, uint n_tables, double *read_time_arg, + double *record_count_arg); +void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, + table_map last_remaining_tables, + bool first_alt, uint no_jbuf_before, + double *reopt_rec_count, double *reopt_cost, + double *sj_inner_fanout); +Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, + bool *inherited_fl); +bool test_if_ref(COND *root_cond, + Item_field *left_item,Item *right_item); inline bool optimizer_flag(THD *thd, uint flag) { @@ -849,5 +2047,35 @@ bool const_expression_in_where(COND *cond, Item *comp_item, Field *comp_field= NULL, Item **const_item= NULL); +/* Table elimination entry point function */ +void eliminate_tables(JOIN *join); + +/* Index Condition Pushdown entry point function */ +void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok); + +/**************************************************************************** + Temporary table support for SQL Runtime + ***************************************************************************/ + +#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128 +#define AVG_STRING_LENGTH_TO_PACK_ROWS 64 +#define RATIO_TO_PACK_ROWS 2 +#define MIN_STRING_LENGTH_TO_PACK_ROWS 10 + +TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, + ORDER *group, bool distinct, bool save_sum_fields, + ulonglong select_options, ha_rows rows_limit, + char* alias); +void free_tmp_table(THD *thd, TABLE *entry); +bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table, + ENGINE_COLUMNDEF *start_recinfo, + ENGINE_COLUMNDEF **recinfo, + int error, bool ignore_last_dupp_key_error); +bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, + ENGINE_COLUMNDEF *start_recinfo, + ENGINE_COLUMNDEF **recinfo, + ulonglong options); +bool open_tmp_table(TABLE *table); +void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps); #endif /* SQL_SELECT_INCLUDED */
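For reference, a sketched call site for create_internal_tmp_table_from_heap(), mirroring how the end_write()-style handlers in sql_select.cc recover when a HEAP temporary table gets full (param stands for an illustrative TMP_TABLE_PARAM pointer):

      if ((write_err= table->file->ha_write_row(table->record[0])))
      {
        /* The HEAP table is full: convert it to TMP_ENGINE_HTON on disk,
           retry the failed write there, and continue on the disk table. */
        if (create_internal_tmp_table_from_heap(join->thd, table,
                                                param->start_recinfo,
                                                &param->recinfo,
                                                write_err, 0))
          return NESTED_LOOP_ERROR;
      }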