227 files changed, 62913 insertions, 17327 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index fb86d1ce60f..5e6e6b48a27 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -36,7 +36,6 @@ IF(SSL_DEFINES)
  ADD_DEFINITIONS(${SSL_DEFINES})
 ENDIF()
 
-
 SET (SQL_SOURCE
               ../sql-common/client.c derror.cc des_key_file.cc
                discover.cc ../libmysql/errmsg.c field.cc  field_conv.cc 
@@ -62,7 +61,7 @@ SET (SQL_SOURCE
                sql_list.cc sql_load.cc sql_manager.cc sql_parse.cc
                sql_partition.cc sql_plugin.cc sql_prepare.cc sql_rename.cc 
                debug_sync.cc debug_sync.h
-               sql_repl.cc sql_select.cc sql_show.cc sql_state.c sql_string.cc 
+               sql_repl.cc sql_select.cc sql_show.cc sql_state.c sql_string.cc
                sql_table.cc sql_test.cc sql_trigger.cc sql_udf.cc sql_union.cc
                sql_update.cc sql_view.cc strfunc.cc table.cc thr_malloc.cc 
                sql_time.cc tztime.cc uniques.cc unireg.cc item_xmlfunc.cc 
@@ -76,6 +75,13 @@ SET (SQL_SOURCE
                sql_signal.cc rpl_handler.cc mdl.cc sql_admin.cc
                transaction.cc sys_vars.cc sql_truncate.cc datadict.cc
                sql_reload.cc
+
+               # added in MariaDB:
+               sql_lifo_buffer.h sql_join_cache.h sql_join_cache.cc
+               create_options.cc multi_range_read.cc
+               opt_index_cond_pushdown.cc opt_subselect.cc
+               opt_table_elimination.cc sql_expression_cache.cc
+
                ${GEN_SOURCES}
                ${MYSYS_LIBWRAP_SOURCE})
 
@@ -86,7 +92,7 @@ ADD_LIBRARY(sql STATIC ${SQL_SOURCE})
 ADD_DEPENDENCIES(sql GenServerSource)
 DTRACE_INSTRUMENT(sql)
 TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS} 
-  mysys dbug strings vio regex   
+  mysys ${DBUG_LIBRARY} strings vio regex
   ${LIBWRAP} ${LIBCRYPT} ${LIBDL}
   ${SSL_LIBRARIES})
 
@@ -94,6 +100,7 @@ TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS}
 
 IF(WIN32)
   SET(MYSQLD_SOURCE main.cc nt_servc.cc nt_servc.h message.rc)
+  TARGET_LINK_LIBRARIES(sql psapi)
 ELSE()
   SET(MYSQLD_SOURCE main.cc ${DTRACE_PROBES_ALL})
 ENDIF()
@@ -136,11 +143,11 @@ IF(NOT WITHOUT_DYNAMIC_PLUGINS)
                   ${_PLATFORM}  ${LIB_LOCATIONS} > mysqld.def 
       WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
     ADD_DEPENDENCIES(sql GenError)
-  ENDIF()
-ENDIF()
+  ENDIF(MSVC)
+ENDIF(NOT WITHOUT_DYNAMIC_PLUGINS)
 
 SET_TARGET_PROPERTIES(mysqld PROPERTIES ENABLE_EXPORTS TRUE) 
-TARGET_LINK_LIBRARIES(mysqld sql mysys)
+TARGET_LINK_LIBRARIES(mysqld sql)
 
 # Provide plugins with minimal set of libraries
 SET(INTERFACE_LIBS ${LIBRT})
diff --git a/sql/authors.h b/sql/authors.h
index a44557a4363..018c8fabc31 100644
--- a/sql/authors.h
+++ b/sql/authors.h
@@ -37,24 +37,36 @@ struct show_table_authors_st {
 */
 
 struct show_table_authors_st show_table_authors[]= {
+  { "Michael (Monty) Widenius", "Tusby, Finland",
+    "Lead developer and main author" },
+  { "David Axmark", "London, England",
+    "MySQL founder; Small stuff long time ago, Monty ripped it out!" },
+  { "Sergei Golubchik", "Kerpen, Germany",
+    "Full-text search, precision math" },
+  { "Igor Babaev", "Bellevue, USA", "Optimizer, keycache, core work"},
+  { "Sergey Petrunia", "St. Petersburg, Russia", "Optimizer"},
+  { "Oleksandr Byelkin", "Lugansk, Ukraine",
+    "Query Cache (4.0), Subqueries (4.1), Views (5.0)" },
   { "Brian (Krow) Aker", "Seattle, WA, USA",
     "Architecture, archive, federated, bunch of little stuff :)" },
   { "Marc Alff", "Denver, CO, USA", "Signal, Resignal, Performance schema" },
   { "Venu Anuganti", "", "Client/server protocol (4.1)" },
-  { "David Axmark", "Uppsala, Sweden",
-    "Small stuff long time ago, Monty ripped it out!" },
+  { "Kristian Nielsen", "Copenhagen, Denmark",
+    "General build stuff" },
   { "Alexander (Bar) Barkov", "Izhevsk, Russia",
     "Unicode and character sets (4.1)" },
+  { "Guilhem Bichot", "Bordeaux, France", "Replication (since 4.0)" },
+  { "Konstantin Osipov", "Moscow, Russia",
+    "Prepared statements (4.1), Cursors (5.0)" },
+  { "Dmitri Lenev", "Moscow, Russia",
+    "Time zones support (4.1), Triggers (5.0)" },
   { "Omer BarNir", "Sunnyvale, CA, USA",
     "Testing (sometimes) and general QA stuff" },
-  { "Guilhem Bichot", "Bordeaux, France", "Replication (since 4.0)" },
   { "John Birrell", "", "Emulation of pthread_mutex() for OS/2" },
   { "Andreas F. Bobak", "", "AGGREGATE extension to user-defined functions" },
   { "Alexey Botchkov (Holyfoot)", "Izhevsk, Russia",
     "GIS extensions (4.1), embedded server (4.1), precision math (5.0)"},
   { "Reggie Burnett", "Nashville, TN, USA", "Windows development, Connectors" },
-  { "Oleksandr Byelkin", "Lugansk, Ukraine",
-    "Query Cache (4.0), Subqueries (4.1), Views (5.0)" },
   { "Kent Boortz", "Orebro, Sweden", "Test platform, and general build stuff" },
   { "Tim Bunce", "", "mysqlhotcopy" },
   { "Yves Carlier", "", "mysqlaccess" },
@@ -71,8 +83,6 @@ struct show_table_authors_st show_table_authors[]= {
   { "Yuri Dario", "", "OS/2 port" },
   { "Andrei Elkin", "Espoo, Finland", "Replication" },
   { "Patrick Galbraith", "Sharon, NH", "Federated Engine, mysqlslap" },
-  { "Sergei Golubchik", "Kerpen, Germany",
-    "Full-text search, precision math" },
   { "Lenz Grimmer", "Hamburg, Germany",
     "Production (build and release) engineering" },
   { "Nikolay Grishakin", "Austin, TX, USA", "Testing - Server" },
@@ -88,8 +98,6 @@ struct show_table_authors_st show_table_authors[]= {
   { "Hakan Küçükyılmaz", "Walldorf, Germany", "Testing - Server" },
   { "Greg (Groggy) Lehey", "Uchunga, SA, Australia", "Backup" },
   { "Matthias Leich", "Berlin, Germany", "Testing - Server" },
-  { "Dmitri Lenev", "Moscow, Russia",
-    "Time zones support (4.1), Triggers (5.0)" },
   { "Arjen Lentz", "Brisbane, Australia",
     "Documentation (2001-2004), Dutch error messages, LOG2()" },
   { "Marc Liyanage", "", "Created Mac OS X packages" },
@@ -102,8 +110,6 @@ struct show_table_authors_st show_table_authors[]= {
   { "Jonathan (Jeb) Miller", "Kyle, TX, USA",
     "Testing - Cluster, Replication" },
   { "Elliot Murphy", "Cocoa, FL, USA", "Replication and backup" },
-  { "Kristian Nielsen", "Copenhagen, Denmark",
-    "General build stuff" },
   { "Pekka Nouisiainen", "Stockholm, Sweden",
     "NDB Cluster: BLOB support, character set support, ordered indexes" },
   { "Alexander Nozdrin", "Moscow, Russia",
@@ -111,8 +117,6 @@ struct show_table_authors_st show_table_authors[]= {
   { "Per Eric Olsson", "", "Testing of dynamic record format" },
   { "Jonas Oreland", "Stockholm, Sweden",
     "NDB Cluster, Online Backup, lots of other things" },
-  { "Konstantin Osipov", "Moscow, Russia",
-    "Prepared statements (4.1), Cursors (5.0)" },
   { "Alexander (Sasha) Pachev", "Provo, UT, USA",
     "Statement-based replication, SHOW CREATE TABLE, mysql-bench" },
   { "Irena Pancirov", "", "Port to Windows with Borland compiler" },
@@ -150,10 +154,10 @@ struct show_table_authors_st show_table_authors[]= {
   { "Sergey Vojtovich", "Izhevsk, Russia", "Plugins infrastructure (5.1)" },
   { "Matt Wagner", "Northfield, MN, USA", "Bug fixing" },
   { "Jim Winstead Jr.", "Los Angeles, CA, USA", "Bug fixing" },
-  { "Michael (Monty) Widenius", "Tusby, Finland",
-    "Lead developer and main author" },
   { "Peter Zaitsev", "Tacoma, WA, USA",
     "SHA1(), AES_ENCRYPT(), AES_DECRYPT(), bug fixing" },
+  {"Mark Callaghan", "Texas, USA", "Statistics patches"},
+  {"Percona", "CA, USA", "Microslow patches"},
   {NULL, NULL, NULL}
 };
 
diff --git a/sql/client_settings.h b/sql/client_settings.h
index acae7907aa5..54cb72f9412 100644
--- a/sql/client_settings.h
+++ b/sql/client_settings.h
@@ -37,7 +37,6 @@
                              CLIENT_PLUGIN_AUTH)
 
 #define read_user_name(A) {}
-#undef HAVE_SMEM
 #undef _CUSTOMCONFIG_
 
 #define mysql_server_init(a,b,c) mysql_client_plugin_init()
diff --git a/sql/create_options.cc b/sql/create_options.cc
new file mode 100644
index 00000000000..8e46bb583a5
--- /dev/null
+++ b/sql/create_options.cc
@@ -0,0 +1,615 @@
+/* Copyright (C) 2010 Monty Program Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/**
+  @file
+
+  Engine defined options of tables/fields/keys in CREATE/ALTER TABLE.
+*/
+
+#include "create_options.h"
+#include <my_getopt.h>
+
+#define FRM_QUOTED_VALUE 0x8000
+
+/**
+  Links this item to the given list end
+
+  @param start           The list beginning or NULL
+  @param end             The list last element or does not matter
+*/
+
+void engine_option_value::link(engine_option_value **start,
+                               engine_option_value **end)
+{
+  DBUG_ENTER("engine_option_value::link");
+  /* value.str is NULL for a DEFAULT option; never hand NULL to '%s' */
+  DBUG_PRINT("enter", ("name: '%s' (%u)  value: '%s' (%u)",
+                       name.str, (uint) name.length,
+                       (value.str ? value.str : "<DEFAULT>"),
+                       (uint) value.length));
+  engine_option_value *opt;
+  /* check duplicates to avoid writing them to frm */
+  for(opt= *start;
+      opt && ((opt->parsed && !opt->value.str) ||
+              my_strnncoll(system_charset_info,
+                           (uchar *)name.str, name.length,
+                           (uchar*)opt->name.str, opt->name.length));
+      opt= opt->next) /* no-op */;
+  if (opt)
+  {
+    opt->value.str= NULL;       /* remove previous value */
+    opt->parsed= TRUE;          /* and don't issue warnings for it anymore */
+  }
+  /*
+    Add this option to the end of the list
+
+    @note: We add it even if this->value.str == NULL because it can be
+    ALTER TABLE to remove the option.
+  */
+  if (*start)
+  {
+    (*end)->next= this;
+    *end= this;
+  }
+  else
+  {
+    /* note that if *start == 0, the value of *end does not matter,
+       it can be uninitialized. */
+    *start= *end= this;
+  }
+  DBUG_VOID_RETURN;
+}
+
+/* Reports a bad value for option 'name': returns 1 after my_error(), else 0 */
+static bool report_wrong_value(THD *thd, const char *name, const char *val,
+                               my_bool suppress_warning)
+{
+  if (suppress_warning)
+    return 0;
+  /* error unless MODE_IGNORE_BAD_TABLE_OPTIONS or thd->slave_thread is set */
+  if (!(thd->variables.sql_mode & MODE_IGNORE_BAD_TABLE_OPTIONS) &&
+      !thd->slave_thread)
+  {
+    my_error(ER_BAD_OPTION_VALUE, MYF(0), val, name);
+    return 1;
+  }
+  push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_BAD_OPTION_VALUE,
+                      ER(ER_BAD_OPTION_VALUE), val, name);
+  return 0;
+}
+
+/* Reports an option that matched no rule: TRUE after my_error(), else FALSE */
+static bool report_unknown_option(THD *thd, engine_option_value *val,
+                                  my_bool suppress_warning)
+{
+  DBUG_ENTER("report_unknown_option");
+  if (val->parsed || suppress_warning)
+  {
+    DBUG_PRINT("info", ("parsed => exiting"));
+    DBUG_RETURN(FALSE);
+  }
+  /* error unless MODE_IGNORE_BAD_TABLE_OPTIONS or thd->slave_thread is set */
+  if (!(thd->variables.sql_mode & MODE_IGNORE_BAD_TABLE_OPTIONS) &&
+      !thd->slave_thread)
+  {
+    my_error(ER_UNKNOWN_OPTION, MYF(0), val->name.str);
+    DBUG_RETURN(TRUE);
+  }
+  /* otherwise only a warning is pushed */
+  push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                      ER_UNKNOWN_OPTION, ER(ER_UNKNOWN_OPTION), val->name.str);
+  DBUG_RETURN(FALSE);
+}
+
+/* Stores one option value (the default when value->str is NULL) into the
+   option struct at base + opt->offset.  Returns 1 on error, 0 otherwise. */
+static bool set_one_value(ha_create_table_option *opt,
+                          THD *thd, LEX_STRING *value, void *base,
+                          my_bool suppress_warning,
+                          MEM_ROOT *root)
+{
+  DBUG_ENTER("set_one_value");
+  DBUG_PRINT("enter", ("opt: 0x%lx type: %u name '%s' value: '%s'",
+                       (ulong) opt,
+                       opt->type, opt->name,
+                       (value->str ? value->str : "<DEFAULT>")));
+  switch (opt->type)
+  {
+  case HA_OPTION_TYPE_ULL:
+    {
+      ulonglong *val= (ulonglong*)((char*)base + opt->offset);
+      if (!value->str)
+      {
+        *val= opt->def_value;
+        DBUG_RETURN(0);
+      }
+      /* my_option carrying the rule's limits for getopt_ull_limit_value() */
+      my_option optp=
+        { opt->name, 1, 0, (uchar **)val, 0, 0, GET_ULL,
+          REQUIRED_ARG, opt->def_value, opt->min_value, opt->max_value,
+          0, (long) opt->block_size, 0};
+
+      ulonglong orig_val= strtoull(value->str, NULL, 10);
+      my_bool unused;
+      *val= orig_val;
+      *val= getopt_ull_limit_value(*val, &optp, &unused);
+      if (*val == orig_val)
+        DBUG_RETURN(0);
+      /* the number was adjusted by the limit check: report it as wrong */
+      DBUG_RETURN(report_wrong_value(thd, opt->name, value->str,
+                                     suppress_warning));
+    }
+  case HA_OPTION_TYPE_STRING:
+    {
+      char **val= (char **)((char *)base + opt->offset);
+      if (!value->str)
+      {
+        *val= 0;
+        DBUG_RETURN(0);
+      }
+      /* keep a copy of the string allocated in 'root' */
+      if (!(*val= strmake_root(root, value->str, value->length)))
+        DBUG_RETURN(1);
+      DBUG_RETURN(0);
+    }
+  case HA_OPTION_TYPE_ENUM:
+    {
+      uint *val= (uint *)((char *)base + opt->offset), num;
+      *val= (uint) opt->def_value;
+      if (!value->str)
+        DBUG_RETURN(0);
+
+      const char *start= opt->values, *end;
+      /* opt->values is a comma separated list; store the index of the match */
+      num= 0;
+      while (*start)
+      {
+        for (end=start;
+             *end && *end != ',';
+             end+= my_mbcharlen(system_charset_info, *end)) /* no-op */;
+        if (!my_strnncoll(system_charset_info,
+                          (uchar*)start, end-start,
+                          (uchar*)value->str, value->length))
+        {
+          *val= num;
+          DBUG_RETURN(0);
+        }
+        if (*end)
+          end++;
+        start= end;
+        num++;
+      }
+      /* no element of the list matched; *val keeps the default */
+      DBUG_RETURN(report_wrong_value(thd, opt->name, value->str,
+                                     suppress_warning));
+    }
+  case HA_OPTION_TYPE_BOOL:
+    {
+      bool *val= (bool *)((char *)base + opt->offset);
+      *val= opt->def_value;
+      if (!value->str)
+        DBUG_RETURN(0);
+      /* NO, OFF and 0 mean false */
+      if (!my_strnncoll(system_charset_info,
+                        (const uchar*)"NO", 2,
+                        (uchar *)value->str, value->length) ||
+          !my_strnncoll(system_charset_info,
+                        (const uchar*)"OFF", 3,
+                        (uchar *)value->str, value->length) ||
+          !my_strnncoll(system_charset_info,
+                        (const uchar*)"0", 1,
+                        (uchar *)value->str, value->length))
+      {
+        *val= FALSE;
+        DBUG_RETURN(FALSE);
+      }
+      /* YES, ON and 1 mean true */
+      if (!my_strnncoll(system_charset_info,
+                        (const uchar*)"YES", 3,
+                        (uchar *)value->str, value->length) ||
+          !my_strnncoll(system_charset_info,
+                        (const uchar*)"ON", 2,
+                        (uchar *)value->str, value->length) ||
+          !my_strnncoll(system_charset_info,
+                        (const uchar*)"1", 1,
+                        (uchar *)value->str, value->length))
+      {
+        *val= TRUE;
+        DBUG_RETURN(FALSE);
+      }
+
+      DBUG_RETURN(report_wrong_value(thd, opt->name, value->str,
+                                     suppress_warning));
+    }
+  }
+  DBUG_ASSERT(0);
+  my_error(ER_UNKNOWN_ERROR, MYF(0));
+  DBUG_RETURN(1);
+}
+
+/* Size of the value stored for each option type; indexed by opt->type */
+static const size_t ha_option_type_sizeof[]=
+{ sizeof(ulonglong), sizeof(char *), sizeof(uint), sizeof(bool)};
+
+/**
+  Creates option structure and parses list of options in it
+
+  @param thd               thread handler
+  @param option_struct_arg where to store pointer on the option struct
+  @param option_list       list of options given by user
+  @param rules             list of option description by engine, may be NULL
+  @param suppress_warning  second parse so we do not need warnings
+  @param root              MEM_ROOT where allocate memory
+
+  @retval TRUE  Error (an option was rejected, or out of memory)
+  @retval FALSE OK
+*/
+
+my_bool parse_option_list(THD* thd, void *option_struct_arg,
+                          engine_option_value *option_list,
+                          ha_create_table_option *rules,
+                          my_bool suppress_warning,
+                          MEM_ROOT *root)
+{
+  ha_create_table_option *opt;
+  size_t option_struct_size= 0;
+  engine_option_value *val= option_list;
+  void **option_struct= (void**)option_struct_arg;
+  DBUG_ENTER("parse_option_list");
+  DBUG_PRINT("enter",
+             ("struct: 0x%lx list: 0x%lx rules: 0x%lx suppres %u root 0x%lx",
+              (ulong) *option_struct, (ulong)option_list, (ulong)rules,
+              (uint) suppress_warning, (ulong) root));
+
+  if (rules)
+  {
+    LEX_STRING default_val= {NULL, 0};
+    for (opt= rules; opt->name; opt++)
+      set_if_bigger(option_struct_size, opt->offset +
+                    ha_option_type_sizeof[opt->type]);
+
+    if (!(*option_struct= alloc_root(root, option_struct_size)))
+      DBUG_RETURN(TRUE);                        /* out of memory */
+    /* set all values to default */
+    for (opt= rules; opt->name; opt++)
+      set_one_value(opt, thd, &default_val, *option_struct,
+                    suppress_warning, root);
+  }
+
+  for (; val; val= val->next)
+  {
+    for (opt= rules; opt && opt->name; opt++)
+    {
+      if (my_strnncoll(system_charset_info,
+                       (uchar*)opt->name, opt->name_length,
+                       (uchar*)val->name.str, val->name.length))
+        continue;
+      if (set_one_value(opt, thd, &val->value,
+                        *option_struct, suppress_warning || val->parsed, root))
+        DBUG_RETURN(TRUE);
+      val->parsed= true;
+      break;
+    }
+    if (report_unknown_option(thd, val, suppress_warning))
+      DBUG_RETURN(TRUE);
+    val->parsed= true;
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Parses all table/fields/keys options
+
+  @param thd             thread handler
+  @param ht              handlerton supplying the table/field/index rules
+  @param share           descriptor of the table
+
+  @retval TRUE  Error
+  @retval FALSE OK
+*/
+
+my_bool parse_engine_table_options(THD *thd, handlerton *ht,
+                                   TABLE_SHARE *share)
+{
+  MEM_ROOT *root= &share->mem_root;
+  DBUG_ENTER("parse_engine_table_options");
+  /* suppress_warning is TRUE for the table, field and key passes below */
+  if (parse_option_list(thd, &share->option_struct, share->option_list,
+                        ht->table_options, TRUE, root))
+    DBUG_RETURN(TRUE);
+
+  for (Field **field= share->field; *field; field++)
+  {
+    if (parse_option_list(thd, &(*field)->option_struct, (*field)->option_list,
+                          ht->field_options, TRUE, root))
+      DBUG_RETURN(TRUE);
+  }
+
+  for (uint index= 0; index < share->keys; index ++)
+  {
+    if (parse_option_list(thd, &share->key_info[index].option_struct,
+                          share->key_info[index].option_list,
+                          ht->index_options, TRUE, root))
+      DBUG_RETURN(TRUE);
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Returns the number of bytes this option (name and value) takes in the frm
+*/
+
+uint engine_option_value::frm_length()
+{
+  /*
+    1 byte  - name length
+    2 bytes - value length
+
+    if value.str is NULL, this option is not written to frm (=DEFAULT)
+  */
+  return value.str ? 1 + name.length + 2 + value.length : 0;
+}
+
+
+/**
+  Returns the summed frm_length() of all options in the list
+*/
+
+static uint option_list_frm_length(engine_option_value *opt)
+{
+  uint res= 0;
+
+  for (; opt; opt= opt->next)
+    res+= opt->frm_length();
+
+  return res;
+}
+
+
+/**
+  Calculates length of options image in the .frm
+
+  @param table_option_list list of table options
+  @param create_fields     field descriptors list
+  @param keys              number of keys
+  @param key_info          array of key descriptors
+
+  @returns length of image in frm, 0 if there are no options at all
+*/
+
+uint engine_table_options_frm_length(engine_option_value *table_option_list,
+                                     List<Create_field> &create_fields,
+                                     uint keys, KEY *key_info)
+{
+  List_iterator<Create_field> it(create_fields);
+  Create_field *field;
+  uint res, index;
+  DBUG_ENTER("engine_table_options_frm_length");
+
+  res= option_list_frm_length(table_option_list);
+
+  while ((field= it++))
+    res+= option_list_frm_length(field->option_list);
+
+  for (index= 0; index < keys; index++, key_info++)
+    res+= option_list_frm_length(key_info->option_list);
+
+  /*
+    if there's at least one option somewhere (res > 0) we write option lists
+    for the table, all fields and all keys, one zero terminator byte each.
+    If there're no options we write nothing at all (backward compatibility)
+  */
+  DBUG_RETURN(res ? res + 1 + create_fields.elements + keys : 0);
+}
+
+
+/**
+  Writes image of the key and value to the frm image buffer:
+  1 byte name length, name, 2 bytes value length | FRM_QUOTED_VALUE flag,
+  value.  Nothing is written when value.str is NULL.
+  @param buff            pointer to the buffer free space beginning
+  @returns pointer to byte after last recorded in the buffer
+*/
+
+uchar *engine_option_value::frm_image(uchar *buff)
+{
+  if (value.str)
+  {
+    *buff++= name.length;
+    memcpy(buff, name.str, name.length);
+    buff+= name.length;
+    int2store(buff, value.length | (quoted_value ? FRM_QUOTED_VALUE : 0));
+    buff+= 2;
+    memcpy(buff, (const uchar *) value.str, value.length);
+    buff+= value.length;
+  }
+  return buff;
+}
+
+/**
+  Writes images of all options of a list, followed by a zero terminator byte
+
+  @param buff            pointer to the buffer to store the options in
+  @param opt             list of options;
+
+  @returns pointer to the end of the stored data in the buffer
+*/
+static uchar *option_list_frm_image(uchar *buff, engine_option_value *opt)
+{
+  for (; opt; opt= opt->next)
+    buff= opt->frm_image(buff);
+
+  *buff++= 0;
+  return buff;
+}
+
+
+/**
+  Writes options image in the .frm buffer (table, then fields, then keys)
+
+  @param buff              pointer to the buffer
+  @param table_option_list list of table options
+  @param create_fields     field descriptors list
+  @param keys              number of keys
+  @param key_info          array of key descriptors
+
+  @returns pointer to byte after last recorded in the buffer
+*/
+
+uchar *engine_table_options_frm_image(uchar *buff,
+                                      engine_option_value *table_option_list,
+                                      List<Create_field> &create_fields,
+                                      uint keys, KEY *key_info)
+{
+  List_iterator<Create_field> it(create_fields);
+  Create_field *field;
+  KEY *key_info_end= key_info + keys;
+  DBUG_ENTER("engine_table_options_frm_image");
+
+  buff= option_list_frm_image(buff, table_option_list);
+
+  while ((field= it++))
+    buff= option_list_frm_image(buff, field->option_list);
+
+  while (key_info < key_info_end)
+    buff= option_list_frm_image(buff, (key_info++)->option_list);
+
+  DBUG_RETURN(buff);
+}
+
+/**
+  Reads name and value from buffer, then links the new option in the list.
+  The buffer end is not passed in: lengths stored in the image are trusted.
+  @param buff            the buffer to read from
+  @param start           The list beginning or NULL
+  @param end             The list last element or does not matter
+  @param root            MEM_ROOT for allocating
+
+  @returns pointer to the byte after the option read, NULL if allocation failed
+*/
+uchar *engine_option_value::frm_read(const uchar *buff, engine_option_value **start,
+                                     engine_option_value **end, MEM_ROOT *root)
+{
+  LEX_STRING name, value;
+  uint len;
+
+  name.length= buff[0];
+  buff++;
+  if (!(name.str= strmake_root(root, (const char*)buff, name.length)))
+    return NULL;
+  buff+= name.length;
+  len= uint2korr(buff);
+  value.length= len & ~FRM_QUOTED_VALUE;
+  buff+= 2;
+  if (!(value.str= strmake_root(root, (const char*)buff, value.length)))
+    return NULL;
+  buff+= value.length;
+
+  engine_option_value *ptr=new (root)
+    engine_option_value(name, value, len & FRM_QUOTED_VALUE, start, end);
+  if (!ptr)
+    return NULL;
+
+  return (uchar *)buff;
+}
+
+
+/**
+  Reads options from this buffer: the table list, then one list per field,
+  then one list per key; every list is terminated by a zero byte.
+
+  @param buff            the buffer to read from
+  @param length          buffer length
+  @param share           table descriptor; options are allocated in its mem_root
+
+  @retval TRUE  Error (allocation failed, or the image ran past the buffer end)
+  @retval FALSE OK
+*/
+
+my_bool engine_table_options_frm_read(const uchar *buff, uint length,
+                                      TABLE_SHARE *share)
+{
+  const uchar *buff_end= buff + length;
+  engine_option_value *end;
+  MEM_ROOT *root= &share->mem_root;
+  uint count;
+  DBUG_ENTER("engine_table_options_frm_read");
+
+  while (buff < buff_end && *buff)
+  {
+    if (!(buff= engine_option_value::frm_read(buff, &share->option_list, &end,
+                                              root)))
+      DBUG_RETURN(TRUE);
+  }
+  buff++;
+
+  for (count=0; count < share->fields; count++)
+  {
+    while (buff < buff_end && *buff)
+    {
+      if (!(buff= engine_option_value::frm_read(buff,
+                                                &share->field[count]->option_list,
+                                                &end, root)))
+        DBUG_RETURN(TRUE);
+    }
+    buff++;
+  }
+
+  for (count=0; count < share->keys; count++)
+  {
+    while (buff < buff_end && *buff)
+    {
+      if (!(buff= engine_option_value::frm_read(buff,
+                                                &share->key_info[count].option_list,
+                                                &end, root)))
+        DBUG_RETURN(TRUE);
+    }
+    buff++;
+  }
+
+  if (buff < buff_end)
+    sql_print_warning("Table '%s' was created in a later MariaDB version - "
+                      "unknown table attributes were ignored",
+                      share->table_name.str);
+
+  DBUG_RETURN(buff > buff_end);
+}
+
+/**
+  Appends copies of the options of 'second' to 'first' (duplicate removal is
+  done by engine_option_value::link()).  Returns the head of the merged list. */
+
+engine_option_value *merge_engine_table_options(engine_option_value *first,
+                                                engine_option_value *second,
+                                                MEM_ROOT *root)
+{
+  engine_option_value *end, *opt;
+  DBUG_ENTER("merge_engine_table_options");
+  LINT_INIT(end);
+
+  /* find last element */
+  if (first && second)
+    for (end= first; end->next; end= end->next) /* no-op */;
+
+  for (opt= second; opt; opt= opt->next)
+    new (root) engine_option_value(opt->name, opt->value, opt->quoted_value,
+                                   &first, &end);
+  DBUG_RETURN(first);
+}
diff --git a/sql/create_options.h b/sql/create_options.h
new file mode 100644
index 00000000000..ae918f6cea1
--- /dev/null
+++ b/sql/create_options.h
@@ -0,0 +1,93 @@
+/* Copyright (C) 2010 Monty Program Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/**
+  @file
+
+  Engine defined options of tables/fields/keys in CREATE/ALTER TABLE.
+*/
+
+#ifndef SQL_CREATE_OPTIONS_INCLUDED
+#define SQL_CREATE_OPTIONS_INCLUDED
+
+#include "sql_class.h"
+
+/** One NAME=VALUE engine option; every constructor links it into a list */
+class engine_option_value: public Sql_alloc
+{
+ public:
+  LEX_STRING name;
+  LEX_STRING value;
+  engine_option_value *next;    ///< parser puts them in a FIFO linked list
+  bool parsed;                  ///< to detect unrecognized options
+  bool quoted_value;            ///< option=VAL vs. option='VAL'
+
+  engine_option_value(LEX_STRING &name_arg, LEX_STRING &value_arg, bool quoted,
+                      engine_option_value **start, engine_option_value **end) :
+    name(name_arg), value(value_arg),
+    next(NULL), parsed(false), quoted_value(quoted)
+  {
+    link(start, end);
+  }
+  engine_option_value(LEX_STRING &name_arg,
+                      engine_option_value **start, engine_option_value **end) :
+    name(name_arg), value(null_lex_str),
+    next(NULL), parsed(false), quoted_value(false)
+  {
+    link(start, end);
+  }
+  engine_option_value(LEX_STRING &name_arg, ulonglong value_arg,
+                      engine_option_value **start, engine_option_value **end,
+                      MEM_ROOT *root) :
+    name(name_arg), next(NULL), parsed(false), quoted_value(false)
+  {
+    if ((value.str= (char *)alloc_root(root, 22)))
+    {
+      value.length= longlong10_to_str(value_arg, value.str, 10) - value.str;
+      link(start, end);
+    } /* if the allocation fails the option is not linked into the list */
+  }
+  static uchar *frm_read(const uchar *buff, engine_option_value **start,
+                         engine_option_value **end, MEM_ROOT *root);
+  void link(engine_option_value **start, engine_option_value **end);
+  uint frm_length();
+  uchar *frm_image(uchar *buff);
+};
+
+typedef struct st_key KEY;
+class Create_field;
+
+my_bool parse_engine_table_options(THD *thd, handlerton *ht,
+                                   TABLE_SHARE *share);
+my_bool parse_option_list(THD* thd, void *option_struct,
+                          engine_option_value *option_list,
+                          ha_create_table_option *rules,
+                          my_bool suppress_warning,
+                          MEM_ROOT *root);
+my_bool engine_table_options_frm_read(const uchar *buff,
+                                      uint length,
+                                      TABLE_SHARE *share);
+engine_option_value *merge_engine_table_options(engine_option_value *source,
+                                                engine_option_value *changes,
+                                                MEM_ROOT *root);
+
+uint engine_table_options_frm_length(engine_option_value *table_option_list,
+                                     List<Create_field> &create_fields,
+                                     uint keys, KEY *key_info);
+uchar *engine_table_options_frm_image(uchar *buff,
+                                      engine_option_value *table_option_list,
+                                      List<Create_field> &create_fields,
+                                      uint keys, KEY *key_info);
+#endif
diff --git a/sql/datadict.h b/sql/datadict.h
index 65e40998929..f852b02f52c 100644
--- a/sql/datadict.h
+++ b/sql/datadict.h
@@ -28,7 +28,6 @@ enum frm_type_enum
   FRMTYPE_VIEW
 };
 
-
 frm_type_enum dd_frm_type(THD *thd, char *path, enum legacy_db_type *dbt);
 
 bool dd_frm_storage_engine(THD *thd, const char *db, const char *table_name,
diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc
index 1575bca427c..093650b3673 100644
--- a/sql/debug_sync.cc
+++ b/sql/debug_sync.cc
@@ -1691,7 +1691,8 @@ static void debug_sync_execute(THD *thd, st_debug_sync_action *action)
 
   if (action->execute)
   {
-    const char *UNINIT_VAR(old_proc_info);
+    const char  *old_proc_info;
+    LINT_INIT(old_proc_info);
 
     action->execute--;
 
@@ -1927,4 +1928,7 @@ bool debug_sync_set_action(THD *thd, const char *action_str, size_t len)
 }
 
 
+#else /* defined(ENABLED_DEBUG_SYNC) */
+/* prevent linker/lib warning about file without public symbols */
+int debug_sync_dummy; 
 #endif /* defined(ENABLED_DEBUG_SYNC) */
diff --git a/sql/derror.cc b/sql/derror.cc
index 2b4cc13073e..23319ac0c99 100644
--- a/sql/derror.cc
+++ b/sql/derror.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (C) 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -28,6 +29,7 @@
 #include "derror.h"                             // read_texts
 #include "sql_class.h"                          // THD
 
+static bool check_error_mesg(const char *file_name, const char **errmsg);
 static void init_myfunc_errs(void);
 
 
@@ -44,9 +46,12 @@ C_MODE_END
   Read messages from errorfile.
 
   This function can be called multiple times to reload the messages.
-  If it fails to load the messages, it will fail softly by initializing
-  the errmesg pointer to an array of empty strings or by keeping the
-  old array if it exists.
+
+  If it fails to load the messages:
+   - If we already have error messages loaded, keep the old ones and
+     return FALSE (ok)
+   - Otherwise, initialize the errmesg pointer to an array of empty
+     strings and return TRUE (error)
 
   @retval
     FALSE       OK
@@ -56,26 +61,45 @@ C_MODE_END
 
 bool init_errmessage(void)
 {
-  const char **errmsgs, **ptr;
+  const char **errmsgs, **ptr, **org_errmsgs;
+  bool error= FALSE;
   DBUG_ENTER("init_errmessage");
 
   /*
     Get a pointer to the old error messages pointer array.
     read_texts() tries to free it.
   */
-  errmsgs= my_error_unregister(ER_ERROR_FIRST, ER_ERROR_LAST);
+  org_errmsgs= my_error_unregister(ER_ERROR_FIRST, ER_ERROR_LAST);
 
   /* Read messages from file. */
   if (read_texts(ERRMSG_FILE, my_default_lc_messages->errmsgs->language,
                  &errmsgs, ER_ERROR_LAST - ER_ERROR_FIRST + 1) &&
       !errmsgs)
   {
-    if (!(errmsgs= (const char**) my_malloc((ER_ERROR_LAST-ER_ERROR_FIRST+1)*
-                                            sizeof(char*), MYF(0))))
-      DBUG_RETURN(TRUE);
-    for (ptr= errmsgs; ptr < errmsgs + ER_ERROR_LAST - ER_ERROR_FIRST; ptr++)
-	  *ptr= "";
+    my_free(errmsgs);
+    
+    if (org_errmsgs)
+    {
+      /* Use old error messages */
+      errmsgs= org_errmsgs;
+    }
+    else
+    {
+      /*
+        No error messages.  Fill all ER_ERROR_LAST-ER_ERROR_FIRST+1 slots
+        with "" so that we don't get a crash if some code wrongly tries
+        to access a non existing error message.
+      */
+      if (!(errmsgs= (const char**) my_malloc((ER_ERROR_LAST-ER_ERROR_FIRST+1)*
+                                              sizeof(char*), MYF(0))))
+        DBUG_RETURN(TRUE);
+      for (ptr= errmsgs; ptr <= errmsgs + ER_ERROR_LAST - ER_ERROR_FIRST; ptr++)
+        *ptr= "";
+      error= TRUE;
+    }
   }
+  else
+    my_free(org_errmsgs);                     // Free old language
 
   /* Register messages for use with my_error(). */
   if (my_error_register(get_server_errmsgs, ER_ERROR_FIRST, ER_ERROR_LAST))
@@ -86,7 +110,29 @@ bool init_errmessage(void)
 
   DEFAULT_ERRMSGS= errmsgs;             /* Init global variable */
   init_myfunc_errs();			/* Init myfunc messages */
-  DBUG_RETURN(FALSE);
+  DBUG_RETURN(error);
+}
+
+
+/**
+   Check that the error message array contains all relevant messages.
+
+   @param file_name  message file name; only used in the logged error
+   @param errmsg     message array; entry 0 is error ER_ERROR_FIRST
+   @return 1 (after logging) if the last MySQL message is empty, else 0
+*/
+
+static bool check_error_mesg(const char *file_name, const char **errmsg)
+{
+  /* An empty last MySQL message means the file lacks newer messages */
+  if (errmsg[ER_LAST_MYSQL_ERROR_MESSAGE -1 - ER_ERROR_FIRST][0] == 0)
+  {
+    sql_print_error("Error message file '%s' is probably from an older "
+                    "version of MariaDB / MYSQL as it doesn't contain all "
+                    "error messages", file_name);
+    return 1;
+  }
+  return 0;
 }
 
 
@@ -107,8 +153,11 @@ bool read_texts(const char *file_name, const char *language,
   char lang_path[FN_REFLEN];
   uchar *buff;
   uchar head[32],*pos;
+  const char *errmsg;
   DBUG_ENTER("read_texts");
 
+  *point= 0;
+
   LINT_INIT(buff);
   funktpos=0;
   convert_dirname(lang_path, language, NullS);
@@ -136,6 +185,7 @@ bool read_texts(const char *file_name, const char *language,
   funktpos=1;
   if (mysql_file_read(file, (uchar*) head, 32, MYF(MY_NABP)))
     goto err;
+  funktpos=2;
   if (head[0] != (uchar) 254 || head[1] != (uchar) 254 ||
       head[2] != 2 || head[3] != 1)
     goto err; /* purecov: inspected */
@@ -147,20 +197,16 @@ bool read_texts(const char *file_name, const char *language,
   if (count < error_messages)
   {
     sql_print_error("\
-Error message file '%s' had only %d error messages,\n\
-but it should contain at least %d error messages.\n\
-Check that the above file is the right version for this program!",
+Error message file '%s' had only %d error messages, but it should contain at least %d error messages.\nCheck that the above file is the right version for this program!",
 		    name,count,error_messages);
     (void) mysql_file_close(file, MYF(MY_WME));
     DBUG_RETURN(1);
   }
 
-  /* Free old language */
-  my_free(*point);
   if (!(*point= (const char**)
 	my_malloc((size_t) (length+count*sizeof(char*)),MYF(0))))
   {
-    funktpos=2;					/* purecov: inspected */
+    funktpos=3;					/* purecov: inspected */
     goto err;					/* purecov: inspected */
   }
   buff= (uchar*) (*point + count);
@@ -180,12 +226,27 @@ Check that the above file is the right version for this program!",
     point[i]= *point +uint2korr(head+10+i+i);
   }
   (void) mysql_file_close(file, MYF(0));
-  DBUG_RETURN(0);
+
+  i= check_error_mesg(file_name, *point);
+  DBUG_RETURN(i);
 
 err:
-  sql_print_error((funktpos == 2) ? "Not enough memory for messagefile '%s'" :
-                  ((funktpos == 1) ? "Can't read from messagefile '%s'" :
-                   "Can't find messagefile '%s'"), name);
+  /* funktpos records how far we got before failing; pick the message */
+  switch (funktpos) {
+  case 3:
+    errmsg= "Not enough memory for messagefile '%s'";
+    break;
+  case 2:
+    errmsg= "Incompatible header in messagefile '%s'. Probably from another version of MariaDB";
+    break;
+  case 1:
+    errmsg= "Can't read from messagefile '%s'";
+    break;
+  default:
+    errmsg= "Can't find messagefile '%s'";
+    break;
+  }
+  sql_print_error(errmsg, name);
   if (file != FERR)
     (void) mysql_file_close(file, MYF(MY_WME));
   DBUG_RETURN(1);
diff --git a/sql/discover.cc b/sql/discover.cc
index 33ce06926b5..b9dba92a780 100644
--- a/sql/discover.cc
+++ b/sql/discover.cc
@@ -70,7 +70,7 @@ int readfrm(const char *name, uchar **frmdata, size_t *len)
   error= 2;
   if (mysql_file_fstat(file, &state, MYF(0)))
     goto err;
-  read_len= state.st_size;  
+  read_len= (size_t)state.st_size;  
 
   // Read whole frm file
   error= 3;
diff --git a/sql/event_data_objects.cc b/sql/event_data_objects.cc
index 2600b23332d..eabe215bc3a 100644
--- a/sql/event_data_objects.cc
+++ b/sql/event_data_objects.cc
@@ -475,7 +475,7 @@ Event_queue_element::load_from_row(THD *thd, TABLE *table)
     DBUG_RETURN(TRUE);
 
   starts_null= table->field[ET_FIELD_STARTS]->is_null();
-  my_bool not_used= FALSE;
+  uint not_used;
   if (!starts_null)
   {
     table->field[ET_FIELD_STARTS]->get_date(&time, TIME_NO_ZERO_DATE);
@@ -656,7 +656,7 @@ add_interval(MYSQL_TIME *ltime, const Time_zone *time_zone,
   if (date_add_interval(ltime, scale, interval))
     return 0;
 
-  my_bool not_used;
+  uint not_used;
   return time_zone->TIME_to_gmt_sec(ltime, &not_used);
 }
 
@@ -737,7 +737,6 @@ bool get_next_time(const Time_zone *time_zone, my_time_t *next,
      would give an error then.
     */
     DBUG_RETURN(1);
-    break;
   case INTERVAL_LAST:
     DBUG_ASSERT(0);
   }
@@ -937,7 +936,7 @@ Event_queue_element::compute_next_execution_time()
     goto ret;
   }
 
-  time_now= (my_time_t) current_thd->query_start();
+  time_now= current_thd->query_start();
 
   DBUG_PRINT("info",("NOW: [%lu]", (ulong) time_now));
 
@@ -1137,7 +1136,7 @@ err:
 void
 Event_queue_element::mark_last_executed(THD *thd)
 {
-  last_executed= (my_time_t) thd->query_start();
+  last_executed= thd->query_start();
 
   execution_count++;
 }
@@ -1158,7 +1157,7 @@ append_datetime(String *buf, Time_zone *time_zone, my_time_t secs,
   */
   MYSQL_TIME time;
   time_zone->gmt_sec_to_TIME(&time, secs);
-  buf->append(dtime_buff, my_datetime_to_str(&time, dtime_buff));
+  buf->append(dtime_buff, my_datetime_to_str(&time, dtime_buff, 0));
   buf->append(STRING_WITH_LEN("'"));
 }
 
@@ -1337,7 +1336,7 @@ Event_job_data::execute(THD *thd, bool drop)
 
   DBUG_ENTER("Event_job_data::execute");
 
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, 0);
 
   /*
     MySQL parser currently assumes that current database is either
@@ -1423,8 +1422,7 @@ Event_job_data::execute(THD *thd, bool drop)
 
     DBUG_ASSERT(sphead);
 
-    if (thd->enable_slow_log)
-      sphead->m_flags|= sp_head::LOG_SLOW_STATEMENTS;
+    sphead->m_flags|= sp_head::LOG_SLOW_STATEMENTS;
     sphead->m_flags|= sp_head::LOG_GENERAL_LOG;
 
     sphead->set_info(0, 0, &thd->lex->sp_chistics, sql_mode);
diff --git a/sql/event_data_objects.h b/sql/event_data_objects.h
index 638bd3cb0aa..2483c564dff 100644
--- a/sql/event_data_objects.h
+++ b/sql/event_data_objects.h
@@ -91,9 +91,9 @@ public:
   my_time_t execute_at;
   my_time_t starts;
   my_time_t ends;
-  my_bool starts_null;
-  my_bool ends_null;
-  my_bool execute_at_null;
+  bool starts_null;
+  bool ends_null;
+  bool execute_at_null;
 
   longlong expression;
   interval_type interval;
diff --git a/sql/event_db_repository.cc b/sql/event_db_repository.cc
index 59d168e01b5..252dac5f01d 100644
--- a/sql/event_db_repository.cc
+++ b/sql/event_db_repository.cc
@@ -115,7 +115,8 @@ const TABLE_FIELD_TYPE event_table_fields[ET_FIELD_COUNT] =
   {
     { C_STRING_WITH_LEN("sql_mode") },
     { C_STRING_WITH_LEN("set('REAL_AS_FLOAT','PIPES_AS_CONCAT','ANSI_QUOTES',"
-    "'IGNORE_SPACE','NOT_USED','ONLY_FULL_GROUP_BY','NO_UNSIGNED_SUBTRACTION',"
+    "'IGNORE_SPACE','IGNORE_BAD_TABLE_OPTIONS','ONLY_FULL_GROUP_BY',"
+    "'NO_UNSIGNED_SUBTRACTION',"
     "'NO_DIR_IN_CREATE','POSTGRESQL','ORACLE','MSSQL','DB2','MAXDB',"
     "'NO_KEY_OPTIONS','NO_TABLE_OPTIONS','NO_FIELD_OPTIONS','MYSQL323','MYSQL40',"
     "'ANSI','NO_AUTO_VALUE_ON_ZERO','NO_BACKSLASH_ESCAPES','STRICT_TRANS_TABLES',"
@@ -221,7 +222,7 @@ mysql_event_fill_row(THD *thd,
       Safety: this can only happen if someone started the server
       and then altered mysql.event.
     */
-    my_error(ER_COL_COUNT_DOESNT_MATCH_CORRUPTED, MYF(0), table->alias,
+    my_error(ER_COL_COUNT_DOESNT_MATCH_CORRUPTED, MYF(0), table->alias.c_ptr(),
              (int) ET_FIELD_COUNT, table->s->fields);
     DBUG_RETURN(TRUE);
   }
@@ -248,6 +249,9 @@ mysql_event_fill_row(THD *thd,
     rs|= fields[ET_FIELD_STATUS]->store((longlong)et->status, TRUE);
   rs|= fields[ET_FIELD_ORIGINATOR]->store((longlong)et->originator, TRUE);
 
+  if (!is_update)
+    rs|= fields[ET_FIELD_CREATED]->set_time();
+
   /*
     Change the SQL_MODE only if body was present in an ALTER EVENT and of course
     always during CREATE EVENT.
@@ -294,7 +298,7 @@ mysql_event_fill_row(THD *thd,
       my_tz_OFFSET0->gmt_sec_to_TIME(&time, et->starts);
 
       fields[ET_FIELD_STARTS]->set_notnull();
-      fields[ET_FIELD_STARTS]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+      fields[ET_FIELD_STARTS]->store_time(&time);
     }
 
     if (!et->ends_null)
@@ -303,7 +307,7 @@ mysql_event_fill_row(THD *thd,
       my_tz_OFFSET0->gmt_sec_to_TIME(&time, et->ends);
 
       fields[ET_FIELD_ENDS]->set_notnull();
-      fields[ET_FIELD_ENDS]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+      fields[ET_FIELD_ENDS]->store_time(&time);
     }
   }
   else if (et->execute_at)
@@ -322,8 +326,7 @@ mysql_event_fill_row(THD *thd,
     my_tz_OFFSET0->gmt_sec_to_TIME(&time, et->execute_at);
 
     fields[ET_FIELD_EXECUTE_AT]->set_notnull();
-    fields[ET_FIELD_EXECUTE_AT]->
-                        store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    fields[ET_FIELD_EXECUTE_AT]->store_time(&time);
   }
   else
   {
@@ -334,7 +337,7 @@ mysql_event_fill_row(THD *thd,
     */
   }
 
-  ((Field_timestamp *)fields[ET_FIELD_MODIFIED])->set_time();
+  rs|= fields[ET_FIELD_MODIFIED]->set_time();
 
   if (et->comment.str)
   {
@@ -439,17 +442,18 @@ Event_db_repository::index_read_for_db_for_i_s(THD *thd, TABLE *schema_table,
   }
 
   key_copy(key_buf, event_table->record[0], key_info, key_len);
-  if (!(ret= event_table->file->index_read_map(event_table->record[0], key_buf,
-                                               (key_part_map)1,
-                                               HA_READ_KEY_EXACT)))
+  if (!(ret= event_table->file->ha_index_read_map(event_table->record[0],
+                                                  key_buf,
+                                                  (key_part_map)1,
+                                                  HA_READ_KEY_EXACT)))
   {
     DBUG_PRINT("info",("Found rows. Let's retrieve them. ret=%d", ret));
     do
     {
       ret= copy_event_to_schema_table(thd, schema_table, event_table);
       if (ret == 0)
-        ret= event_table->file->index_next_same(event_table->record[0],
-                                                key_buf, key_len);
+        ret= event_table->file->ha_index_next_same(event_table->record[0],
+                                                   key_buf, key_len);
     } while (ret == 0);
   }
   DBUG_PRINT("info", ("Scan finished. ret=%d", ret));
@@ -489,7 +493,8 @@ Event_db_repository::table_scan_all_for_i_s(THD *thd, TABLE *schema_table,
   READ_RECORD read_record_info;
   DBUG_ENTER("Event_db_repository::table_scan_all_for_i_s");
 
-  init_read_record(&read_record_info, thd, event_table, NULL, 1, 0, FALSE);
+  if (init_read_record(&read_record_info, thd, event_table, NULL, 1, 0, FALSE))
+    DBUG_RETURN(TRUE);
 
   /*
     rr_sequential, in read_record(), returns 137==HA_ERR_END_OF_FILE,
@@ -715,8 +720,6 @@ Event_db_repository::create_event(THD *thd, Event_parse_data *parse_data,
     goto end;
   }
 
-  ((Field_timestamp *)table->field[ET_FIELD_CREATED])->set_time();
-
   /*
     mysql_event_fill_row() calls my_error() in case of error so no need to
     handle it here
@@ -949,7 +952,11 @@ Event_db_repository::find_named_event(LEX_STRING db, LEX_STRING name,
     same fields.
   */
   if (db.length > table->field[ET_FIELD_DB]->field_length ||
-      name.length > table->field[ET_FIELD_NAME]->field_length)
+      name.length > table->field[ET_FIELD_NAME]->field_length ||
+      table->s->keys == 0 ||
+      table->key_info[0].key_parts != 2 ||
+      table->key_info[0].key_part[0].fieldnr != ET_FIELD_DB+1 ||
+      table->key_info[0].key_part[1].fieldnr != ET_FIELD_NAME+1)
     DBUG_RETURN(TRUE);
 
   table->field[ET_FIELD_DB]->store(db.str, db.length, &my_charset_bin);
@@ -957,8 +964,9 @@ Event_db_repository::find_named_event(LEX_STRING db, LEX_STRING name,
 
   key_copy(key, table->record[0], table->key_info, table->key_info->key_length);
 
-  if (table->file->index_read_idx_map(table->record[0], 0, key, HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0], 0, key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
   {
     DBUG_PRINT("info", ("Row not found"));
     DBUG_RETURN(TRUE);
@@ -993,7 +1001,9 @@ Event_db_repository::drop_schema_events(THD *thd, LEX_STRING schema)
     DBUG_VOID_RETURN;
 
   /* only enabled events are in memory, so we go now and delete the rest */
-  init_read_record(&read_record_info, thd, table, NULL, 1, 0, FALSE);
+  if (init_read_record(&read_record_info, thd, table, NULL, 1, 0, FALSE))
+    goto end;
+
   while (!ret && !(read_record_info.read_record(&read_record_info)) )
   {
     char *et_field= get_field(thd->mem_root, table->field[field]);
@@ -1015,6 +1025,8 @@ Event_db_repository::drop_schema_events(THD *thd, LEX_STRING schema)
     }
   }
   end_read_record(&read_record_info);
+
+end:
   close_thread_tables(thd);
   /*
     Make sure to only release the MDL lock on mysql.event, not other
@@ -1128,7 +1140,7 @@ update_timing_fields_for_event(THD *thd,
 
   my_tz_OFFSET0->gmt_sec_to_TIME(&time, last_executed);
   fields[ET_FIELD_LAST_EXECUTED]->set_notnull();
-  fields[ET_FIELD_LAST_EXECUTED]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+  fields[ET_FIELD_LAST_EXECUTED]->store_time(&time);
 
   fields[ET_FIELD_STATUS]->set_notnull();
   fields[ET_FIELD_STATUS]->store(status, TRUE);
diff --git a/sql/event_parse_data.cc b/sql/event_parse_data.cc
index 88e867295a3..b53e366e27c 100644
--- a/sql/event_parse_data.cc
+++ b/sql/event_parse_data.cc
@@ -113,7 +113,7 @@ Event_parse_data::init_name(THD *thd, sp_name *spn)
 void
 Event_parse_data::check_if_in_the_past(THD *thd, my_time_t ltime_utc)
 {
-  if (ltime_utc >= (my_time_t) thd->query_start())
+  if (ltime_utc >= thd->query_start())
     return;
 
   /*
@@ -201,7 +201,7 @@ Event_parse_data::check_dates(THD *thd, int previous_on_completion)
 int
 Event_parse_data::init_execute_at(THD *thd)
 {
-  my_bool not_used;
+  uint not_used;
   MYSQL_TIME ltime;
   my_time_t ltime_utc;
 
@@ -218,7 +218,7 @@ Event_parse_data::init_execute_at(THD *thd)
                       (starts_null && ends_null)));
   DBUG_ASSERT(starts_null && ends_null);
 
-  if ((not_used= item_execute_at->get_date(&ltime, TIME_NO_ZERO_DATE)))
+  if (item_execute_at->get_date(&ltime, TIME_NO_ZERO_DATE))
     goto wrong_value;
 
   ltime_utc= TIME_to_timestamp(thd,&ltime,&not_used);
@@ -256,7 +256,6 @@ wrong_value:
 int
 Event_parse_data::init_interval(THD *thd)
 {
-  String value;
   INTERVAL interval_tmp;
 
   DBUG_ENTER("Event_parse_data::init_interval");
@@ -278,8 +277,7 @@ Event_parse_data::init_interval(THD *thd)
   if (item_expression->fix_fields(thd, &item_expression))
     goto wrong_value;
 
-  value.alloc(MAX_DATETIME_FULL_WIDTH*MY_CHARSET_BIN_MB_MAXLEN);
-  if (get_interval_value(item_expression, interval, &value, &interval_tmp))
+  if (get_interval_value(item_expression, interval, &interval_tmp))
     goto wrong_value;
 
   expression= 0;
@@ -371,7 +369,7 @@ wrong_value:
 int
 Event_parse_data::init_starts(THD *thd)
 {
-  my_bool not_used;
+  uint not_used;
   MYSQL_TIME ltime;
   my_time_t ltime_utc;
 
@@ -382,7 +380,7 @@ Event_parse_data::init_starts(THD *thd)
   if (item_starts->fix_fields(thd, &item_starts))
     goto wrong_value;
 
-  if ((not_used= item_starts->get_date(&ltime, TIME_NO_ZERO_DATE)))
+  if (item_starts->get_date(&ltime, TIME_NO_ZERO_DATE))
     goto wrong_value;
 
   ltime_utc= TIME_to_timestamp(thd, &ltime, &not_used);
@@ -425,7 +423,7 @@ wrong_value:
 int
 Event_parse_data::init_ends(THD *thd)
 {
-  my_bool not_used;
+  uint not_used;
   MYSQL_TIME ltime;
   my_time_t ltime_utc;
 
@@ -437,7 +435,7 @@ Event_parse_data::init_ends(THD *thd)
     goto error_bad_params;
 
   DBUG_PRINT("info", ("convert to TIME"));
-  if ((not_used= item_ends->get_date(&ltime, TIME_NO_ZERO_DATE)))
+  if (item_ends->get_date(&ltime, TIME_NO_ZERO_DATE))
     goto error_bad_params;
 
   ltime_utc= TIME_to_timestamp(thd, &ltime, &not_used);
diff --git a/sql/event_parse_data.h b/sql/event_parse_data.h
index 43445f24a5e..1ea22edf488 100644
--- a/sql/event_parse_data.h
+++ b/sql/event_parse_data.h
@@ -78,9 +78,9 @@ public:
   my_time_t starts;
   my_time_t ends;
   my_time_t execute_at;
-  my_bool starts_null;
-  my_bool ends_null;
-  my_bool execute_at_null;
+  bool starts_null;
+  bool ends_null;
+  bool execute_at_null;
 
   sp_name *identifier;
   Item* item_expression;
diff --git a/sql/event_queue.cc b/sql/event_queue.cc
index 864c0322f57..40fddff094c 100644
--- a/sql/event_queue.cc
+++ b/sql/event_queue.cc
@@ -138,9 +138,9 @@ Event_queue::init_queue(THD *thd)
 
   LOCK_QUEUE_DATA();
 
-  if (init_queue_ex(&queue, EVENT_QUEUE_INITIAL_SIZE , 0 /*offset*/,
-                    0 /*max_on_top*/, event_queue_element_compare_q,
-                    NULL, EVENT_QUEUE_EXTENT))
+  if (::init_queue(&queue, EVENT_QUEUE_INITIAL_SIZE , 0 /*offset*/,
+                   0 /*max_on_top*/, event_queue_element_compare_q,
+                   NullS, 0, EVENT_QUEUE_EXTENT))
   {
     sql_print_error("Event Scheduler: Can't initialize the execution queue");
     goto err;
@@ -327,11 +327,13 @@ void
 Event_queue::drop_matching_events(THD *thd, LEX_STRING pattern,
                            bool (*comparator)(LEX_STRING, Event_basic *))
 {
-  uint i= 0;
+  uint i;
   DBUG_ENTER("Event_queue::drop_matching_events");
   DBUG_PRINT("enter", ("pattern=%s", pattern.str));
 
-  while (i < queue.elements)
+  for (i= queue_first_element(&queue) ;
+       i <= queue_last_element(&queue) ;
+       )
   {
     Event_queue_element *et= (Event_queue_element *) queue_element(&queue, i);
     DBUG_PRINT("info", ("[%s.%s]?", et->dbname.str, et->name.str));
@@ -341,7 +343,8 @@ Event_queue::drop_matching_events(THD *thd, LEX_STRING pattern,
         The queue is ordered. If we remove an element, then all elements
         after it will shift one position to the left, if we imagine it as
         an array from left to the right. In this case we should not
-        increment the counter and the (i < queue.elements) condition is ok.
+        increment the counter and the (i <= queue_last_element()) condition
+        is ok.
       */
       queue_remove(&queue, i);
       delete et;
@@ -405,7 +408,9 @@ Event_queue::find_n_remove_event(LEX_STRING db, LEX_STRING name)
   uint i;
   DBUG_ENTER("Event_queue::find_n_remove_event");
 
-  for (i= 0; i < queue.elements; ++i)
+  for (i= queue_first_element(&queue);
+       i <= queue_last_element(&queue);
+       i++)
   {
     Event_queue_element *et= (Event_queue_element *) queue_element(&queue, i);
     DBUG_PRINT("info", ("[%s.%s]==[%s.%s]?", db.str, name.str,
@@ -443,7 +448,9 @@ Event_queue::recalculate_activation_times(THD *thd)
 
   LOCK_QUEUE_DATA();
   DBUG_PRINT("info", ("%u loaded events to be recalculated", queue.elements));
-  for (i= 0; i < queue.elements; i++)
+  for (i= queue_first_element(&queue);
+       i <= queue_last_element(&queue);
+       i++)
   {
     ((Event_queue_element*)queue_element(&queue, i))->compute_next_execution_time();
   }
@@ -455,16 +462,19 @@ Event_queue::recalculate_activation_times(THD *thd)
     have removed all. The queue has been ordered in a way the disabled
     events are at the end.
   */
-  for (i= queue.elements; i > 0; i--)
+  for (i= queue_last_element(&queue);
+       (int) i >= (int) queue_first_element(&queue);
+       i--)
   {
-    Event_queue_element *element = (Event_queue_element*)queue_element(&queue, i - 1);
+    Event_queue_element *element=
+      (Event_queue_element*)queue_element(&queue, i);
     if (element->status != Event_parse_data::DISABLED)
       break;
     /*
       This won't cause queue re-order, because we remove
       always the last element.
     */
-    queue_remove(&queue, i - 1);
+    queue_remove(&queue, i);
     delete element;
   }
   UNLOCK_QUEUE_DATA();
@@ -500,7 +510,9 @@ Event_queue::empty_queue()
   sql_print_information("Event Scheduler: Purging the queue. %u events",
                         queue.elements);
   /* empty the queue */
-  for (i= 0; i < queue.elements; ++i)
+  for (i= queue_first_element(&queue);
+       i <= queue_last_element(&queue);
+       i++)
   {
     Event_queue_element *et= (Event_queue_element *) queue_element(&queue, i);
     delete et;
@@ -519,14 +531,17 @@ Event_queue::empty_queue()
 */
 
 void
-Event_queue::dbug_dump_queue(time_t now)
+Event_queue::dbug_dump_queue(my_time_t when)
 {
 #ifndef DBUG_OFF
+  my_time_t now= when;
   Event_queue_element *et;
   uint i;
   DBUG_ENTER("Event_queue::dbug_dump_queue");
   DBUG_PRINT("info", ("Dumping queue . Elements=%u", queue.elements));
-  for (i = 0; i < queue.elements; i++)
+  for (i= queue_first_element(&queue);
+       i <= queue_last_element(&queue);
+       i++)
   {
     et= ((Event_queue_element*)queue_element(&queue, i));
     DBUG_PRINT("info", ("et: 0x%lx  name: %s.%s", (long) et,
@@ -598,7 +613,7 @@ Event_queue::get_top_for_execution_if_time(THD *thd,
       continue;
     }
 
-    top= ((Event_queue_element*) queue_element(&queue, 0));
+    top= (Event_queue_element*) queue_top(&queue);
 
     thd->set_current_time(); /* Get current time */
 
@@ -609,14 +624,12 @@ Event_queue::get_top_for_execution_if_time(THD *thd,
         Not yet time for top event, wait on condition with
         time or until signaled. Release LOCK_queue while waiting.
       */
-      struct timespec top_time;
-      set_timespec(top_time, next_activation_at - thd->query_start());
+      struct timespec top_time= { next_activation_at, 0 };
 
       /* Release any held audit resources before waiting */
       mysql_audit_release(thd);
 
       cond_wait(thd, &top_time, queue_wait_msg, SCHED_FUNC, __LINE__);
-
       continue;
     }
 
@@ -650,10 +663,10 @@ Event_queue::get_top_for_execution_if_time(THD *thd,
                             top->dbname.str, top->name.str,
                             top->dropped? "Dropping.":"");
       delete top;
-      queue_remove(&queue, 0);
+      queue_remove_top(&queue);
     }
     else
-      queue_replaced(&queue);
+      queue_replace_top(&queue);
 
     dbug_dump_queue(thd->query_start());
     break;
@@ -759,6 +772,7 @@ Event_queue::cond_wait(THD *thd, struct timespec *abstime, const char* msg,
 
   if (!thd->killed)
   {
+    DBUG_PRINT("info", ("pthread_cond_%swait", abstime ? "timed" : ""));
     if (!abstime)
       mysql_cond_wait(&COND_queue_state, &LOCK_event_queue);
     else
diff --git a/sql/event_queue.h b/sql/event_queue.h
index 71f6ca3f71e..affa306b259 100644
--- a/sql/event_queue.h
+++ b/sql/event_queue.h
@@ -107,7 +107,7 @@ private:
 
 
   void
-  dbug_dump_queue(time_t now);
+  dbug_dump_queue(my_time_t now);
 
   /* LOCK_event_queue is the mutex which protects the access to the queue. */
   mysql_mutex_t LOCK_event_queue;
diff --git a/sql/event_scheduler.cc b/sql/event_scheduler.cc
index f1c684add2b..8944b749305 100644
--- a/sql/event_scheduler.cc
+++ b/sql/event_scheduler.cc
@@ -104,7 +104,7 @@ Event_worker_thread::print_warnings(THD *thd, Event_job_data *et)
                    err->get_message_octet_length(), system_charset_info);
     DBUG_ASSERT(err->get_level() < 3);
     (sql_print_message_handlers[err->get_level()])("%*s", err_msg.length(),
-                                                   err_msg.c_ptr());
+                                                   err_msg.c_ptr_safe());
   }
   DBUG_VOID_RETURN;
 }
@@ -349,6 +349,14 @@ Event_scheduler::Event_scheduler(Event_queue *queue_arg)
   mysql_mutex_init(key_event_scheduler_LOCK_scheduler_state,
                    &LOCK_scheduler_state, MY_MUTEX_INIT_FAST);
   mysql_cond_init(key_event_scheduler_COND_state, &COND_state, NULL);
+
+#ifdef SAFE_MUTEX
+  /* Ensure right mutex order */
+  mysql_mutex_lock(&LOCK_scheduler_state);
+  mysql_mutex_lock(&LOCK_global_system_variables);
+  mysql_mutex_unlock(&LOCK_global_system_variables);
+  mysql_mutex_unlock(&LOCK_scheduler_state);
+#endif
 }
 
 
@@ -657,7 +665,14 @@ Event_scheduler::stop()
     /* thd could be 0x0, when shutting down */
     sql_print_information("Event Scheduler: "
                           "Waiting for the scheduler thread to reply");
-    COND_STATE_WAIT(thd, NULL, "Waiting scheduler to stop");
+
+    /*
+      Wait only 2 seconds, as there is a small chance the thread missed the
+      above awake() call and we may have to do it again
+    */
+    struct timespec top_time;
+    set_timespec(top_time, 2);
+    COND_STATE_WAIT(thd, &top_time, "Waiting scheduler to stop");
   } while (state == STOPPING);
   DBUG_PRINT("info", ("Scheduler thread has cleaned up. Set state to INIT"));
   sql_print_information("Event Scheduler: Stopped");
diff --git a/sql/events.cc b/sql/events.cc
index 2cedb007462..00299463dba 100644
--- a/sql/events.cc
+++ b/sql/events.cc
@@ -234,7 +234,6 @@ common_1_lev_code:
   case INTERVAL_MICROSECOND:
     my_error(ER_NOT_SUPPORTED_YET, MYF(0), "MICROSECOND");
     return 1;
-    break;
   case INTERVAL_QUARTER:
     expr/= 3;
     close_quote= FALSE;
@@ -393,7 +392,7 @@ Events::create_event(THD *thd, Event_parse_data *parse_data,
           If the definer is not set or set to CURRENT_USER, the value of CURRENT_USER
           will be written into the binary log as the definer for the SQL thread.
         */
-        ret= write_bin_log(thd, TRUE, log_query.c_ptr(), log_query.length());
+        ret= write_bin_log(thd, TRUE, log_query.ptr(), log_query.length());
     }
   }
   /* Restore the state of binlog format */
@@ -668,7 +667,7 @@ send_show_create_event(THD *thd, Event_timed *et, Protocol *protocol)
   protocol->store(et->name.str, et->name.length, system_charset_info);
   protocol->store(sql_mode.str, sql_mode.length, system_charset_info);
   protocol->store(tz_name->ptr(), tz_name->length(), system_charset_info);
-  protocol->store(show_str.c_ptr(), show_str.length(),
+  protocol->store(show_str.ptr(), show_str.length(),
                   et->creation_ctx->get_client_cs());
   protocol->store(et->creation_ctx->get_client_cs()->csname,
                   strlen(et->creation_ctx->get_client_cs()->csname),
@@ -794,7 +793,7 @@ Events::fill_schema_events(THD *thd, TABLE_LIST *tables, COND * /* cond */)
 */
 
 bool
-Events::init(my_bool opt_noacl_or_bootstrap)
+Events::init(bool opt_noacl_or_bootstrap)
 {
 
   THD *thd;
@@ -1076,7 +1075,12 @@ Events::load_events_from_db(THD *thd)
     DBUG_RETURN(TRUE);
   }
 
-  init_read_record(&read_record_info, thd, table, NULL, 0, 1, FALSE);
+  if (init_read_record(&read_record_info, thd, table, NULL, 0, 1, FALSE))
+  {
+    close_thread_tables(thd);
+    DBUG_RETURN(TRUE);
+  }
+
   while (!(read_record_info.read_record(&read_record_info)))
   {
     Event_queue_element *et;
@@ -1127,8 +1131,9 @@ Events::load_events_from_db(THD *thd)
       }
     }
   }
-  sql_print_information("Event Scheduler: Loaded %d event%s",
-                        count, (count == 1) ? "" : "s");
+  if (global_system_variables.log_warnings)
+    sql_print_information("Event Scheduler: Loaded %d event%s",
+                          count, (count == 1) ? "" : "s");
   ret= FALSE;
 
 end:
diff --git a/sql/events.h b/sql/events.h
index a337b29049a..a94fbc17135 100644
--- a/sql/events.h
+++ b/sql/events.h
@@ -41,7 +41,6 @@ class Event_scheduler;
 struct TABLE_LIST;
 class THD;
 typedef class Item COND;
-typedef struct charset_info_st CHARSET_INFO;
 
 int
 sortcmp_lex_string(LEX_STRING s, LEX_STRING t, CHARSET_INFO *cs);
@@ -88,7 +87,7 @@ public:
   get_db_repository() { return db_repository; }
 
   static bool
-  init(my_bool opt_noacl);
+  init(bool opt_noacl);
 
   static void
   deinit();
diff --git a/sql/examples/CMakeLists.txt b/sql/examples/CMakeLists.txt
index abe4de402bf..c4ea4c25679 100644
--- a/sql/examples/CMakeLists.txt
+++ b/sql/examples/CMakeLists.txt
@@ -13,9 +13,6 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFE_MUTEX")
-SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFE_MUTEX")
-
 INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql
                     ${CMAKE_SOURCE_DIR}/extra/yassl/include
                     ${CMAKE_SOURCE_DIR}/regex)
diff --git a/sql/field.cc b/sql/field.cc
index e1a24e82718..550cc435467 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -1,5 +1,6 @@
 /*
-   Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -54,6 +55,17 @@ template class List<Create_field>;
 template class List_iterator<Create_field>;
 #endif
 
+static const char *zero_timestamp="0000-00-00 00:00:00.000000";
+
+/* number of bytes needed to store the second_part of a TIMESTAMP(N) */
+static uint sec_part_bytes[MAX_DATETIME_PRECISION+1]= { 0, 1, 1, 2, 2, 3, 3 };
+
+/* number of bytes to store DATETIME(N) */
+static uint datetime_hires_bytes[MAX_DATETIME_PRECISION+1]= { 5, 6, 6, 7, 7, 7, 8 };
+
+/* number of bytes to store TIME(N) */
+static uint time_hires_bytes[MAX_DATETIME_PRECISION+1]= { 3, 4, 4, 5, 5, 5, 6 };
+
 uchar Field_null::null[1]={1};
 const char field_separator=',';
 
@@ -64,7 +76,7 @@ const char field_separator=',';
 ((ulong) ((LL(1) << min(arg, 4) * 8) - LL(1)))
 
 #define ASSERT_COLUMN_MARKED_FOR_READ DBUG_ASSERT(!table || (!table->read_set || bitmap_is_set(table->read_set, field_index)))
-#define ASSERT_COLUMN_MARKED_FOR_WRITE DBUG_ASSERT(!table || (!table->write_set || bitmap_is_set(table->write_set, field_index)))
+#define ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED DBUG_ASSERT(!table || (!table->write_set || bitmap_is_set(table->write_set, field_index) || bitmap_is_set(table->vcol_set, field_index)))
 
 #define FLAGSTR(S,F) ((S) & (F) ? #F " " : "")
 
@@ -78,7 +90,7 @@ const char field_separator=',';
 #define FIELDTYPE_TEAR_FROM (MYSQL_TYPE_BIT + 1)
 #define FIELDTYPE_TEAR_TO   (MYSQL_TYPE_NEWDECIMAL - 1)
 #define FIELDTYPE_NUM (FIELDTYPE_TEAR_FROM + (255 - FIELDTYPE_TEAR_TO))
-inline int field_type2index (enum_field_types field_type)
+static inline int field_type2index (enum_field_types field_type)
 {
   return (field_type < FIELDTYPE_TEAR_FROM ?
           field_type :
@@ -1221,7 +1233,8 @@ bool Field_num::get_int(CHARSET_INFO *cs, const char *from, uint len,
   if (unsigned_flag)
   {
 
-    if ((((ulonglong) *rnd > unsigned_max) && (*rnd= (longlong) unsigned_max)) ||
+    if ((((ulonglong) *rnd > unsigned_max) &&
+         (*rnd= (longlong) unsigned_max)) ||
         error == MY_ERRNO_ERANGE)
     {
       goto out_of_range;
@@ -1284,7 +1297,7 @@ int Field::warn_if_overflow(int op_result)
   This is used for printing bit_fields as numbers while debugging.
 */
 
-String *Field::val_int_as_str(String *val_buffer, my_bool unsigned_val)
+String *Field::val_int_as_str(String *val_buffer, bool unsigned_val)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   CHARSET_INFO *cs= &my_charset_bin;
@@ -1306,13 +1319,13 @@ String *Field::val_int_as_str(String *val_buffer, my_bool unsigned_val)
 Field::Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg,
 	     uchar null_bit_arg,
 	     utype unireg_check_arg, const char *field_name_arg)
-  :ptr(ptr_arg), null_ptr(null_ptr_arg),
-   table(0), orig_table(0), table_name(0),
-   field_name(field_name_arg),
-   key_start(0), part_of_key(0), part_of_key_not_clustered(0),
-   part_of_sortkey(0), unireg_check(unireg_check_arg),
-   field_length(length_arg), null_bit(null_bit_arg), 
-   is_created_from_null_item(FALSE)
+  :ptr(ptr_arg), null_ptr(null_ptr_arg), table(0), orig_table(0),
+  table_name(0), field_name(field_name_arg), option_list(0),
+  option_struct(0), key_start(0), part_of_key(0),
+  part_of_key_not_clustered(0), part_of_sortkey(0),
+  unireg_check(unireg_check_arg), field_length(length_arg),
+  null_bit(null_bit_arg), is_created_from_null_item(FALSE), vcol_info(0),
+  stored_in_db(TRUE)
 {
   flags=null_ptr ? 0: NOT_NULL_FLAG;
   comment.str= (char*) "";
@@ -1432,13 +1445,6 @@ int Field::store(const char *to, uint length, CHARSET_INFO *cs,
    should be overridden. The other functions are just convenience
    functions and hence should not be overridden.
 
-   The value of <code>low_byte_first</code> is dependent on how the
-   packed data is going to be used: for local use, e.g., temporary
-   store on disk or in memory, use the native format since that is
-   faster. For data that is going to be transfered to other machines
-   (e.g., when writing data to the binary log), data should always be
-   stored in little-endian format.
-
    @note The default method for packing fields just copy the raw bytes
    of the record into the destination, but never more than
    <code>max_length</code> characters.
@@ -1456,15 +1462,9 @@ int Field::store(const char *to, uint length, CHARSET_INFO *cs,
    is 1000. This information is sometimes needed to decide how to pack
    the data.
 
-   @param low_byte_first
-   @c TRUE if integers should be stored little-endian, @c FALSE if
-   native format should be used. Note that for little-endian machines,
-   the value of this flag is a moot point since the native format is
-   little-endian.
 */
 uchar *
-Field::pack(uchar *to, const uchar *from, uint max_length,
-            bool low_byte_first __attribute__((unused)))
+Field::pack(uchar *to, const uchar *from, uint max_length)
 {
   uint32 length= pack_length();
   set_if_smaller(length, max_length);
@@ -1495,16 +1495,10 @@ Field::pack(uchar *to, const uchar *from, uint max_length,
    @param   param_data Real type and original pack length of the field
                        data
 
-   @param low_byte_first
-   If this flag is @c true, all composite entities (e.g., lengths)
-   should be unpacked in little-endian format; otherwise, the entities
-   are unpacked in native order.
-
    @return  New pointer into memory based on from + length of the data
 */
 const uchar *
-Field::unpack(uchar* to, const uchar *from, uint param_data,
-              bool low_byte_first __attribute__((unused)))
+Field::unpack(uchar* to, const uchar *from, uint param_data)
 {
   uint length=pack_length();
   int from_type= 0;
@@ -1565,9 +1559,9 @@ void Field::make_field(Send_field *field)
   }
   else
     field->org_table_name= field->db_name= "";
-  if (orig_table && orig_table->alias)
+  if (orig_table && orig_table->alias.ptr())
   {
-    field->table_name= orig_table->alias;
+    field->table_name= orig_table->alias.ptr();
     field->org_col_name= field_name;
   }
   else
@@ -1607,17 +1601,19 @@ longlong Field::convert_decimal2longlong(const my_decimal *val,
       i= 0;
       *err= 1;
     }
-    else if (warn_if_overflow(my_decimal2int(E_DEC_ERROR &
-                                           ~E_DEC_OVERFLOW & ~E_DEC_TRUNCATED,
-                                           val, TRUE, &i)))
+    else if (warn_if_overflow(my_decimal2int((E_DEC_ERROR &
+                                              ~E_DEC_OVERFLOW &
+                                              ~E_DEC_TRUNCATED),
+                                             val, TRUE, &i)))
     {
       i= ~(longlong) 0;
       *err= 1;
     }
   }
-  else if (warn_if_overflow(my_decimal2int(E_DEC_ERROR &
-                                         ~E_DEC_OVERFLOW & ~E_DEC_TRUNCATED,
-                                         val, FALSE, &i)))
+  else if (warn_if_overflow(my_decimal2int((E_DEC_ERROR &
+                                            ~E_DEC_OVERFLOW &
+                                            ~E_DEC_TRUNCATED),
+                                           val, FALSE, &i)))
   {
     i= (val->sign() ? LONGLONG_MIN : LONGLONG_MAX);
     *err= 1;
@@ -1642,7 +1638,7 @@ longlong Field::convert_decimal2longlong(const my_decimal *val,
 
 int Field_num::store_decimal(const my_decimal *val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int err= 0;
   longlong i= convert_decimal2longlong(val, unsigned_flag, &err);
   return test(err | store(i, unsigned_flag));
@@ -1714,7 +1710,7 @@ void Field_num::make_field(Send_field *field)
 
 int Field_str::store_decimal(const my_decimal *d)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   double val;
   /* TODO: use decimal2string? */
   int err= warn_if_overflow(my_decimal2double(E_DEC_FATAL_ERROR &
@@ -1735,8 +1731,8 @@ my_decimal *Field_str::val_decimal(my_decimal *decimal_value)
 uint Field::fill_cache_field(CACHE_FIELD *copy)
 {
   uint store_length;
-  copy->str=ptr;
-  copy->length=pack_length();
+  copy->str= ptr;
+  copy->length= pack_length();
   copy->field= this;
   if (flags & BLOB_FLAG)
   {
@@ -1748,9 +1744,15 @@ uint Field::fill_cache_field(CACHE_FIELD *copy)
            (type() == MYSQL_TYPE_STRING && copy->length >= 4 &&
             copy->length < 256))
   {
-    copy->type= CACHE_STRIPPED;
+    copy->type= CACHE_STRIPPED;			    /* Remove end space */
     store_length= 2;
   }
+  else if (type() ==  MYSQL_TYPE_VARCHAR)
+  {
+    copy->type= pack_length()-row_pack_length() == 1 ? CACHE_VARSTR1:
+                                                      CACHE_VARSTR2;
+    store_length= 0;
+  }
   else
   {
     copy->type= 0;
@@ -1771,16 +1773,6 @@ bool Field::get_date(MYSQL_TIME *ltime,uint fuzzydate)
   return 0;
 }
 
-bool Field::get_time(MYSQL_TIME *ltime)
-{
-  char buff[40];
-  String tmp(buff,sizeof(buff),&my_charset_bin),*res;
-  if (!(res=val_str(&tmp)) ||
-      str_to_time_with_warn(res->charset(), res->ptr(), res->length(), ltime))
-    return 1;
-  return 0;
-}
-
 /**
   This is called when storing a date in a string.
 
@@ -1788,11 +1780,11 @@ bool Field::get_time(MYSQL_TIME *ltime)
     Needs to be changed if/when we want to support different time formats.
 */
 
-int Field::store_time(MYSQL_TIME *ltime, timestamp_type type_arg)
+int Field::store_time_dec(MYSQL_TIME *ltime, uint dec)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   char buff[MAX_DATE_STRING_REP_LENGTH];
-  uint length= (uint) my_TIME_to_str(ltime, buff);
+  uint length= (uint) my_TIME_to_str(ltime, buff, dec);
   /* Avoid conversion when field character set is ASCII compatible */
   return store(buff, length, (charset()->state & MY_CS_NONASCII) ?
                               &my_charset_latin1 : charset());
@@ -1919,7 +1911,7 @@ void Field_decimal::overflow(bool negative)
 
 int Field_decimal::store(const char *from_arg, uint len, CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   char buff[STRING_BUFFER_USUAL_SIZE];
   String tmp(buff,sizeof(buff), &my_charset_bin);
   const uchar *from= (uchar*) from_arg;
@@ -2285,7 +2277,7 @@ int Field_decimal::store(const char *from_arg, uint len, CHARSET_INFO *cs)
 
 int Field_decimal::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   if (unsigned_flag && nr < 0)
   {
     overflow(1);
@@ -2324,7 +2316,7 @@ int Field_decimal::store(double nr)
 
 int Field_decimal::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   char buff[22];
   uint length, int_part;
   char fyllchar;
@@ -2604,7 +2596,7 @@ void Field_new_decimal::set_value_on_overflow(my_decimal *decimal_value,
 
 bool Field_new_decimal::store_value(const my_decimal *decimal_value)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   DBUG_ENTER("Field_new_decimal::store_value");
 #ifndef DBUG_OFF
@@ -2649,7 +2641,7 @@ bool Field_new_decimal::store_value(const my_decimal *decimal_value)
 int Field_new_decimal::store(const char *from, uint length,
                              CHARSET_INFO *charset_arg)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int err;
   my_decimal decimal_value;
   DBUG_ENTER("Field_new_decimal::store(char*)");
@@ -2710,7 +2702,7 @@ int Field_new_decimal::store(const char *from, uint length,
 
 int Field_new_decimal::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   my_decimal decimal_value;
   int err;
   DBUG_ENTER("Field_new_decimal::store(double)");
@@ -2734,7 +2726,7 @@ int Field_new_decimal::store(double nr)
 
 int Field_new_decimal::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   my_decimal decimal_value;
   int err;
 
@@ -2756,15 +2748,15 @@ int Field_new_decimal::store(longlong nr, bool unsigned_val)
 
 int Field_new_decimal::store_decimal(const my_decimal *decimal_value)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   return store_value(decimal_value);
 }
 
 
-int Field_new_decimal::store_time(MYSQL_TIME *ltime, timestamp_type t_type)
+int Field_new_decimal::store_time_dec(MYSQL_TIME *ltime, uint dec)
 {
-    my_decimal decimal_value;
-    return store_value(date2my_decimal(ltime, &decimal_value));
+  my_decimal decimal_value;
+  return store_value(date2my_decimal(ltime, &decimal_value));
 }
 
 
@@ -2915,13 +2907,10 @@ uint Field_new_decimal::is_equal(Create_field *new_field)
    @return  New pointer into memory based on from + length of the data
 */
 const uchar *
-Field_new_decimal::unpack(uchar* to,
-                          const uchar *from,
-                          uint param_data,
-                          bool low_byte_first)
+Field_new_decimal::unpack(uchar* to, const uchar *from, uint param_data)
 {
   if (param_data == 0)
-    return Field::unpack(to, from, param_data, low_byte_first);
+    return Field::unpack(to, from, param_data);
 
   uint from_precision= (param_data & 0xff00) >> 8U;
   uint from_decimal= param_data & 0x00ff;
@@ -2955,13 +2944,22 @@ Field_new_decimal::unpack(uchar* to,
   return from+len;
 }
 
+int Field_num::store_time_dec(MYSQL_TIME *ltime, uint dec)
+{
+  /* Store the value of TIME_to_ulonglong(ltime), negated if ltime->neg. */
+  const longlong packed= TIME_to_ulonglong(ltime);
+  const bool negative= ltime->neg != 0;
+  return negative ? store(-packed, false) : store(packed, true);
+}
+
+
 /****************************************************************************
 ** tiny int
 ****************************************************************************/
 
 int Field_tiny::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error;
   longlong rnd;
   
@@ -2973,7 +2971,7 @@ int Field_tiny::store(const char *from,uint len,CHARSET_INFO *cs)
 
 int Field_tiny::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   nr=rint(nr);
   if (unsigned_flag)
@@ -2986,18 +2984,18 @@ int Field_tiny::store(double nr)
     }
     else if (nr > 255.0)
     {
-      *ptr=(char) 255;
+      *ptr= (uchar) 255;
       set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
       error= 1;
     }
     else
-      *ptr=(char) nr;
+      *ptr= (uchar) nr;
   }
   else
   {
     if (nr < -128.0)
     {
-      *ptr= (char) -128;
+      *ptr= (uchar) -128;
       set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
       error= 1;
     }
@@ -3008,7 +3006,7 @@ int Field_tiny::store(double nr)
       error= 1;
     }
     else
-      *ptr=(char) (int) nr;
+      *ptr=(uchar) (int) nr;
   }
   return error;
 }
@@ -3016,7 +3014,7 @@ int Field_tiny::store(double nr)
 
 int Field_tiny::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
 
   if (unsigned_flag)
@@ -3137,28 +3135,21 @@ void Field_tiny::sql_type(String &res) const
 
 int Field_short::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int store_tmp;
   int error;
   longlong rnd;
   
   error= get_int(cs, from, len, &rnd, UINT_MAX16, INT_MIN16, INT_MAX16);
   store_tmp= unsigned_flag ? (int) (ulonglong) rnd : (int) rnd;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int2store(ptr, store_tmp);
-  }
-  else
-#endif
-    shortstore(ptr, (short) store_tmp);
+  int2store(ptr, store_tmp);
   return error;
 }
 
 
 int Field_short::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   int16 res;
   nr=rint(nr);
@@ -3196,21 +3187,14 @@ int Field_short::store(double nr)
     else
       res=(int16) (int) nr;
   }
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int2store(ptr,res);
-  }
-  else
-#endif
-    shortstore(ptr,res);
+  int2store(ptr,res);
   return error;
 }
 
 
 int Field_short::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   int16 res;
 
@@ -3251,14 +3235,7 @@ int Field_short::store(longlong nr, bool unsigned_val)
     else
       res=(int16) nr;
   }
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int2store(ptr,res);
-  }
-  else
-#endif
-    shortstore(ptr,res);
+  int2store(ptr,res);
   return error;
 }
 
@@ -3267,12 +3244,7 @@ double Field_short::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   short j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint2korr(ptr);
-  else
-#endif
-    shortget(j,ptr);
+  j=sint2korr(ptr);
   return unsigned_flag ? (double) (unsigned short) j : (double) j;
 }
 
@@ -3280,12 +3252,7 @@ longlong Field_short::val_int(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   short j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint2korr(ptr);
-  else
-#endif
-    shortget(j,ptr);
+  j=sint2korr(ptr);
   return unsigned_flag ? (longlong) (unsigned short) j : (longlong) j;
 }
 
@@ -3300,12 +3267,7 @@ String *Field_short::val_str(String *val_buffer,
   val_buffer->alloc(mlength);
   char *to=(char*) val_buffer->ptr();
   short j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint2korr(ptr);
-  else
-#endif
-    shortget(j,ptr);
+  j=sint2korr(ptr);
 
   if (unsigned_flag)
     length=(uint) cs->cset->long10_to_str(cs, to, mlength, 10, 
@@ -3329,18 +3291,8 @@ bool Field_short::send_binary(Protocol *protocol)
 int Field_short::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   short a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    a=sint2korr(a_ptr);
-    b=sint2korr(b_ptr);
-  }
-  else
-#endif
-  {
-    shortget(a,a_ptr);
-    shortget(b,b_ptr);
-  }
+  a=sint2korr(a_ptr);
+  b=sint2korr(b_ptr);
 
   if (unsigned_flag)
     return ((unsigned short) a < (unsigned short) b) ? -1 :
@@ -3350,24 +3302,11 @@ int Field_short::cmp(const uchar *a_ptr, const uchar *b_ptr)
 
 void Field_short::sort_string(uchar *to,uint length __attribute__((unused)))
 {
-#ifdef WORDS_BIGENDIAN
-  if (!table->s->db_low_byte_first)
-  {
-    if (unsigned_flag)
-      to[0] = ptr[0];
-    else
-      to[0] = (char) (ptr[0] ^ 128);		/* Revers signbit */
-    to[1]   = ptr[1];
-  }
+  if (unsigned_flag)
+    to[0] = ptr[1];
   else
-#endif
-  {
-    if (unsigned_flag)
-      to[0] = ptr[1];
-    else
-      to[0] = (char) (ptr[1] ^ 128);		/* Revers signbit */
-    to[1]   = ptr[0];
-  }
+    to[0] = (char) (ptr[1] ^ 128);              /* Reverse sign bit */
+  to[1]   = ptr[0];
 }
 
 void Field_short::sql_type(String &res) const
@@ -3385,7 +3324,7 @@ void Field_short::sql_type(String &res) const
 
 int Field_medium::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int store_tmp;
   int error;
   longlong rnd;
@@ -3399,7 +3338,7 @@ int Field_medium::store(const char *from,uint len,CHARSET_INFO *cs)
 
 int Field_medium::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   nr=rint(nr);
   if (unsigned_flag)
@@ -3445,7 +3384,7 @@ int Field_medium::store(double nr)
 
 int Field_medium::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
 
   if (unsigned_flag)
@@ -3576,28 +3515,21 @@ void Field_medium::sql_type(String &res) const
 
 int Field_long::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   long store_tmp;
   int error;
   longlong rnd;
   
   error= get_int(cs, from, len, &rnd, UINT_MAX32, INT_MIN32, INT_MAX32);
   store_tmp= unsigned_flag ? (long) (ulonglong) rnd : (long) rnd;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int4store(ptr, store_tmp);
-  }
-  else
-#endif
-    longstore(ptr, store_tmp);
+  int4store(ptr, store_tmp);
   return error;
 }
 
 
 int Field_long::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   int32 res;
   nr=rint(nr);
@@ -3635,21 +3567,14 @@ int Field_long::store(double nr)
   if (error)
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
 
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int4store(ptr,res);
-  }
-  else
-#endif
-    longstore(ptr,res);
+  int4store(ptr,res);
   return error;
 }
 
 
 int Field_long::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   int32 res;
 
@@ -3688,14 +3613,7 @@ int Field_long::store(longlong nr, bool unsigned_val)
   if (error)
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
 
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int4store(ptr,res);
-  }
-  else
-#endif
-    longstore(ptr,res);
+  int4store(ptr,res);
   return error;
 }
 
@@ -3704,12 +3622,7 @@ double Field_long::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   int32 j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint4korr(ptr);
-  else
-#endif
-    longget(j,ptr);
+  j=sint4korr(ptr);
   return unsigned_flag ? (double) (uint32) j : (double) j;
 }
 
@@ -3719,12 +3632,7 @@ longlong Field_long::val_int(void)
   int32 j;
   /* See the comment in Field_long::store(long long) */
   DBUG_ASSERT(table->in_use == current_thd);
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint4korr(ptr);
-  else
-#endif
-    longget(j,ptr);
+  j=sint4korr(ptr);
   return unsigned_flag ? (longlong) (uint32) j : (longlong) j;
 }
 
@@ -3738,12 +3646,7 @@ String *Field_long::val_str(String *val_buffer,
   val_buffer->alloc(mlength);
   char *to=(char*) val_buffer->ptr();
   int32 j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint4korr(ptr);
-  else
-#endif
-    longget(j,ptr);
+  j=sint4korr(ptr);
 
   if (unsigned_flag)
     length=cs->cset->long10_to_str(cs,to,mlength, 10,(long) (uint32)j);
@@ -3766,18 +3669,8 @@ bool Field_long::send_binary(Protocol *protocol)
 int Field_long::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   int32 a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    a=sint4korr(a_ptr);
-    b=sint4korr(b_ptr);
-  }
-  else
-#endif
-  {
-    longget(a,a_ptr);
-    longget(b,b_ptr);
-  }
+  a=sint4korr(a_ptr);
+  b=sint4korr(b_ptr);
   if (unsigned_flag)
     return ((uint32) a < (uint32) b) ? -1 : ((uint32) a > (uint32) b) ? 1 : 0;
   return (a < b) ? -1 : (a > b) ? 1 : 0;
@@ -3785,28 +3678,13 @@ int Field_long::cmp(const uchar *a_ptr, const uchar *b_ptr)
 
 void Field_long::sort_string(uchar *to,uint length __attribute__((unused)))
 {
-#ifdef WORDS_BIGENDIAN
-  if (!table->s->db_low_byte_first)
-  {
-    if (unsigned_flag)
-      to[0] = ptr[0];
-    else
-      to[0] = (char) (ptr[0] ^ 128);		/* Revers signbit */
-    to[1]   = ptr[1];
-    to[2]   = ptr[2];
-    to[3]   = ptr[3];
-  }
+  if (unsigned_flag)
+    to[0] = ptr[3];
   else
-#endif
-  {
-    if (unsigned_flag)
-      to[0] = ptr[3];
-    else
-      to[0] = (char) (ptr[3] ^ 128);		/* Revers signbit */
-    to[1]   = ptr[2];
-    to[2]   = ptr[1];
-    to[3]   = ptr[0];
-  }
+    to[0] = (char) (ptr[3] ^ 128);              /* Reverse sign bit */
+  to[1]   = ptr[2];
+  to[2]   = ptr[1];
+  to[3]   = ptr[0];
 }
 
 
@@ -3824,7 +3702,7 @@ void Field_long::sql_type(String &res) const
 
 int Field_longlong::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   char *end;
   ulonglong tmp;
@@ -3840,73 +3718,30 @@ int Field_longlong::store(const char *from,uint len,CHARSET_INFO *cs)
     error= 1;
   else
     error= 0;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int8store(ptr,tmp);
-  }
-  else
-#endif
-    longlongstore(ptr,tmp);
+  int8store(ptr,tmp);
   return error;
 }
 
 
 int Field_longlong::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  int error= 0;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
+  bool error;
   longlong res;
 
-  nr= rint(nr);
-  if (unsigned_flag)
-  {
-    if (nr < 0)
-    {
-      res=0;
-      error= 1;
-    }
-    else if (nr >= (double) ULONGLONG_MAX)
-    {
-      res= ~(longlong) 0;
-      error= 1;
-    }
-    else
-      res=(longlong) double2ulonglong(nr);
-  }
-  else
-  {
-    if (nr <= (double) LONGLONG_MIN)
-    {
-      res= LONGLONG_MIN;
-      error= (nr < (double) LONGLONG_MIN);
-    }
-    else if (nr >= (double) (ulonglong) LONGLONG_MAX)
-    {
-      res= LONGLONG_MAX;
-      error= (nr > (double) LONGLONG_MAX);
-    }
-    else
-      res=(longlong) nr;
-  }
+  res= double_to_longlong(nr, unsigned_flag, &error);
+
   if (error)
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
 
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int8store(ptr,res);
-  }
-  else
-#endif
-    longlongstore(ptr,res);
+  int8store(ptr,res);
   return error;
 }
 
 
 int Field_longlong::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
 
   if (nr < 0)                                   // Only possible error
@@ -3923,14 +3758,7 @@ int Field_longlong::store(longlong nr, bool unsigned_val)
     }
   }
 
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int8store(ptr,nr);
-  }
-  else
-#endif
-    longlongstore(ptr,nr);
+  int8store(ptr,nr);
   return error;
 }
 
@@ -3939,14 +3767,7 @@ double Field_longlong::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   longlong j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    j=sint8korr(ptr);
-  }
-  else
-#endif
-    longlongget(j,ptr);
+  j=sint8korr(ptr);
   /* The following is open coded to avoid a bug in gcc 3.3 */
   if (unsigned_flag)
   {
@@ -3961,12 +3782,7 @@ longlong Field_longlong::val_int(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   longlong j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint8korr(ptr);
-  else
-#endif
-    longlongget(j,ptr);
+  j=sint8korr(ptr);
   return j;
 }
 
@@ -3980,12 +3796,7 @@ String *Field_longlong::val_str(String *val_buffer,
   val_buffer->alloc(mlength);
   char *to=(char*) val_buffer->ptr();
   longlong j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint8korr(ptr);
-  else
-#endif
-    longlongget(j,ptr);
+  j=sint8korr(ptr);
 
   length=(uint) (cs->cset->longlong10_to_str)(cs,to,mlength,
 					unsigned_flag ? 10 : -10, j);
@@ -4007,18 +3818,8 @@ bool Field_longlong::send_binary(Protocol *protocol)
 int Field_longlong::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   longlong a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    a=sint8korr(a_ptr);
-    b=sint8korr(b_ptr);
-  }
-  else
-#endif
-  {
-    longlongget(a,a_ptr);
-    longlongget(b,b_ptr);
-  }
+  a=sint8korr(a_ptr);
+  b=sint8korr(b_ptr);
   if (unsigned_flag)
     return ((ulonglong) a < (ulonglong) b) ? -1 :
     ((ulonglong) a > (ulonglong) b) ? 1 : 0;
@@ -4027,36 +3828,17 @@ int Field_longlong::cmp(const uchar *a_ptr, const uchar *b_ptr)
 
 void Field_longlong::sort_string(uchar *to,uint length __attribute__((unused)))
 {
-#ifdef WORDS_BIGENDIAN
-  if (!table->s->db_low_byte_first)
-  {
-    if (unsigned_flag)
-      to[0] = ptr[0];
-    else
-      to[0] = (char) (ptr[0] ^ 128);		/* Revers signbit */
-    to[1]   = ptr[1];
-    to[2]   = ptr[2];
-    to[3]   = ptr[3];
-    to[4]   = ptr[4];
-    to[5]   = ptr[5];
-    to[6]   = ptr[6];
-    to[7]   = ptr[7];
-  }
+  if (unsigned_flag)
+    to[0] = ptr[7];
   else
-#endif
-  {
-    if (unsigned_flag)
-      to[0] = ptr[7];
-    else
-      to[0] = (char) (ptr[7] ^ 128);		/* Revers signbit */
-    to[1]   = ptr[6];
-    to[2]   = ptr[5];
-    to[3]   = ptr[4];
-    to[4]   = ptr[3];
-    to[5]   = ptr[2];
-    to[6]   = ptr[1];
-    to[7]   = ptr[0];
-  }
+    to[0] = (char) (ptr[7] ^ 128);		/* Reverse sign bit */
+  to[1]   = ptr[6];
+  to[2]   = ptr[5];
+  to[3]   = ptr[4];
+  to[4]   = ptr[3];
+  to[5]   = ptr[2];
+  to[6]   = ptr[1];
+  to[7]   = ptr[0];
 }
 
 
@@ -4073,43 +3855,6 @@ void Field_longlong::sql_type(String &res) const
   Floating-point numbers
  */
 
-uchar *
-Field_real::pack(uchar *to, const uchar *from,
-                 uint max_length, bool low_byte_first)
-{
-  DBUG_ENTER("Field_real::pack");
-  DBUG_ASSERT(max_length >= pack_length());
-#ifdef WORDS_BIGENDIAN
-  if (low_byte_first != table->s->db_low_byte_first)
-  {
-    const uchar *dptr= from + pack_length();
-    while (dptr-- > from)
-      *to++ = *dptr;
-    DBUG_RETURN(to);
-  }
-  else
-#endif
-    DBUG_RETURN(Field::pack(to, from, max_length, low_byte_first));
-}
-
-const uchar *
-Field_real::unpack(uchar *to, const uchar *from,
-                   uint param_data, bool low_byte_first)
-{
-  DBUG_ENTER("Field_real::unpack");
-#ifdef WORDS_BIGENDIAN
-  if (low_byte_first != table->s->db_low_byte_first)
-  {
-    const uchar *dptr= from + pack_length();
-    while (dptr-- > from)
-      *to++ = *dptr;
-    DBUG_RETURN(from + pack_length());
-  }
-  else
-#endif
-    DBUG_RETURN(Field::unpack(to, from, param_data, low_byte_first));
-}
-
 /****************************************************************************
   single precision float
 ****************************************************************************/
@@ -4120,7 +3865,7 @@ int Field_float::store(const char *from,uint len,CHARSET_INFO *cs)
   char *end;
   double nr= my_strntod(cs,(char*) from,len,&end,&error);
   if (error || (!len || ((uint) (end-from) != len &&
-                table->in_use->count_cuted_fields)))
+                         table->in_use->count_cuted_fields)))
   {
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
                 (error ? ER_WARN_DATA_OUT_OF_RANGE : WARN_DATA_TRUNCATED), 1);
@@ -4133,18 +3878,22 @@ int Field_float::store(const char *from,uint len,CHARSET_INFO *cs)
 
 int Field_float::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  int error= truncate(&nr, FLT_MAX);
-  float j= (float)nr;
-
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
+  int error= truncate_double(&nr, field_length,
+                             not_fixed ? NOT_FIXED_DEC : dec,
+                             unsigned_flag, FLT_MAX);
+  if (error)
   {
-    float4store(ptr,j);
+    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
+    if (error < 0)                                // Wrong double value
+    {
+      error= 1;
+      set_null();
+    }
   }
-  else
-#endif
-    memcpy(ptr, &j, sizeof(j));
+  float j= (float)nr;
+
+  float4store(ptr,j);
   return error;
 }
 
@@ -4160,28 +3909,14 @@ double Field_float::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   float j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float4get(j,ptr);
-  }
-  else
-#endif
-    memcpy(&j, ptr, sizeof(j));
+  float4get(j,ptr);
   return ((double) j);
 }
 
 longlong Field_float::val_int(void)
 {
   float j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float4get(j,ptr);
-  }
-  else
-#endif
-    memcpy(&j, ptr, sizeof(j));
+  float4get(j,ptr);
   return (longlong) rint(j);
 }
 
@@ -4192,14 +3927,7 @@ String *Field_float::val_str(String *val_buffer,
   ASSERT_COLUMN_MARKED_FOR_READ;
   DBUG_ASSERT(!zerofill || field_length <= MAX_FIELD_CHARLENGTH);
   float nr;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float4get(nr,ptr);
-  }
-  else
-#endif
-    memcpy(&nr, ptr, sizeof(nr));
+  float4get(nr,ptr);
 
   uint to_length= 70;
   if (val_buffer->alloc(to_length))
@@ -4233,18 +3961,8 @@ String *Field_float::val_str(String *val_buffer,
 int Field_float::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   float a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float4get(a,a_ptr);
-    float4get(b,b_ptr);
-  }
-  else
-#endif
-  {
-    memcpy(&a, a_ptr, sizeof(float));
-    memcpy(&b, b_ptr, sizeof(float));
-  }
+  float4get(a,a_ptr);
+  float4get(b,b_ptr);
   return (a < b) ? -1 : (a > b) ? 1 : 0;
 }
 
@@ -4253,14 +3971,7 @@ int Field_float::cmp(const uchar *a_ptr, const uchar *b_ptr)
 void Field_float::sort_string(uchar *to,uint length __attribute__((unused)))
 {
   float nr;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float4get(nr,ptr);
-  }
-  else
-#endif
-    memcpy(&nr, ptr, sizeof(float));
+  float4get(nr,ptr);
 
   uchar *tmp= to;
   if (nr == (float) 0.0)
@@ -4342,7 +4053,7 @@ int Field_double::store(const char *from,uint len,CHARSET_INFO *cs)
   char *end;
   double nr= my_strntod(cs,(char*) from, len, &end, &error);
   if (error || (!len || ((uint) (end-from) != len &&
-                table->in_use->count_cuted_fields)))
+                         table->in_use->count_cuted_fields)))
   {
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
                 (error ? ER_WARN_DATA_OUT_OF_RANGE : WARN_DATA_TRUNCATED), 1);
@@ -4355,17 +4066,21 @@ int Field_double::store(const char *from,uint len,CHARSET_INFO *cs)
 
 int Field_double::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  int error= truncate(&nr, DBL_MAX);
-
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
+  int error= truncate_double(&nr, field_length,
+                             not_fixed ? NOT_FIXED_DEC : dec,
+                             unsigned_flag, DBL_MAX);
+  if (error)
   {
-    float8store(ptr,nr);
+    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
+    if (error < 0)                                // Wrong double value
+    {
+      error= 1;
+      set_null();
+    }
   }
-  else
-#endif
-    doublestore(ptr,nr);
+
+  float8store(ptr,nr);
   return error;
 }
 
@@ -4380,28 +4095,31 @@ int Field_double::store(longlong nr, bool unsigned_val)
   If a field has fixed length, truncate the double argument pointed to by 'nr'
   appropriately.
   Also ensure that the argument is within [-max_value; max_value] range.
+
+  return
+    0   ok
+    -1  Illegal double value (NaN); *nr is set to 0
+    1   Value was truncated
 */
 
-int Field_real::truncate(double *nr, double max_value)
+int truncate_double(double *nr, uint field_length, uint dec,
+                    bool unsigned_flag, double max_value)
 {
-  int error= 1;
+  int error= 0;
   double res= *nr;
   
   if (isnan(res))
   {
-    res= 0;
-    set_null();
-    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
-    goto end;
+    *nr= 0;
+    return -1;
   }
   else if (unsigned_flag && res < 0)
   {
-    res= 0;
-    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
-    goto end;
+    *nr= 0;
+    return 1;
   }
 
-  if (!not_fixed)
+  if (dec < NOT_FIXED_DEC)
   {
     uint order= field_length - dec;
     uint step= array_elements(log_10) - 1;
@@ -4421,22 +4139,70 @@ int Field_real::truncate(double *nr, double max_value)
   
   if (res < -max_value)
   {
-   res= -max_value;
-   set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
+    res= -max_value;
+    error= 1;
   }
   else if (res > max_value)
   {
     res= max_value;
-    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
+    error= 1;
   }
-  else
-    error= 0;
 
-end:
   *nr= res;
   return error;
 }
 
+/*
+  Convert double to longlong / ulonglong (the value is rounded with rint()).
+  If double is outside of range, clamp the return value and set *error.
+
+  SYNOPSIS
+  double_to_longlong()
+  nr             Number to convert
+  unsigned_flag  1 if result is unsigned (returned cast to longlong)
+  error          Out: set to 1 in case of overflow, otherwise 0
+*/
+
+longlong double_to_longlong(double nr, bool unsigned_flag, bool *error)
+{
+  longlong res;
+
+  *error= 0;
+
+  nr= rint(nr);                                 // Round to an integral value
+  if (unsigned_flag)
+  {
+    if (nr < 0)
+    {
+      res= 0;
+      *error= 1;
+    }
+    else if (nr >= (double) ULONGLONG_MAX)
+    {
+      res= ~(longlong) 0;                       // All bits set
+      *error= 1;
+    }
+    else
+      res= (longlong) double2ulonglong(nr);
+  }
+  else
+  {
+    if (nr <= (double) LONGLONG_MIN)
+    {
+      res= LONGLONG_MIN;
+      *error= (nr < (double) LONGLONG_MIN);     // Exact minimum is no error
+    }
+    else if (nr >= (double) (ulonglong) LONGLONG_MAX)
+    {
+      res= LONGLONG_MAX;
+      *error= (nr > (double) LONGLONG_MAX);
+    }
+    else
+      res= (longlong) nr;
+  }
+  return res;
+}
+
 
 int Field_real::store_decimal(const my_decimal *dm)
 {
@@ -4445,18 +4211,17 @@ int Field_real::store_decimal(const my_decimal *dm)
   return store(dbl);
 }
 
+int Field_real::store_time_dec(MYSQL_TIME *ltime, uint dec)
+{                        // 'dec' is unused: value goes through store(double)
+  return store(TIME_to_double(ltime));
+}
+
+
 double Field_double::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   double j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float8get(j,ptr);
-  }
-  else
-#endif
-    doubleget(j,ptr);
+  float8get(j,ptr);
   return j;
 }
 
@@ -4465,33 +4230,13 @@ longlong Field_double::val_int(void)
   ASSERT_COLUMN_MARKED_FOR_READ;
   double j;
   longlong res;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float8get(j,ptr);
-  }
-  else
-#endif
-    doubleget(j,ptr);
-  /* Check whether we fit into longlong range */
-  if (j <= (double) LONGLONG_MIN)
-  {
-    res= (longlong) LONGLONG_MIN;
-    goto warn;
-  }
-  if (j >= (double) (ulonglong) LONGLONG_MAX)
-  {
-    res= (longlong) LONGLONG_MAX;
-    goto warn;
-  }
-  return (longlong) rint(j);
+  bool error;
+  float8get(j,ptr);
 
-warn:
+  res= double_to_longlong(j, 0, &error);
+  if (error)
   {
-    char buf[DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE];
-    String tmp(buf, sizeof(buf), &my_charset_latin1), *str;
-    str= val_str(&tmp, 0);
-    ErrConvString err(str);
+    ErrConvDouble err(j);
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                         ER_TRUNCATED_WRONG_VALUE,
                         ER(ER_TRUNCATED_WRONG_VALUE), "INTEGER",
@@ -4509,20 +4254,22 @@ my_decimal *Field_real::val_decimal(my_decimal *decimal_value)
 }
 
 
+bool Field_real::get_date(MYSQL_TIME *ltime,uint fuzzydate)
+{
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  double nr= val_real();
+  return double_to_datetime_with_warn(nr, ltime, fuzzydate, field_name);
+}
+
+
 String *Field_double::val_str(String *val_buffer,
 			      String *val_ptr __attribute__((unused)))
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   DBUG_ASSERT(!zerofill || field_length <= MAX_FIELD_CHARLENGTH);
   double nr;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float8get(nr,ptr);
-  }
-  else
-#endif
-    doubleget(nr,ptr);
+  float8get(nr,ptr);
+
   uint to_length= DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE;
   if (val_buffer->alloc(to_length))
   {
@@ -4554,18 +4301,8 @@ bool Field_double::send_binary(Protocol *protocol)
 int Field_double::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   double a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float8get(a,a_ptr);
-    float8get(b,b_ptr);
-  }
-  else
-#endif
-  {
-    doubleget(a, a_ptr);
-    doubleget(b, b_ptr);
-  }
+  float8get(a,a_ptr);
+  float8get(b,b_ptr);
   return (a < b) ? -1 : (a > b) ? 1 : 0;
 }
 
@@ -4577,14 +4314,7 @@ int Field_double::cmp(const uchar *a_ptr, const uchar *b_ptr)
 void Field_double::sort_string(uchar *to,uint length __attribute__((unused)))
 {
   double nr;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float8get(nr,ptr);
-  }
-  else
-#endif
-    doubleget(nr,ptr);
+  float8get(nr,ptr);
   change_double_for_sort(nr, to);
 }
 
@@ -4672,12 +4402,12 @@ Field_timestamp::Field_timestamp(uchar *ptr_arg, uint32 len_arg,
 				 const char *field_name_arg,
 				 TABLE_SHARE *share,
 				 CHARSET_INFO *cs)
-  :Field_str(ptr_arg, MAX_DATETIME_WIDTH, null_ptr_arg, null_bit_arg,
+  :Field_str(ptr_arg, len_arg, null_ptr_arg, null_bit_arg,
 	     unireg_check_arg, field_name_arg, cs)
 {
   /* For 4.0 MYD and 4.0 InnoDB compatibility */
-  flags|= ZEROFILL_FLAG | UNSIGNED_FLAG | BINARY_FLAG;
-  if (!share->timestamp_field && unireg_check != NONE)
+  flags|= UNSIGNED_FLAG | BINARY_FLAG;
+  if (unireg_check != NONE && !share->timestamp_field)
   {
     /* This timestamp has auto-update */
     share->timestamp_field= this;
@@ -4688,20 +4418,6 @@ Field_timestamp::Field_timestamp(uchar *ptr_arg, uint32 len_arg,
 }
 
 
-Field_timestamp::Field_timestamp(bool maybe_null_arg,
-                                 const char *field_name_arg,
-                                 CHARSET_INFO *cs)
-  :Field_str((uchar*) 0, MAX_DATETIME_WIDTH,
-             maybe_null_arg ? (uchar*) "": 0, 0,
-	     NONE, field_name_arg, cs)
-{
-  /* For 4.0 MYD and 4.0 InnoDB compatibility */
-  flags|= ZEROFILL_FLAG | UNSIGNED_FLAG | BINARY_FLAG;
-    if (unireg_check != TIMESTAMP_DN_FIELD)
-      flags|= ON_UPDATE_NOW_FLAG;
-}
-
-
 /**
   Get auto-set type for TIMESTAMP field.
 
@@ -4736,175 +4452,150 @@ timestamp_auto_set_type Field_timestamp::get_auto_set_type() const
   }
 }
 
-
-int Field_timestamp::store(const char *from,uint len,CHARSET_INFO *cs)
+my_time_t Field_timestamp::get_timestamp(ulong *sec_part) const
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  MYSQL_TIME l_time;
-  my_time_t tmp= 0;
-  int error;
-  bool have_smth_to_conv;
-  my_bool in_dst_time_gap;
-  THD *thd= table ? table->in_use : current_thd;
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  *sec_part= 0;
+  return sint4korr(ptr);
+}
 
-  /* We don't want to store invalid or fuzzy datetime values in TIMESTAMP */
-  have_smth_to_conv= (str_to_datetime(cs, from, len, &l_time,
-                                      (thd->variables.sql_mode &
-                                       MODE_NO_ZERO_DATE) |
-                                      MODE_NO_ZERO_IN_DATE, &error) >
-                      MYSQL_TIMESTAMP_ERROR);
 
-  if (error || !have_smth_to_conv)
+int Field_timestamp::store_TIME_with_warning(THD *thd, MYSQL_TIME *l_time,
+                                             const ErrConv *str,
+                                             bool was_cut,
+                                             bool have_smth_to_conv)
+{
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
+  uint error = 0;
+  my_time_t timestamp;
+
+  if (was_cut || !have_smth_to_conv)
   {
     error= 1;
     set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED,
-                         from, len, MYSQL_TIMESTAMP_DATETIME, 1);
+                         str, MYSQL_TIMESTAMP_DATETIME, 1);
   }
-
   /* Only convert a correct date (not a zero date) */
-  if (have_smth_to_conv && l_time.month)
+  if (have_smth_to_conv && l_time->month)
   {
-    if (!(tmp= TIME_to_timestamp(thd, &l_time, &in_dst_time_gap)))
+    uint conversion_error;
+    timestamp= TIME_to_timestamp(thd, l_time, &conversion_error);
+    if (timestamp == 0 && l_time->second_part == 0)
+      conversion_error= ER_WARN_DATA_OUT_OF_RANGE;
+    if (conversion_error)
     {
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                           ER_WARN_DATA_OUT_OF_RANGE,
-                           from, len, MYSQL_TIMESTAMP_DATETIME, !error);
-      error= 1;
-    }
-    else if (in_dst_time_gap)
-    {
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                           ER_WARN_INVALID_TIMESTAMP,
-                           from, len, MYSQL_TIMESTAMP_DATETIME, !error);
+      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, conversion_error,
+                           str, MYSQL_TIMESTAMP_DATETIME, !error);
       error= 1;
     }
   }
-  store_timestamp(tmp);
+  else
+  {
+    timestamp= 0;
+    l_time->second_part= 0;
+  }
+  store_TIME(timestamp, l_time->second_part);
   return error;
 }
 
 
-int Field_timestamp::store(double nr)
+int Field_timestamp::store_time_dec(MYSQL_TIME *ltime, uint dec)
 {
-  int error= 0;
-  if (nr < 0 || nr > 99991231235959.0)
-  {
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         ER_WARN_DATA_OUT_OF_RANGE,
-                         nr, MYSQL_TIMESTAMP_DATETIME);
-    nr= 0;					// Avoid overflow on buff
-    error= 1;
-  }
-  error|= Field_timestamp::store((longlong) rint(nr), FALSE);
-  return error;
+  THD *thd= table->in_use;
+  int unused;
+  MYSQL_TIME l_time= *ltime;
+  ErrConvTime str(ltime);
+  bool valid= !check_date(&l_time, pack_time(&l_time) != 0,
+                          (thd->variables.sql_mode & MODE_NO_ZERO_DATE) |
+                                       MODE_NO_ZERO_IN_DATE, &unused);
+
+  return store_TIME_with_warning(thd, &l_time, &str, false, valid);
 }
 
 
-int Field_timestamp::store(longlong nr, bool unsigned_val)
+int Field_timestamp::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
   MYSQL_TIME l_time;
-  my_time_t timestamp= 0;
   int error;
-  my_bool in_dst_time_gap;
-  THD *thd= table ? table->in_use : current_thd;
+  int have_smth_to_conv;
+  ErrConvString str(from, len, cs);
+  THD *thd= table->in_use;
 
   /* We don't want to store invalid or fuzzy datetime values in TIMESTAMP */
-  longlong tmp= number_to_datetime(nr, &l_time, (thd->variables.sql_mode &
+  have_smth_to_conv= (str_to_datetime(cs, from, len, &l_time,
+                                      (thd->variables.sql_mode &
+                                       MODE_NO_ZERO_DATE) |
+                                       MODE_NO_ZERO_IN_DATE, &error) >
+                      MYSQL_TIMESTAMP_ERROR);
+  return store_TIME_with_warning(thd, &l_time, &str, error, have_smth_to_conv);
+}
+
+
+int Field_timestamp::store(double nr)
+{
+  MYSQL_TIME l_time;
+  int error;
+  ErrConvDouble str(nr);
+  THD *thd= table->in_use;
+
+  longlong tmp= double_to_datetime(nr, &l_time, (thd->variables.sql_mode &
                                                  MODE_NO_ZERO_DATE) |
                                    MODE_NO_ZERO_IN_DATE, &error);
-  if (tmp == LL(-1))
-  {
-    error= 2;
-  }
+  return store_TIME_with_warning(thd, &l_time, &str, error, tmp != -1);
+}
 
-  if (!error && tmp)
-  {
-    if (!(timestamp= TIME_to_timestamp(thd, &l_time, &in_dst_time_gap)))
-    {
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                           ER_WARN_DATA_OUT_OF_RANGE,
-                           nr, MYSQL_TIMESTAMP_DATETIME, 1);
-      error= 1;
-    }
-    if (in_dst_time_gap)
-    {
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                           ER_WARN_INVALID_TIMESTAMP,
-                           nr, MYSQL_TIMESTAMP_DATETIME, 1);
-      error= 1;
-    }
-  } else if (error)
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         WARN_DATA_TRUNCATED,
-                         nr, MYSQL_TIMESTAMP_DATETIME, 1);
 
-  store_timestamp(timestamp);
-  return error;
+int Field_timestamp::store(longlong nr, bool unsigned_val)
+{
+  MYSQL_TIME l_time;
+  int error;
+  ErrConvInteger str(nr);
+  THD *thd= table->in_use;
+
+  /* We don't want to store invalid or fuzzy datetime values in TIMESTAMP */
+  longlong tmp= number_to_datetime(nr, 0, &l_time, (thd->variables.sql_mode &
+                                                 MODE_NO_ZERO_DATE) |
+                                   MODE_NO_ZERO_IN_DATE, &error);
+  return store_TIME_with_warning(thd, &l_time, &str, error, tmp != LL(-1));
 }
 
+
 double Field_timestamp::val_real(void)
 {
-  ASSERT_COLUMN_MARKED_FOR_READ;
   return (double) Field_timestamp::val_int();
 }
 
+
 longlong Field_timestamp::val_int(void)
 {
-  ASSERT_COLUMN_MARKED_FOR_READ;
-  uint32 temp;
-  MYSQL_TIME time_tmp;
-  THD  *thd= table ? table->in_use : current_thd;
-
-  thd->time_zone_used= 1;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    temp=uint4korr(ptr);
-  else
-#endif
-    longget(temp,ptr);
+  MYSQL_TIME ltime;
+  if (get_date(&ltime, TIME_NO_ZERO_DATE))
+    return 0;
 
-  if (temp == 0L)				// No time
-    return(0);					/* purecov: inspected */
-  
-  thd->variables.time_zone->gmt_sec_to_TIME(&time_tmp, (my_time_t)temp);
-  
-  return time_tmp.year * LL(10000000000) + time_tmp.month * LL(100000000) +
-         time_tmp.day * 1000000L + time_tmp.hour * 10000L +
-         time_tmp.minute * 100 + time_tmp.second;
+  return ltime.year * 10000000000LL + ltime.month * 100000000LL +
+         ltime.day * 1000000L + ltime.hour * 10000L +
+         ltime.minute * 100 + ltime.second;
 }
 
 
 String *Field_timestamp::val_str(String *val_buffer, String *val_ptr)
 {
-  ASSERT_COLUMN_MARKED_FOR_READ;
+  MYSQL_TIME ltime;
   uint32 temp, temp2;
-  MYSQL_TIME time_tmp;
-  THD *thd= table ? table->in_use : current_thd;
   char *to;
 
   val_buffer->alloc(field_length+1);
   to= (char*) val_buffer->ptr();
   val_buffer->length(field_length);
 
-  thd->time_zone_used= 1;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    temp=uint4korr(ptr);
-  else
-#endif
-    longget(temp,ptr);
-
-  if (temp == 0L)
+  if (get_date(&ltime, TIME_NO_ZERO_DATE))
   {				      /* Zero time is "000000" */
-    val_ptr->set(STRING_WITH_LEN("0000-00-00 00:00:00"), &my_charset_numeric);
+    val_ptr->set(zero_timestamp, field_length, &my_charset_numeric);
     return val_ptr;
   }
   val_buffer->set_charset(&my_charset_numeric);	// Safety
-  
-  thd->variables.time_zone->gmt_sec_to_TIME(&time_tmp,(my_time_t)temp);
-
-  temp= time_tmp.year % 100;
+   
+  temp= ltime.year % 100;
   if (temp < YY_PART_YEAR - 1)
   {
     *to++= '2';
@@ -4919,27 +4610,27 @@ String *Field_timestamp::val_str(String *val_buffer, String *val_ptr)
   *to++= (char) ('0'+(char) (temp2));
   *to++= (char) ('0'+(char) (temp));
   *to++= '-';
-  temp=time_tmp.month;
+  temp=ltime.month;
   temp2=temp/10; temp=temp-temp2*10;
   *to++= (char) ('0'+(char) (temp2));
   *to++= (char) ('0'+(char) (temp));
   *to++= '-';
-  temp=time_tmp.day;
+  temp=ltime.day;
   temp2=temp/10; temp=temp-temp2*10;
   *to++= (char) ('0'+(char) (temp2));
   *to++= (char) ('0'+(char) (temp));
   *to++= ' ';
-  temp=time_tmp.hour;
+  temp=ltime.hour;
   temp2=temp/10; temp=temp-temp2*10;
   *to++= (char) ('0'+(char) (temp2));
   *to++= (char) ('0'+(char) (temp));
   *to++= ':';
-  temp=time_tmp.minute;
+  temp=ltime.minute;
   temp2=temp/10; temp=temp-temp2*10;
   *to++= (char) ('0'+(char) (temp2));
   *to++= (char) ('0'+(char) (temp));
   *to++= ':';
-  temp=time_tmp.second;
+  temp=ltime.second;
   temp2=temp/10; temp=temp-temp2*10;
   *to++= (char) ('0'+(char) (temp2));
   *to++= (char) ('0'+(char) (temp));
@@ -4951,16 +4642,11 @@ String *Field_timestamp::val_str(String *val_buffer, String *val_ptr)
 
 bool Field_timestamp::get_date(MYSQL_TIME *ltime, uint fuzzydate)
 {
-  long temp;
-  THD *thd= table ? table->in_use : current_thd;
+  THD *thd= table->in_use;
   thd->time_zone_used= 1;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    temp=uint4korr(ptr);
-  else
-#endif
-    longget(temp,ptr);
-  if (temp == 0L)
+  ulong sec_part;
+  my_time_t temp= get_timestamp(&sec_part);
+  if (temp == 0 && sec_part == 0)
   {				      /* Zero time is "000000" */
     if (fuzzydate & TIME_NO_ZERO_DATE)
       return 1;
@@ -4969,61 +4655,35 @@ bool Field_timestamp::get_date(MYSQL_TIME *ltime, uint fuzzydate)
   else
   {
     thd->variables.time_zone->gmt_sec_to_TIME(ltime, (my_time_t)temp);
+    ltime->second_part= sec_part;
   }
   return 0;
 }
 
-bool Field_timestamp::get_time(MYSQL_TIME *ltime)
-{
-  return Field_timestamp::get_date(ltime,0);
-}
-
 
 bool Field_timestamp::send_binary(Protocol *protocol)
 {
-  MYSQL_TIME tm;
-  Field_timestamp::get_date(&tm, 0);
-  return protocol->store(&tm);
+  MYSQL_TIME ltime;
+  Field_timestamp::get_date(&ltime, 0);
+  return protocol->store(&ltime, 0);
 }
 
 
 int Field_timestamp::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   int32 a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    a=sint4korr(a_ptr);
-    b=sint4korr(b_ptr);
-  }
-  else
-#endif
-  {
-  longget(a,a_ptr);
-  longget(b,b_ptr);
-  }
+  a=sint4korr(a_ptr);
+  b=sint4korr(b_ptr);
   return ((uint32) a < (uint32) b) ? -1 : ((uint32) a > (uint32) b) ? 1 : 0;
 }
 
 
 void Field_timestamp::sort_string(uchar *to,uint length __attribute__((unused)))
 {
-#ifdef WORDS_BIGENDIAN
-  if (!table || !table->s->db_low_byte_first)
-  {
-    to[0] = ptr[0];
-    to[1] = ptr[1];
-    to[2] = ptr[2];
-    to[3] = ptr[3];
-  }
-  else
-#endif
-  {
-    to[0] = ptr[3];
-    to[1] = ptr[2];
-    to[2] = ptr[1];
-    to[3] = ptr[0];
-  }
+  to[0] = ptr[3];
+  to[1] = ptr[2];
+  to[2] = ptr[1];
+  to[3] = ptr[0];
 }
 
 
@@ -5033,149 +4693,442 @@ void Field_timestamp::sql_type(String &res) const
 }
 
 
-void Field_timestamp::set_time()
+int Field_timestamp::set_time()
 {
-  THD *thd= table ? table->in_use : current_thd;
-  long tmp= (long) thd->query_start();
+  THD *thd= table->in_use;
   set_notnull();
-  store_timestamp(tmp);
+  store_TIME(thd->query_start(), 0);
+  return 0;
 }
 
-/****************************************************************************
-** time type
-** In string context: HH:MM:SS
-** In number context: HHMMSS
-** Stored as a 3 byte unsigned int
-****************************************************************************/
+void Field_timestamp_hires::sql_type(String &res) const
+{
+  CHARSET_INFO *cs=res.charset();
+  res.length(cs->cset->snprintf(cs, (char*) res.ptr(), res.alloced_length(),
+                                "timestamp(%u)", dec));
+}
 
-int Field_time::store(const char *from,uint len,CHARSET_INFO *cs)
+#ifdef NOT_USED
+static void store_native(ulonglong num, uchar *to, uint bytes)
+{
+  switch(bytes) {
+  case 1: *to= (uchar)num;              break;
+  case 2: shortstore(to, (ushort)num);  break;
+  case 3: int3store(to, num); /* Sic!*/ break;
+  case 4: longstore(to, (ulong)num);    break;
+  case 8: longlongstore(to, num);       break;
+  default: DBUG_ASSERT(0);
+  }
+}
+
+static longlong read_native(const uchar *from, uint bytes)
+{
+  switch(bytes) {
+  case 1: return from[0];
+  case 2: { uint16 tmp; shortget(tmp, from); return tmp; }
+  case 3: return uint3korr(from);
+  case 4: { uint32 tmp; longget(tmp, from); return tmp; }
+  case 8: { longlong tmp; longlongget(tmp, from); return tmp; }
+  default: DBUG_ASSERT(0); return 0;
+  }
+}
+#endif
+
+static void store_lowendian(ulonglong num, uchar *to, uint bytes)
+{                       /* Write num into 'bytes' bytes at 'to' (intNstore) */
+  switch(bytes) {
+  case 1: *to= (uchar)num;    break;
+  case 2: int2store(to, num); break;
+  case 3: int3store(to, num); break;
+  case 4: int4store(to, num); break;
+  case 8: int8store(to, num); break;
+  default: DBUG_ASSERT(0);              // only widths 1-4 and 8 are handled
+  }
+}
+
+static longlong read_lowendian(const uchar *from, uint bytes)
+{                       /* Read a 'bytes'-wide value from 'from' (uintNkorr) */
+  switch(bytes) {
+  case 1: return from[0];
+  case 2: return uint2korr(from);
+  case 3: return uint3korr(from);
+  case 4: return uint4korr(from);
+  case 8: return sint8korr(from);       // note: sint8korr, not a uintNkorr
+  default: DBUG_ASSERT(0); return 0;    // unsupported width yields 0
+  }
+}
+
+static void store_bigendian(ulonglong num, uchar *to, uint bytes)
+{                    /* Write num into 'bytes' bytes at 'to' (mi_intNstore) */
+  switch(bytes) {
+  case 1: mi_int1store(to, num); break;
+  case 2: mi_int2store(to, num); break;
+  case 3: mi_int3store(to, num); break;
+  case 4: mi_int4store(to, num); break;
+  case 5: mi_int5store(to, num); break;
+  case 6: mi_int6store(to, num); break;
+  case 7: mi_int7store(to, num); break;
+  case 8: mi_int8store(to, num); break;
+  default: DBUG_ASSERT(0);              // only widths 1-8 are handled
+  }
+}
+
+static longlong read_bigendian(const uchar *from, uint bytes)
+{                   /* Read a 'bytes'-wide value from 'from' (mi_uintNkorr) */
+  switch(bytes) {
+  case 1: return mi_uint1korr(from);
+  case 2: return mi_uint2korr(from);
+  case 3: return mi_uint3korr(from);
+  case 4: return mi_uint4korr(from);
+  case 5: return mi_uint5korr(from);
+  case 6: return mi_uint6korr(from);
+  case 7: return mi_uint7korr(from);
+  case 8: return mi_sint8korr(from);    // note: mi_sint8korr, not mi_uint8korr
+  default: DBUG_ASSERT(0); return 0;    // unsupported width yields 0
+  }
+}
+
+void Field_timestamp_hires::store_TIME(my_time_t timestamp, ulong sec_part)
+{      /* Layout: 4 bytes of timestamp, then sec_part_bytes[dec] of fraction */
+  mi_int4store(ptr, timestamp);
+  store_bigendian(sec_part_shift(sec_part, dec), ptr+4, sec_part_bytes[dec]);
+}
+
+my_time_t Field_timestamp_hires::get_timestamp(ulong *sec_part) const
+{  /* Reads what store_TIME() wrote: fraction to *sec_part, seconds returned */
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  *sec_part= (long)sec_part_unshift(read_bigendian(ptr+4, sec_part_bytes[dec]), dec);
+  return mi_uint4korr(ptr);
+}
+
+double Field_timestamp_hires::val_real(void)
 {
   MYSQL_TIME ltime;
-  long tmp;
-  int error= 0;
-  int warning;
+  if (get_date(&ltime, TIME_NO_ZERO_DATE))
+    return 0;
+  
+  return ltime.year * 1e10 + ltime.month * 1e8 +
+         ltime.day * 1e6 + ltime.hour * 1e4 +
+         ltime.minute * 1e2 + ltime.second + ltime.second_part*1e-6;
+}
+
+String *Field_timestamp_hires::val_str(String *val_buffer, String *val_ptr)
+{            /* Field_timestamp::val_str() text followed by '.' + dec digits */
+  String *tmp= Field_timestamp::val_str(val_buffer, val_ptr);
+  ulong sec_part= (ulong)read_bigendian(ptr+4, sec_part_bytes[dec]);
+  
+  if (tmp->ptr() == zero_timestamp)     // zero value: nothing to append
+    return tmp;
+
+  char *buf= const_cast<char*>(tmp->ptr() + MAX_DATETIME_WIDTH);
+  for (int i=dec; i>0; i--, sec_part/=10)       // fill digits right to left
+    buf[i]= (char)(sec_part % 10) + '0';
+  buf[0]= '.';
+  buf[dec+1]= 0;
+  return tmp;
+}
+
+
+my_decimal *Field_timestamp_hires::val_decimal(my_decimal *d)
+{
+  MYSQL_TIME ltime;
+  get_date(&ltime, 0);
+  longlong intg= TIME_to_ulonglong(&ltime);
+  return seconds2my_decimal(ltime.neg, intg, ltime.second_part, d);
+}
+ 
+int Field_timestamp_hires::store_decimal(const my_decimal *d)
+{
+  ulonglong nr;
+  ulong sec_part;
+  int error;
+  MYSQL_TIME ltime;
+  longlong tmp;
+  THD *thd= table->in_use;
+  ErrConvDecimal str(d);
 
-  if (str_to_time(cs, from, len, &ltime, &warning))
+  if (my_decimal2seconds(d, &nr, &sec_part))
   {
-    tmp=0L;
+    tmp= -1;
     error= 2;
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED,
-                         from, len, MYSQL_TIMESTAMP_TIME, 1);
   }
   else
-  {
-    if (warning & MYSQL_TIME_WARN_TRUNCATED)
-    {
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                           WARN_DATA_TRUNCATED,
-                           from, len, MYSQL_TIMESTAMP_TIME, 1);
-      error= 1;
-    }
-    if (warning & MYSQL_TIME_WARN_OUT_OF_RANGE)
-    {
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                           ER_WARN_DATA_OUT_OF_RANGE,
-                           from, len, MYSQL_TIMESTAMP_TIME, !error);
-      error= 1;
-    }
-    if (ltime.month)
-      ltime.day=0;
-    tmp=(ltime.day*24L+ltime.hour)*10000L+(ltime.minute*100+ltime.second);
-  }
-  
-  if (ltime.neg)
-    tmp= -tmp;
-  int3store(ptr,tmp);
-  return error;
+    tmp= number_to_datetime(nr, sec_part, &ltime, TIME_NO_ZERO_IN_DATE |
+                                                  (thd->variables.sql_mode &
+                                                   MODE_NO_ZERO_DATE), &error);
+
+  return store_TIME_with_warning(thd, &ltime, &str, error, tmp != -1);
 }
 
+int Field_timestamp_hires::set_time()
+{           /* Store the THD's query start time, including its sec_part */
+  THD *thd= table->in_use;
+  set_notnull();
+  store_TIME(thd->query_start(), thd->query_start_sec_part());
+  return 0;                             // always returns 0
+}
 
-int Field_time::store_time(MYSQL_TIME *ltime, timestamp_type time_type)
+bool Field_timestamp_hires::send_binary(Protocol *protocol)
 {
-  long tmp= ((ltime->month ? 0 : ltime->day * 24L) + ltime->hour) * 10000L +
-            (ltime->minute * 100 + ltime->second);
-  if (ltime->neg)
-    tmp= -tmp;
-  return Field_time::store((longlong) tmp, FALSE);
+  MYSQL_TIME ltime;
+  Field_timestamp::get_date(&ltime, 0);
+  return protocol->store(&ltime, dec);
 }
 
 
-int Field_time::store(double nr)
+int Field_timestamp_hires::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  long tmp;
-  int error= 0;
-  if (nr > (double)TIME_MAX_VALUE)
+  int32 a,b;
+  ulong a_sec_part, b_sec_part;
+  a= mi_uint4korr(a_ptr);
+  a_sec_part= (ulong)read_bigendian(a_ptr+4, sec_part_bytes[dec]);
+  b= mi_uint4korr(b_ptr);
+  b_sec_part= (ulong)read_bigendian(b_ptr+4, sec_part_bytes[dec]);
+  return ((uint32) a < (uint32) b) ? -1 : ((uint32) a > (uint32) b) ? 1 :
+          a_sec_part < b_sec_part  ? -1 :  a_sec_part > b_sec_part  ? 1 : 0;
+}
+
+
+void Field_timestamp_hires::sort_string(uchar *to,uint length)
+{  /* Sort key is the stored bytes unchanged; length must be pack_length() */
+  DBUG_ASSERT(length == Field_timestamp_hires::pack_length());
+  memcpy(to, ptr, length);
+}
+
+uint32 Field_timestamp_hires::pack_length() const
+{
+  return 4 + sec_part_bytes[dec];
+}
+
+void Field_timestamp_hires::make_field(Send_field *field)
+{
+  Field::make_field(field);
+  field->decimals= dec;
+}
+
+/*
+  Store a MYSQL_TIME value into a date/time field, raising warnings as needed
+
+  RETURN
+    0  ok
+    1  Value was cut during conversion
+    2  Value was out of range
+    3  Datetime value that was cut (warning level NOTE)
+       This is used by opt_range.cc:get_mm_leaf().
+*/
+int Field_temporal::store_TIME_with_warning(MYSQL_TIME *ltime,
+                                            const ErrConv *str,
+                                            int was_cut, int have_smth_to_conv)
+{
+  MYSQL_ERROR::enum_warning_level trunc_level= MYSQL_ERROR::WARN_LEVEL_WARN;
+  int ret= 2;
+  
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
+
+  if (was_cut == 0 &&
+      have_smth_to_conv == 0 &&
+      mysql_type_to_time_type(type()) != MYSQL_TIMESTAMP_TIME) // special case: zero date
+    was_cut= MYSQL_TIME_WARN_OUT_OF_RANGE;
+  else
+  if (!have_smth_to_conv)
   {
-    tmp= TIME_MAX_VALUE;
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         ER_WARN_DATA_OUT_OF_RANGE, nr, MYSQL_TIMESTAMP_TIME);
-    error= 1;
+    bzero(ltime, sizeof(*ltime));
+    was_cut=  MYSQL_TIME_WARN_TRUNCATED;
+    ret= 1;
   }
-  else if (nr < (double)-TIME_MAX_VALUE)
+  else if (!(was_cut & MYSQL_TIME_WARN_TRUNCATED) &&
+           mysql_type_to_time_type(type()) == MYSQL_TIMESTAMP_DATE &&
+           (ltime->hour || ltime->minute || ltime->second || ltime->second_part))
   {
-    tmp= -TIME_MAX_VALUE;
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                         ER_WARN_DATA_OUT_OF_RANGE, nr, MYSQL_TIMESTAMP_TIME);
-    error= 1;
+    trunc_level= MYSQL_ERROR::WARN_LEVEL_NOTE;
+    was_cut|=  MYSQL_TIME_WARN_TRUNCATED;
+    ret= 3;
   }
-  else
+  else if (!(was_cut & MYSQL_TIME_WARN_TRUNCATED) &&
+           mysql_type_to_time_type(type()) == MYSQL_TIMESTAMP_TIME &&
+           (ltime->year || ltime->month))
   {
-    tmp=(long) floor(fabs(nr));			// Remove fractions
-    if (nr < 0)
-      tmp= -tmp;
-    if (tmp % 100 > 59 || tmp/100 % 100 > 59)
-    {
-      tmp=0;
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                           ER_WARN_DATA_OUT_OF_RANGE, nr,
-                           MYSQL_TIMESTAMP_TIME);
-      error= 1;
-    }
+    ltime->year= ltime->month= ltime->day= 0;
+    trunc_level= MYSQL_ERROR::WARN_LEVEL_NOTE;
+    was_cut|=  MYSQL_TIME_WARN_TRUNCATED;
+    ret= 3;
   }
-  int3store(ptr,tmp);
-  return error;
+
+  /*
+    Error code logic:
+    MYSQL_TIME_WARN_TRUNCATED means that the value was not a date/time at all;
+      it will be stored as a zero date/time.
+    MYSQL_TIME_WARN_OUT_OF_RANGE means that the value was a date/time,
+      that is, it was parsed as such, but the value was invalid.
+
+    Also, MYSQL_TIME_WARN_TRUNCATED is used when storing a DATETIME in
+    a DATE field and the non-zero time part is thrown away.
+  */
+  if (was_cut & MYSQL_TIME_WARN_TRUNCATED)
+    set_datetime_warning(trunc_level, WARN_DATA_TRUNCATED,
+                         str, mysql_type_to_time_type(type()), 1);
+  if (was_cut & MYSQL_TIME_WARN_OUT_OF_RANGE)
+    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE,
+                         str, mysql_type_to_time_type(type()), 1);
+
+  store_TIME(ltime);
+  return was_cut ? ret : 0;
 }
 
 
-int Field_time::store(longlong nr, bool unsigned_val)
+int Field_temporal::store(const char *from,uint len,CHARSET_INFO *cs)
+{
+  MYSQL_TIME ltime;
+  int error;                          // out argument of str_to_datetime()
+  enum enum_mysql_timestamp_type func_res;
+  THD *thd= table->in_use;
+  ErrConvString str(from, len, cs);   // the input text, handed on for warnings
+
+  func_res= str_to_datetime(cs, from, len, &ltime,
+                            (TIME_FUZZY_DATE |
+                             (thd->variables.sql_mode &
+                              (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
+                               MODE_INVALID_DATES))),  // only these sql_mode bits
+                            &error);
+  return store_TIME_with_warning(&ltime, &str, error, func_res > MYSQL_TIMESTAMP_ERROR);
+}
+
+
+int Field_temporal::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  long tmp;
   int error= 0;
-  if (nr < (longlong) -TIME_MAX_VALUE && !unsigned_val)
-  {
-    tmp= -TIME_MAX_VALUE;
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                         ER_WARN_DATA_OUT_OF_RANGE, nr,
-                         MYSQL_TIMESTAMP_TIME, 1);
-    error= 1;
-  }
-  else if (nr > (longlong) TIME_MAX_VALUE || (nr < 0 && unsigned_val))
+  MYSQL_TIME ltime;
+  THD *thd= table->in_use;
+  ErrConvDouble str(nr);              // the input number, handed on for warnings
+
+  longlong tmp= double_to_datetime(nr, &ltime,
+                                    (TIME_FUZZY_DATE |
+                                       (thd->variables.sql_mode &
+                                        (MODE_NO_ZERO_IN_DATE |
+                                         MODE_NO_ZERO_DATE |
+                                         MODE_INVALID_DATES))), &error);
+  return store_TIME_with_warning(&ltime, &str, error, tmp != -1); // -1: nothing to convert
+}
+
+
+int Field_temporal::store(longlong nr, bool unsigned_val)
+{
+  int error;                          // out argument of number_to_datetime()
+  MYSQL_TIME ltime;
+  longlong tmp;
+  THD *thd= table->in_use;
+  ErrConvInteger str(nr);             // NOTE(review): unsigned_val is not consulted
+
+  tmp= number_to_datetime(nr, 0, &ltime, (TIME_FUZZY_DATE |
+                                      (thd->variables.sql_mode &
+                                       (MODE_NO_ZERO_IN_DATE |
+                                        MODE_NO_ZERO_DATE |
+                                        MODE_INVALID_DATES))), &error);
+
+  return store_TIME_with_warning(&ltime, &str, error, tmp != -1); // -1: nothing to convert
+}
+
+
+int Field_temporal::store_time_dec(MYSQL_TIME *ltime, uint dec)  // dec is unused here
+{
+  int error = 0, have_smth_to_conv= 1;
+  MYSQL_TIME l_time= *ltime;          // validate and store a local copy
+  ErrConvTime str(ltime);             // the input value, handed on for warnings
+  /*
+    We don't perform range checking here since values stored in TIME
+    structure always fit into DATETIME range.
+  */
+  have_smth_to_conv= !check_date(&l_time, pack_time(&l_time) != 0,
+                                 (TIME_FUZZY_DATE |
+                                  (current_thd->variables.sql_mode &
+                                   (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
+                                    MODE_INVALID_DATES))), &error);
+  return store_TIME_with_warning(&l_time, &str, error, have_smth_to_conv);
+}
+
+my_decimal *Field_temporal::val_decimal(my_decimal *d)
+{
+  MYSQL_TIME ltime;
+  if (get_date(&ltime, TIME_FUZZY_DATE))        // non-zero: use a zero value instead
   {
-    tmp= TIME_MAX_VALUE;
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                         ER_WARN_DATA_OUT_OF_RANGE, nr,
-                         MYSQL_TIMESTAMP_TIME, 1);
-    error= 1;
-  }
-  else
-  {
-    tmp=(long) nr;
-    if (tmp % 100 > 59 || tmp/100 % 100 > 59)
-    {
-      tmp=0;
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                           ER_WARN_DATA_OUT_OF_RANGE, nr,
-                           MYSQL_TIMESTAMP_TIME, 1);
-      error= 1;
-    }
+    bzero(&ltime, sizeof(ltime));
+    ltime.time_type= mysql_type_to_time_type(type());  // keep this field's time type
   }
+  longlong intg= TIME_to_ulonglong(&ltime);     // integer part of the result
+  return seconds2my_decimal(ltime.neg, intg, ltime.second_part, d);
+}
+
+/****************************************************************************
+** time type
+** In string context: HH:MM:SS
+** In number context: HHMMSS
+** Stored as a 3 byte signed int
+****************************************************************************/
+
+void Field_time::store_TIME(MYSQL_TIME *ltime)
+{
+  long tmp= (ltime->day*24L+ltime->hour)*10000L +   // days are folded into hours
+            (ltime->minute*100+ltime->second);      // result is the number HHMMSS
+  if (ltime->neg)
+    tmp= -tmp;                                      // sign is kept in the stored value
   int3store(ptr,tmp);
-  return error;
+}
+
+int Field_time::store(const char *from,uint len,CHARSET_INFO *cs)
+{
+  MYSQL_TIME ltime;
+  ErrConvString str(from, len, cs);   // the input text, handed on for warnings
+  int was_cut;                        // out argument of str_to_time()
+  int have_smth_to_conv=
+    str_to_time(cs, from, len, &ltime,
+                table->in_use->variables.sql_mode &
+                (MODE_NO_ZERO_DATE | MODE_NO_ZERO_IN_DATE |
+                 MODE_INVALID_DATES),
+                &was_cut) > MYSQL_TIMESTAMP_ERROR;
+
+  return store_TIME_with_warning(&ltime, &str, was_cut, have_smth_to_conv);
 }
 
 
+int Field_time::store_time_dec(MYSQL_TIME *ltime, uint dec)  // dec is unused here
+{
+  MYSQL_TIME l_time= *ltime;          // range-check and store a local copy
+  ErrConvTime str(ltime);             // the input value, handed on for warnings
+  int was_cut= 0;
+
+  int have_smth_to_conv= !check_time_range(&l_time, decimals(), &was_cut);
+  return store_TIME_with_warning(&l_time, &str, was_cut, have_smth_to_conv);
+}
+
+
+int Field_time::store(double nr)
+{
+  MYSQL_TIME ltime;
+  ErrConvDouble str(nr);              // built before nr is made non-negative below
+  int was_cut;                        // out argument of number_to_time()
+  bool neg= nr < 0;
+  if (neg)
+    nr= -nr;                          // sign is passed separately as neg
+  int have_smth_to_conv= !number_to_time(neg, (longlong)nr,
+                                         (ulong)((nr - floor(nr)) * TIME_SECOND_PART_FACTOR),
+                                         &ltime, &was_cut);
+
+  return store_TIME_with_warning(&ltime, &str, was_cut, have_smth_to_conv);
+}
+
+
+int Field_time::store(longlong nr, bool unsigned_val)  // HHMMSS number -> TIME
+{
+  MYSQL_TIME ltime;
+  ErrConvInteger str(nr);             // the input number, handed on for warnings
+  int was_cut;                        // out argument of number_to_time()
+  if (nr < 0 && unsigned_val) nr= 99991231235959LL + 1; // really > LONGLONG_MAX
+  int have_smth_to_conv= !number_to_time(nr < 0, nr < 0 ? -nr : nr,
+                                         0, &ltime, &was_cut);
+  return store_TIME_with_warning(&ltime, &str, was_cut, have_smth_to_conv);
+}
+  
+  
 double Field_time::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
@@ -5201,7 +5154,6 @@ String *Field_time::val_str(String *val_buffer,
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   MYSQL_TIME ltime;
-  val_buffer->alloc(MAX_DATE_STRING_REP_LENGTH);
   long tmp=(long) sint3korr(ptr);
   ltime.neg= 0;
   if (tmp < 0)
@@ -5209,12 +5161,19 @@ String *Field_time::val_str(String *val_buffer,
     tmp= -tmp;
     ltime.neg= 1;
   }
+  ltime.year= ltime.month= 0;
   ltime.day= (uint) 0;
   ltime.hour= (uint) (tmp/10000);
   ltime.minute= (uint) (tmp/100 % 100);
   ltime.second= (uint) (tmp % 100);
-  make_time((DATE_TIME_FORMAT*) 0, &ltime, val_buffer);
+  ltime.second_part= 0;
+
+  val_buffer->alloc(MAX_DATE_STRING_REP_LENGTH);
+  uint length= (uint) my_time_to_str(&ltime,
+                                     const_cast<char*>(val_buffer->ptr()), 0);
+  val_buffer->length(length);
   val_buffer->set_charset(&my_charset_numeric);
+
   return val_buffer;
 }
 
@@ -5228,8 +5187,8 @@ String *Field_time::val_str(String *val_buffer,
  
 bool Field_time::get_date(MYSQL_TIME *ltime, uint fuzzydate)
 {
-  THD *thd= table ? table->in_use : current_thd;
-  if (!(fuzzydate & TIME_FUZZY_DATE))
+  THD *thd= table->in_use;
+  if (!(fuzzydate & (TIME_FUZZY_DATE|TIME_TIME_ONLY)))
   {
     push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                         ER_WARN_DATA_OUT_OF_RANGE,
@@ -5237,12 +5196,6 @@ bool Field_time::get_date(MYSQL_TIME *ltime, uint fuzzydate)
                         thd->warning_info->current_row_for_warning());
     return 1;
   }
-  return Field_time::get_time(ltime);
-}
-
-
-bool Field_time::get_time(MYSQL_TIME *ltime)
-{
   long tmp=(long) sint3korr(ptr);
   ltime->neg=0;
   if (tmp < 0)
@@ -5263,11 +5216,9 @@ bool Field_time::get_time(MYSQL_TIME *ltime)
 
 bool Field_time::send_binary(Protocol *protocol)
 {
-  MYSQL_TIME tm;
-  Field_time::get_time(&tm);
-  tm.day= tm.hour/24;				// Move hours to days
-  tm.hour-= tm.day*24;
-  return protocol->store_time(&tm);
+  MYSQL_TIME ltime;
+  Field_time::get_date(&ltime, TIME_TIME_ONLY);
+  return protocol->store_time(&ltime, 0);
 }
 
 
@@ -5291,6 +5242,121 @@ void Field_time::sql_type(String &res) const
   res.set_ascii(STRING_WITH_LEN("time"));
 }
 
+int Field_time_hires::reset()
+{
+  store_bigendian(zero_point, ptr, Field_time_hires::pack_length());  // same bytes as a zero TIME
+  return 0;
+}
+
+
+void Field_time_hires::store_TIME(MYSQL_TIME *ltime)
+{
+  ulonglong packed= sec_part_shift(pack_time(ltime), dec) + zero_point;  // get_date() undoes this
+  store_bigendian(packed, ptr, Field_time_hires::pack_length());
+}
+
+int Field_time_hires::store_decimal(const my_decimal *d)
+{
+  ulonglong nr;                       // out arguments of my_decimal2seconds():
+  ulong sec_part;                     // nr and sec_part
+  ErrConvDecimal str(d);              // the input value, handed on for warnings
+  MYSQL_TIME ltime;
+  int was_cut;                        // out argument of number_to_time()
+  bool neg= my_decimal2seconds(d, &nr, &sec_part);
+
+  int have_smth_to_conv= !number_to_time(neg, nr, sec_part, &ltime, &was_cut);
+
+  return store_TIME_with_warning(&ltime, &str, was_cut, have_smth_to_conv);
+}
+
+uint32 Field_time_hires::pack_length() const
+{
+  return time_hires_bytes[dec];       // byte count is looked up by dec
+}
+
+longlong Field_time_hires::val_int(void)
+{
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  MYSQL_TIME ltime;
+  Field_time_hires::get_date(&ltime, TIME_TIME_ONLY);  // return value not checked
+  longlong val= TIME_to_ulonglong_time(&ltime);
+  return ltime.neg ? -val : val;                       // apply the sign separately
+}
+
+double Field_time_hires::val_real(void)
+{
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  MYSQL_TIME ltime;
+  Field_time_hires::get_date(&ltime, TIME_TIME_ONLY);  // return value not checked
+  return TIME_to_double(&ltime);
+}
+
+String *Field_time_hires::val_str(String *str,
+                                  String *unused __attribute__((unused)))
+{
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  MYSQL_TIME ltime;
+  Field_time_hires::get_date(&ltime, TIME_TIME_ONLY);
+  str->alloc(field_length+1);         // NOTE(review): alloc() result is not checked
+  str->length(my_time_to_str(&ltime, (char*) str->ptr(), dec));  // format in place
+  str->set_charset(&my_charset_bin);
+  return str;
+}
+
+bool Field_time_hires::get_date(MYSQL_TIME *ltime, uint fuzzydate)
+{
+  uint32 len= pack_length();
+  longlong packed= read_bigendian(ptr, len);
+
+  packed= sec_part_unshift(packed - zero_point, dec);  // reverse of store_TIME()
+
+  unpack_time(packed, ltime);
+  /*
+    unpack_time() returns MYSQL_TIMESTAMP_DATETIME.
+    To get MYSQL_TIMESTAMP_TIME we need a few adjustments:
+    the month and day parts are folded into the hour, then cleared.
+  */
+  ltime->time_type= MYSQL_TIMESTAMP_TIME;
+  ltime->hour+= (ltime->month*32+ltime->day)*24;
+  ltime->month= ltime->day= 0;
+  return fuzzydate & (TIME_FUZZY_DATE | TIME_TIME_ONLY) ? 0 : 1;  // 1 if neither flag is set
+}
+
+
+bool Field_time_hires::send_binary(Protocol *protocol)
+{
+  MYSQL_TIME ltime;
+  Field_time_hires::get_date(&ltime, TIME_TIME_ONLY);  // return value not checked
+  return protocol->store_time(&ltime, dec);
+}
+
+
+int Field_time_hires::cmp(const uchar *a_ptr, const uchar *b_ptr)
+{
+  ulonglong a=read_bigendian(a_ptr, Field_time_hires::pack_length());
+  ulonglong b=read_bigendian(b_ptr, Field_time_hires::pack_length());
+  return (a < b) ? -1 : (a > b) ? 1 : 0;        // unsigned compare of the stored values
+}
+
+void Field_time_hires::sort_string(uchar *to,uint length __attribute__((unused)))
+{
+  DBUG_ASSERT(length == Field_time_hires::pack_length());
+  memcpy(to, ptr, length);            // stored bytes are copied as they are
+  to[0]^= 128;                        // then the top bit of the first byte is flipped
+}
+
+void Field_time_hires::sql_type(String &res) const
+{
+  CHARSET_INFO *cs=res.charset();
+  res.length(cs->cset->snprintf(cs, (char*) res.ptr(), res.alloced_length(),
+                                "time(%u)", dec));  // written into res's own buffer
+}
+
+void Field_time_hires::make_field(Send_field *field)
+{
+  Field::make_field(field);
+  field->decimals= dec;               // set after the base class has filled the field
+}
+
 /****************************************************************************
 ** year type
 ** Save in a byte the year 0, 1901->2155
@@ -5299,12 +5365,12 @@ void Field_time::sql_type(String &res) const
 
 int Field_year::store(const char *from, uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   char *end;
   int error;
   longlong nr= cs->cset->strntoull10rnd(cs, from, len, 0, &end, &error);
 
-  if (nr < 0 || (nr >= 100 && nr <= 1900) || nr > 2155 ||
+  if (nr < 0 || (nr >= 100 && nr <= 1900) || nr > 2155 || 
       error == MY_ERRNO_ERANGE)
   {
     *ptr=0;
@@ -5336,7 +5402,7 @@ int Field_year::store(const char *from, uint len,CHARSET_INFO *cs)
 
 int Field_year::store(double nr)
 {
-  if (nr < 0.0 || nr >= 2155.0)
+  if (nr < 0.0 || nr > 2155.0)
   {
     (void) Field_year::store((longlong) -1, FALSE);
     return 1;
@@ -5347,7 +5413,7 @@ int Field_year::store(double nr)
 
 int Field_year::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   if (nr < 0 || (nr >= 100 && nr <= 1900) || nr > 2155)
   {
     *ptr= 0;
@@ -5366,6 +5432,17 @@ int Field_year::store(longlong nr, bool unsigned_val)
 }
 
 
+int Field_year::store_time_dec(MYSQL_TIME *ltime, uint dec)  // dec is unused here
+{
+  ErrConvTime str(ltime);             // the input value, used in the warning below
+  if (Field_year::store(ltime->year, 0))  // only the year part is stored
+    return 1;
+
+  set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED,
+                       &str, ltime->time_type, 1);  // raised on every successful store
+  return 0;
+}
+
 bool Field_year::send_binary(Protocol *protocol)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
@@ -5406,6 +5483,15 @@ String *Field_year::val_str(String *val_buffer,
 }
 
 
+bool Field_year::get_date(MYSQL_TIME *ltime,uint fuzzydate)
+{
+  int tmp= (int) ptr[0];
+  if (tmp || field_length != 4)       // a zero byte with field_length 4 stays 0
+    tmp+= 1900;                       // otherwise the byte is an offset from 1900
+  return int_to_datetime_with_warn(tmp * 10000, ltime, fuzzydate, field_name);  // YYYY0000
+}
+
+
 void Field_year::sql_type(String &res) const
 {
   CHARSET_INFO *cs=res.charset();
@@ -5421,102 +5507,12 @@ void Field_year::sql_type(String &res) const
 ** Stored as a 4 byte unsigned int
 ****************************************************************************/
 
-int Field_date::store(const char *from, uint len,CHARSET_INFO *cs)
-{
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  MYSQL_TIME l_time;
-  uint32 tmp;
-  int error;
-  THD *thd= table ? table->in_use : current_thd;
-
-  if (str_to_datetime(cs, from, len, &l_time, TIME_FUZZY_DATE |
-                      (thd->variables.sql_mode &
-                       (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
-                        MODE_INVALID_DATES)),
-                      &error) <= MYSQL_TIMESTAMP_ERROR)
-  {
-    tmp= 0;
-    error= 2;
-  }
-  else
-    tmp=(uint32) l_time.year*10000L + (uint32) (l_time.month*100+l_time.day);
-
-  if (error)
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED,
-                         from, len, MYSQL_TIMESTAMP_DATE, 1);
-
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    int4store(ptr,tmp);
-  }
-  else
-#endif
-    longstore(ptr,tmp);
-  return error;
-}
-
-
-int Field_date::store(double nr)
+void Field_date::store_TIME(MYSQL_TIME *ltime)
 {
-  longlong tmp;
-  if (nr >= 19000000000000.0 && nr <= 99991231235959.0)
-    nr=floor(nr/1000000.0);			// Timestamp to date
-  if (nr < 0.0 || nr > 99991231.0)
-  {
-    tmp= LL(0);
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         ER_WARN_DATA_OUT_OF_RANGE,
-                         nr, MYSQL_TIMESTAMP_DATE);
-  }
-  else
-    tmp= (longlong) rint(nr);
-
-  return Field_date::store(tmp, TRUE);
+  uint tmp= ltime->year*10000L + ltime->month*100+ltime->day;
+  int4store(ptr,tmp);
 }
 
-
-int Field_date::store(longlong nr, bool unsigned_val)
-{
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  MYSQL_TIME not_used;
-  int error;
-  longlong initial_nr= nr;
-  THD *thd= table ? table->in_use : current_thd;
-
-  nr= number_to_datetime(nr, &not_used, (TIME_FUZZY_DATE |
-                                         (thd->variables.sql_mode &
-                                          (MODE_NO_ZERO_IN_DATE |
-                                           MODE_NO_ZERO_DATE |
-                                           MODE_INVALID_DATES))), &error);
-
-  if (nr == LL(-1))
-  {
-    nr= 0;
-    error= 2;
-  }
-
-  if (nr >= 19000000000000.0 && nr <= 99991231235959.0)
-    nr= (longlong) floor(nr/1000000.0);         // Timestamp to date
-
-  if (error)
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         error == 2 ? ER_WARN_DATA_OUT_OF_RANGE :
-                         WARN_DATA_TRUNCATED, initial_nr,
-                         MYSQL_TIMESTAMP_DATETIME, 1);
-
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    int4store(ptr, nr);
-  }
-  else
-#endif
-    longstore(ptr, nr);
-  return error;
-}
-
-
 bool Field_date::send_binary(Protocol *protocol)
 {
   longlong tmp= Field_date::val_int();
@@ -5532,12 +5528,7 @@ double Field_date::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   int32 j;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    j=sint4korr(ptr);
-  else
-#endif
-    longget(j,ptr);
+  j=sint4korr(ptr);
   return (double) (uint32) j;
 }
 
@@ -5546,12 +5537,7 @@ longlong Field_date::val_int(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   int32 j;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    j=sint4korr(ptr);
-  else
-#endif
-    longget(j,ptr);
+  j=sint4korr(ptr);
   return (longlong) (uint32) j;
 }
 
@@ -5561,68 +5547,38 @@ String *Field_date::val_str(String *val_buffer,
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   MYSQL_TIME ltime;
-  val_buffer->alloc(field_length);
   int32 tmp;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    tmp=sint4korr(ptr);
-  else
-#endif
-    longget(tmp,ptr);
+  tmp=sint4korr(ptr);
   ltime.neg= 0;
   ltime.year= (int) ((uint32) tmp/10000L % 10000);
   ltime.month= (int) ((uint32) tmp/100 % 100);
   ltime.day= (int) ((uint32) tmp % 100);
-  make_date((DATE_TIME_FORMAT *) 0, &ltime, val_buffer);
-  val_buffer->set_charset(&my_charset_numeric);
-  return val_buffer;
-}
 
+  val_buffer->alloc(MAX_DATE_STRING_REP_LENGTH);
+  uint length= (uint) my_date_to_str(&ltime,
+                                     const_cast<char*>(val_buffer->ptr()));
+  val_buffer->length(length);
+  val_buffer->set_charset(&my_charset_numeric);
 
-bool Field_date::get_time(MYSQL_TIME *ltime)
-{
-  bzero((char *)ltime, sizeof(MYSQL_TIME));
-  return 0;
+  return val_buffer;
 }
 
 
 int Field_date::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   int32 a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    a=sint4korr(a_ptr);
-    b=sint4korr(b_ptr);
-  }
-  else
-#endif
-  {
-    longget(a,a_ptr);
-    longget(b,b_ptr);
-  }
+  a=sint4korr(a_ptr);
+  b=sint4korr(b_ptr);
   return ((uint32) a < (uint32) b) ? -1 : ((uint32) a > (uint32) b) ? 1 : 0;
 }
 
 
 void Field_date::sort_string(uchar *to,uint length __attribute__((unused)))
 {
-#ifdef WORDS_BIGENDIAN
-  if (!table || !table->s->db_low_byte_first)
-  {
-    to[0] = ptr[0];
-    to[1] = ptr[1];
-    to[2] = ptr[2];
-    to[3] = ptr[3];
-  }
-  else
-#endif
-  {
-    to[0] = ptr[3];
-    to[1] = ptr[2];
-    to[2] = ptr[1];
-    to[3] = ptr[0];
-  }
+  to[0] = ptr[3];
+  to[1] = ptr[2];
+  to[2] = ptr[1];
+  to[3] = ptr[0];
 }
 
 void Field_date::sql_type(String &res) const
@@ -5637,153 +5593,10 @@ void Field_date::sql_type(String &res) const
 ** In number context: YYYYMMDD
 ****************************************************************************/
 
-/*
-  Store string into a date field
-
-  SYNOPSIS
-    Field_newdate::store()
-    from                Date string
-    len                 Length of date field
-    cs                  Character set (not used)
-
-  RETURN
-    0  ok
-    1  Value was cut during conversion
-    2  Wrong date string
-    3  Datetime value that was cut (warning level NOTE)
-       This is used by opt_range.cc:get_mm_leaf(). Note that there is a
-       nearly-identical class Field_date doesn't ever return 3 from its
-       store function.
-*/
-
-int Field_newdate::store(const char *from,uint len,CHARSET_INFO *cs)
-{
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  long tmp;
-  MYSQL_TIME l_time;
-  int error;
-  THD *thd= table ? table->in_use : current_thd;
-  enum enum_mysql_timestamp_type ret;
-  if ((ret= str_to_datetime(cs, from, len, &l_time,
-                            (TIME_FUZZY_DATE |
-                             (thd->variables.sql_mode &
-                              (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
-                               MODE_INVALID_DATES))),
-                            &error)) <= MYSQL_TIMESTAMP_ERROR)
-  {
-    tmp= 0;
-    error= 2;
-  }
-  else
-  {
-    tmp= l_time.day + l_time.month*32 + l_time.year*16*32;
-    if (!error && (ret != MYSQL_TIMESTAMP_DATE) &&
-        (l_time.hour || l_time.minute || l_time.second || l_time.second_part))
-      error= 3;                                 // Datetime was cut (note)
-  }
-
-  if (error)
-    set_datetime_warning(error == 3 ? MYSQL_ERROR::WARN_LEVEL_NOTE :
-                         MYSQL_ERROR::WARN_LEVEL_WARN,
-                         WARN_DATA_TRUNCATED,
-                         from, len, MYSQL_TIMESTAMP_DATE, 1);
-
-  int3store(ptr, tmp);
-  return error;
-}
-
-
-int Field_newdate::store(double nr)
-{
-  if (nr < 0.0 || nr > 99991231235959.0)
-  {
-    int3store(ptr,(int32) 0);
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         WARN_DATA_TRUNCATED, nr, MYSQL_TIMESTAMP_DATE);
-    return 1;
-  }
-  return Field_newdate::store((longlong) rint(nr), FALSE);
-}
-
-
-int Field_newdate::store(longlong nr, bool unsigned_val)
-{
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  MYSQL_TIME l_time;
-  longlong tmp;
-  int error;
-  THD *thd= table ? table->in_use : current_thd;
-  if (number_to_datetime(nr, &l_time,
-                         (TIME_FUZZY_DATE |
-                          (thd->variables.sql_mode &
-                           (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
-                            MODE_INVALID_DATES))),
-                         &error) == LL(-1))
-  {
-    tmp= 0L;
-    error= 2;
-  }
-  else
-    tmp= l_time.day + l_time.month*32 + l_time.year*16*32;
-
-  if (!error && l_time.time_type != MYSQL_TIMESTAMP_DATE &&
-      (l_time.hour || l_time.minute || l_time.second || l_time.second_part))
-    error= 3;
-
-  if (error)
-    set_datetime_warning(error == 3 ? MYSQL_ERROR::WARN_LEVEL_NOTE :
-                         MYSQL_ERROR::WARN_LEVEL_WARN,
-                         error == 2 ? 
-                         ER_WARN_DATA_OUT_OF_RANGE : WARN_DATA_TRUNCATED,
-                         nr,MYSQL_TIMESTAMP_DATE, 1);
-
-  int3store(ptr,tmp);
-  return error;
-}
-
-
-int Field_newdate::store_time(MYSQL_TIME *ltime,timestamp_type time_type)
+void Field_newdate::store_TIME(MYSQL_TIME *ltime)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  long tmp;
-  int error= 0;
-  if (time_type == MYSQL_TIMESTAMP_DATE ||
-      time_type == MYSQL_TIMESTAMP_DATETIME)
-  {
-    tmp=ltime->year*16*32+ltime->month*32+ltime->day;
-    if (check_date(ltime, tmp != 0,
-                   (TIME_FUZZY_DATE |
-                    (current_thd->variables.sql_mode &
-                     (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
-                      MODE_INVALID_DATES))), &error))
-    {
-      char buff[MAX_DATE_STRING_REP_LENGTH];
-      String str(buff, sizeof(buff), &my_charset_latin1);
-      tmp= 0;
-      make_date((DATE_TIME_FORMAT *) 0, ltime, &str);
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED,
-                           str.ptr(), str.length(), MYSQL_TIMESTAMP_DATE, 1);
-    }
-    if (!error && ltime->time_type != MYSQL_TIMESTAMP_DATE &&
-        (ltime->hour || ltime->minute || ltime->second || ltime->second_part))
-    {
-      char buff[MAX_DATE_STRING_REP_LENGTH];
-      String str(buff, sizeof(buff), &my_charset_latin1);
-      make_datetime((DATE_TIME_FORMAT *) 0, ltime, &str);
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_NOTE,
-                           WARN_DATA_TRUNCATED,
-                           str.ptr(), str.length(), MYSQL_TIMESTAMP_DATE, 1);
-      error= 3;
-    }
-  }
-  else
-  {
-    tmp=0;
-    error= 1;
-    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
-  }
+  uint tmp= ltime->year*16*32 + ltime->month*32+ltime->day;
   int3store(ptr,tmp);
-  return error;
 }
 
 
@@ -5849,14 +5662,11 @@ bool Field_newdate::get_date(MYSQL_TIME *ltime,uint fuzzydate)
   ltime->year=  (tmp >> 9);
   ltime->time_type= MYSQL_TIMESTAMP_DATE;
   ltime->hour= ltime->minute= ltime->second= ltime->second_part= ltime->neg= 0;
-  return ((!(fuzzydate & TIME_FUZZY_DATE) && (!ltime->month || !ltime->day)) ?
-          1 : 0);
-}
-
-
-bool Field_newdate::get_time(MYSQL_TIME *ltime)
-{
-  return Field_newdate::get_date(ltime,0);
+  if (!tmp)
+    return fuzzydate & TIME_NO_ZERO_DATE;
+  if (!ltime->month || !ltime->day)
+    return !(fuzzydate & TIME_FUZZY_DATE);
+  return 0;
 }
 
 
@@ -5890,150 +5700,20 @@ void Field_newdate::sql_type(String &res) const
 ** Stored as a 8 byte unsigned int. Should sometimes be change to a 6 byte int.
 ****************************************************************************/
 
-int Field_datetime::store(const char *from,uint len,CHARSET_INFO *cs)
-{
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  MYSQL_TIME time_tmp;
-  int error;
-  ulonglong tmp= 0;
-  enum enum_mysql_timestamp_type func_res;
-  THD *thd= table ? table->in_use : current_thd;
-
-  func_res= str_to_datetime(cs, from, len, &time_tmp,
-                            (TIME_FUZZY_DATE |
-                             (thd->variables.sql_mode &
-                              (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
-                               MODE_INVALID_DATES))),
-                            &error);
-  if ((int) func_res > (int) MYSQL_TIMESTAMP_ERROR)
-    tmp= TIME_to_ulonglong_datetime(&time_tmp);
-  else
-    error= 1;                                 // Fix if invalid zero date
-
-  if (error)
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         ER_WARN_DATA_OUT_OF_RANGE,
-                         from, len, MYSQL_TIMESTAMP_DATETIME, 1);
-
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    int8store(ptr,tmp);
-  }
-  else
-#endif
-    longlongstore(ptr,tmp);
-  return error;
-}
-
-
-int Field_datetime::store(double nr)
-{
-  int error= 0;
-  if (nr < 0.0 || nr > 99991231235959.0)
-  {
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                         ER_WARN_DATA_OUT_OF_RANGE,
-                         nr, MYSQL_TIMESTAMP_DATETIME);
-    nr= 0.0;
-    error= 1;
-  }
-  error|= Field_datetime::store((longlong) rint(nr), FALSE);
-  return error;
-}
-
-
-int Field_datetime::store(longlong nr, bool unsigned_val)
-{
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  MYSQL_TIME not_used;
-  int error;
-  longlong initial_nr= nr;
-  THD *thd= table ? table->in_use : current_thd;
-
-  nr= number_to_datetime(nr, &not_used, (TIME_FUZZY_DATE |
-                                         (thd->variables.sql_mode &
-                                          (MODE_NO_ZERO_IN_DATE |
-                                           MODE_NO_ZERO_DATE |
-                                           MODE_INVALID_DATES))), &error);
-
-  if (nr == LL(-1))
-  {
-    nr= 0;
-    error= 2;
-  }
-
-  if (error)
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         error == 2 ? ER_WARN_DATA_OUT_OF_RANGE :
-                         WARN_DATA_TRUNCATED, initial_nr,
-                         MYSQL_TIMESTAMP_DATETIME, 1);
-
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    int8store(ptr,nr);
-  }
-  else
-#endif
-    longlongstore(ptr,nr);
-  return error;
-}
-
-
-int Field_datetime::store_time(MYSQL_TIME *ltime,timestamp_type time_type)
+void Field_datetime::store_TIME(MYSQL_TIME *ltime)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
-  longlong tmp;
-  int error= 0;
-  /*
-    We don't perform range checking here since values stored in TIME
-    structure always fit into DATETIME range.
-  */
-  if (time_type == MYSQL_TIMESTAMP_DATE ||
-      time_type == MYSQL_TIMESTAMP_DATETIME)
-  {
-    tmp=((ltime->year*10000L+ltime->month*100+ltime->day)*LL(1000000)+
-	 (ltime->hour*10000L+ltime->minute*100+ltime->second));
-    if (check_date(ltime, tmp != 0,
-                   (TIME_FUZZY_DATE |
-                    (current_thd->variables.sql_mode &
-                     (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
-                      MODE_INVALID_DATES))), &error))
-    {
-      char buff[MAX_DATE_STRING_REP_LENGTH];
-      String str(buff, sizeof(buff), &my_charset_latin1);
-      tmp= 0;
-      make_datetime((DATE_TIME_FORMAT *) 0, ltime, &str);
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED,
-                           str.ptr(), str.length(), MYSQL_TIMESTAMP_DATETIME,1);
-    }
-  }
-  else
-  {
-    tmp=0;
-    error= 1;
-    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
-  }
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    int8store(ptr,tmp);
-  }
-  else
-#endif
-    longlongstore(ptr,tmp);
-  return error;
+  ulonglong tmp= TIME_to_ulonglong_datetime(ltime);
+  int8store(ptr,tmp);
 }
 
 bool Field_datetime::send_binary(Protocol *protocol)
 {
   MYSQL_TIME tm;
   Field_datetime::get_date(&tm, TIME_FUZZY_DATE);
-  return protocol->store(&tm);
+  return protocol->store(&tm, 0);     // second argument is always 0 for this type
 }
-
-
+  
+  
 double Field_datetime::val_real(void)
 {
   return (double) Field_datetime::val_int();
@@ -6043,12 +5723,7 @@ longlong Field_datetime::val_int(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   longlong j;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    j=sint8korr(ptr);
-  else
-#endif
-    longlongget(j,ptr);
+  j=sint8korr(ptr);
   return j;
 }
 
@@ -6056,20 +5731,16 @@ longlong Field_datetime::val_int(void)
 String *Field_datetime::val_str(String *val_buffer,
 				String *val_ptr __attribute__((unused)))
 {
-  ASSERT_COLUMN_MARKED_FOR_READ;
   val_buffer->alloc(field_length);
   val_buffer->length(field_length);
+
+  ASSERT_COLUMN_MARKED_FOR_READ;
   ulonglong tmp;
   long part1,part2;
   char *pos;
   int part3;
 
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    tmp=sint8korr(ptr);
-  else
-#endif
-    longlongget(tmp,ptr);
+  tmp= Field_datetime::val_int();
 
   /*
     Avoid problem with slow longlong arithmetic and sprintf
@@ -6119,65 +5790,148 @@ bool Field_datetime::get_date(MYSQL_TIME *ltime, uint fuzzydate)
   ltime->day=		(int) (part1%100);
   ltime->month= 	(int) (part1/100%100);
   ltime->year= 		(int) (part1/10000);
-  return (!(fuzzydate & TIME_FUZZY_DATE) && (!ltime->month || !ltime->day)) ? 1 : 0;
-}
-
-bool Field_datetime::get_time(MYSQL_TIME *ltime)
-{
-  return Field_datetime::get_date(ltime,0);
+  if (!tmp)
+    return fuzzydate & TIME_NO_ZERO_DATE;
+  if (!ltime->month || !ltime->day)
+    return !(fuzzydate & TIME_FUZZY_DATE);
+  return 0;
 }
 
 int Field_datetime::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   longlong a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    a=sint8korr(a_ptr);
-    b=sint8korr(b_ptr);
-  }
-  else
-#endif
-  {
-    longlongget(a,a_ptr);
-    longlongget(b,b_ptr);
-  }
+  a=sint8korr(a_ptr);
+  b=sint8korr(b_ptr);
   return ((ulonglong) a < (ulonglong) b) ? -1 :
     ((ulonglong) a > (ulonglong) b) ? 1 : 0;
 }
 
 void Field_datetime::sort_string(uchar *to,uint length __attribute__((unused)))
 {
-#ifdef WORDS_BIGENDIAN
-  if (!table || !table->s->db_low_byte_first)
+  to[0] = ptr[7];
+  to[1] = ptr[6];
+  to[2] = ptr[5];
+  to[3] = ptr[4];
+  to[4] = ptr[3];
+  to[5] = ptr[2];
+  to[6] = ptr[1];
+  to[7] = ptr[0];
+}
+
+
+void Field_datetime::sql_type(String &res) const
+{
+  res.set_ascii(STRING_WITH_LEN("datetime"));  /* bare type name, no "(N)" suffix */
+}
+
+void Field_datetime_hires::store_TIME(MYSQL_TIME *ltime)
+{
+  const uint32 nbytes= Field_datetime_hires::pack_length(); /* bytes written at ptr */
+  store_bigendian((ulonglong) sec_part_shift(pack_time(ltime), dec), ptr, nbytes);
+}
+
+int Field_datetime_hires::store_decimal(const my_decimal *d)
+{
+  ulonglong nr;
+  ulong sec_part;
+  int error;
+  MYSQL_TIME ltime;
+  longlong tmp;
+  THD *thd= table->in_use;
+  ErrConvDecimal str(d);  /* handed to store_TIME_with_warning() below */
+
+  if (my_decimal2seconds(d, &nr, &sec_part))  /* non-zero result takes the failure path */
   {
-    to[0] = ptr[0];
-    to[1] = ptr[1];
-    to[2] = ptr[2];
-    to[3] = ptr[3];
-    to[4] = ptr[4];
-    to[5] = ptr[5];
-    to[6] = ptr[6];
-    to[7] = ptr[7];
+    tmp= -1;  /* makes the last argument of the final call false */
+    error= 2;  /* NOTE(review): ltime is never written on this path - confirm the callee ignores it */
   }
   else
-#endif
-  {
-    to[0] = ptr[7];
-    to[1] = ptr[6];
-    to[2] = ptr[5];
-    to[3] = ptr[4];
-    to[4] = ptr[3];
-    to[5] = ptr[2];
-    to[6] = ptr[1];
-    to[7] = ptr[0];
-  }
+    tmp= number_to_datetime(nr, sec_part, &ltime, (TIME_FUZZY_DATE |
+                                          (thd->variables.sql_mode &
+                                           (MODE_NO_ZERO_IN_DATE |
+                                            MODE_NO_ZERO_DATE |
+                                            MODE_INVALID_DATES))), &error);
+
+  return store_TIME_with_warning(&ltime, &str, error, tmp != -1);  /* result is passed straight back */
 }
 
+bool Field_datetime_hires::send_binary(Protocol *protocol)
+{
+  MYSQL_TIME tm;  /* filled by get_date(); its return value is ignored here */
+  Field_datetime_hires::get_date(&tm, TIME_FUZZY_DATE);
+  return protocol->store(&tm, dec);  /* dec is forwarded as the second argument */
+}
 
-void Field_datetime::sql_type(String &res) const
+
+double Field_datetime_hires::val_real(void)
 {
-  res.set_ascii(STRING_WITH_LEN("datetime"));
+  MYSQL_TIME tm;  /* decoded value; get_date()'s return value is ignored */
+  Field_datetime_hires::get_date(&tm, TIME_FUZZY_DATE);
+  return TIME_to_double(&tm);
+}
+
+longlong Field_datetime_hires::val_int(void)
+{
+  MYSQL_TIME tm;  /* decoded value; get_date()'s return value is ignored */
+  Field_datetime_hires::get_date(&tm, TIME_FUZZY_DATE);
+  return TIME_to_ulonglong_datetime(&tm);  /* converted to the longlong return type */
+}
+
+
+String *Field_datetime_hires::val_str(String *str,
+                                      String *unused __attribute__((unused)))
+{
+  MYSQL_TIME tm;
+  str->alloc(field_length + 1);  /* one byte more than the length reported below */
+  str->length(field_length);
+  Field_datetime_hires::get_date(&tm, TIME_FUZZY_DATE);  /* return value ignored */
+  my_datetime_to_str(&tm, (char*) str->ptr(), dec);  /* formats straight into str's buffer */
+  str->set_charset(&my_charset_bin);
+  return str;
+}
+
+bool Field_datetime_hires::get_date(MYSQL_TIME *ltime, uint fuzzydate)
+{
+  const ulonglong raw= read_bigendian(ptr, Field_datetime_hires::pack_length());
+  unpack_time(sec_part_unshift(raw, dec), ltime);  /* runs before any of the checks below */
+  if (raw == 0)  /* all-zero stored value: fails only when TIME_NO_ZERO_DATE is set */
+    return (fuzzydate & TIME_NO_ZERO_DATE) != 0;
+  if (ltime->month && ltime->day)
+    return false;
+  return (fuzzydate & TIME_FUZZY_DATE) == 0;  /* zero month/day passes only with TIME_FUZZY_DATE */
+}
+
+uint32 Field_datetime_hires::pack_length() const
+{
+  return datetime_hires_bytes[dec];  /* table lookup indexed by dec */
+}
+
+int Field_datetime_hires::cmp(const uchar *a_ptr, const uchar *b_ptr)
+{
+  const uint32 nbytes= Field_datetime_hires::pack_length();  /* same width for both operands */
+  const ulonglong lhs= read_bigendian(a_ptr, nbytes), rhs= read_bigendian(b_ptr, nbytes);
+  return (lhs > rhs) - (lhs < rhs);  /* -1, 0 or 1, comparing as unsigned */
+}
+
+void Field_datetime_hires::sort_string(uchar *to,
+                                       uint length)  /* read below, so not marked unused */
+{
+  DBUG_ASSERT(length == Field_datetime_hires::pack_length());  /* caller must pass the packed width */
+  memcpy(to, ptr, length);  /* sort key is the stored bytes, copied unchanged */
+}
+
+
+void Field_datetime_hires::sql_type(String &res) const
+{
+  CHARSET_INFO *res_cs= res.charset();  /* format using the result string's own charset */
+  char *out= (char*) res.ptr();         /* written in place; length is set from snprintf's result */
+  res.length(res_cs->cset->snprintf(res_cs, out, res.alloced_length(), "datetime(%u)", dec));
+}
+
+void Field_datetime_hires::make_field(Send_field *field)
+{
+  Field::make_field(field);  /* base class fills *field first */
+  field->decimals= dec;  /* assigned after the base call, so this is the value left in place */
 }
 
 /****************************************************************************
@@ -6286,7 +6040,7 @@ Field_longstr::report_if_important_data(const char *pstr, const char *end,
 
 int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   uint copy_length;
   const char *well_formed_error_pos;
   const char *cannot_convert_error_pos;
@@ -6327,7 +6081,7 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
 
 int Field_str::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   char buff[DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE];
   uint local_char_length= field_length / charset()->mbmaxlen;
   size_t length= 0;
@@ -6548,8 +6302,7 @@ void Field_string::sql_type(String &res) const
 
   length= cs->cset->snprintf(cs,(char*) res.ptr(),
                              res.alloced_length(), "%s(%d)",
-                             ((type() == MYSQL_TYPE_VAR_STRING &&
-                               !thd->variables.new_mode) ?
+                             (type() == MYSQL_TYPE_VAR_STRING ?
                               (has_charset() ? "varchar" : "varbinary") :
 			      (has_charset() ? "char" : "binary")),
                              (int) field_length / charset()->mbmaxlen);
@@ -6560,9 +6313,7 @@ void Field_string::sql_type(String &res) const
 }
 
 
-uchar *Field_string::pack(uchar *to, const uchar *from,
-                          uint max_length,
-                          bool low_byte_first __attribute__((unused)))
+uchar *Field_string::pack(uchar *to, const uchar *from, uint max_length)
 {
   uint length=      min(field_length,max_length);
   uint local_char_length= max_length/field_charset->mbmaxlen;
@@ -6618,10 +6369,7 @@ uchar *Field_string::pack(uchar *to, const uchar *from,
    @return  New pointer into memory based on from + length of the data
 */
 const uchar *
-Field_string::unpack(uchar *to,
-                     const uchar *from,
-                     uint param_data,
-                     bool low_byte_first __attribute__((unused)))
+Field_string::unpack(uchar *to, const uchar *from, uint param_data)
 {
   uint from_length, length;
 
@@ -6790,7 +6538,7 @@ int Field_varstring::do_save_field_metadata(uchar *metadata_ptr)
 
 int Field_varstring::store(const char *from,uint length,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   uint copy_length;
   const char *well_formed_error_pos;
   const char *cannot_convert_error_pos;
@@ -7035,9 +6783,7 @@ uint32 Field_varstring::data_length()
   Here the number of length bytes are depending on the given max_length
 */
 
-uchar *Field_varstring::pack(uchar *to, const uchar *from,
-                             uint max_length,
-                             bool low_byte_first __attribute__((unused)))
+uchar *Field_varstring::pack(uchar *to, const uchar *from, uint max_length)
 {
   uint length= length_bytes == 1 ? (uint) *from : uint2korr(from);
   set_if_smaller(max_length, field_length);
@@ -7072,9 +6818,7 @@ uchar *Field_varstring::pack(uchar *to, const uchar *from,
    @return  New pointer into memory based on from + length of the data
 */
 const uchar *
-Field_varstring::unpack(uchar *to, const uchar *from,
-                        uint param_data,
-                        bool low_byte_first __attribute__((unused)))
+Field_varstring::unpack(uchar *to, const uchar *from, uint param_data)
 {
   uint length;
   uint l_bytes= (param_data && (param_data < field_length)) ? 
@@ -7248,109 +6992,21 @@ Field_blob::Field_blob(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
 }
 
 
-void Field_blob::store_length(uchar *i_ptr, 
-                              uint i_packlength, 
-                              uint32 i_number, 
-                              bool low_byte_first)
+void Field_blob::store_length(uchar *i_ptr, uint i_packlength, uint32 i_number)
 {
-  switch (i_packlength) {
-  case 1:
-    i_ptr[0]= (uchar) i_number;
-    break;
-  case 2:
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first)
-    {
-      int2store(i_ptr,(unsigned short) i_number);
-    }
-    else
-#endif
-      shortstore(i_ptr,(unsigned short) i_number);
-    break;
-  case 3:
-    int3store(i_ptr,i_number);
-    break;
-  case 4:
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first)
-    {
-      int4store(i_ptr,i_number);
-    }
-    else
-#endif
-      longstore(i_ptr,i_number);
-  }
+  store_lowendian(i_number, i_ptr, i_packlength);
 }
 
 
-uint32 Field_blob::get_length(const uchar *pos, uint packlength_arg, bool low_byte_first)
+uint32 Field_blob::get_length(const uchar *pos, uint packlength_arg)
 {
-  switch (packlength_arg) {
-  case 1:
-    return (uint32) pos[0];
-  case 2:
-    {
-      uint16 tmp;
-#ifdef WORDS_BIGENDIAN
-      if (low_byte_first)
-	tmp=sint2korr(pos);
-      else
-#endif
-	shortget(tmp,pos);
-      return (uint32) tmp;
-    }
-  case 3:
-    return (uint32) uint3korr(pos);
-  case 4:
-    {
-      uint32 tmp;
-#ifdef WORDS_BIGENDIAN
-      if (low_byte_first)
-	tmp=uint4korr(pos);
-      else
-#endif
-	longget(tmp,pos);
-      return (uint32) tmp;
-    }
-  }
-  /* When expanding this, see also MAX_FIELD_BLOBLENGTH. */
-  return 0;					// Impossible
-}
-
-
-/**
-  Put a blob length field into a record buffer.
-
-  Depending on the maximum length of a blob, its length field is
-  put into 1 to 4 bytes. This is a property of the blob object,
-  described by 'packlength'.
-
-  @param pos                 Pointer into the record buffer.
-  @param length              The length value to put.
-*/
-
-void Field_blob::put_length(uchar *pos, uint32 length)
-{
-  switch (packlength) {
-  case 1:
-    *pos= (char) length;
-    break;
-  case 2:
-    int2store(pos, length);
-    break;
-  case 3:
-    int3store(pos, length);
-    break;
-  case 4:
-    int4store(pos, length);
-    break;
-  }
+  return (uint32)read_lowendian(pos, packlength_arg);
 }
 
 
 int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   uint copy_length, new_length;
   const char *well_formed_error_pos;
   const char *cannot_convert_error_pos;
@@ -7685,20 +7341,7 @@ void Field_blob::sort_string(uchar *to,uint length)
       length-= packlength;
       pos= to+length;
 
-      switch (packlength) {
-      case 1:
-        *pos= (char) blob_length;
-        break;
-      case 2:
-        mi_int2store(pos, blob_length);
-        break;
-      case 3:
-        mi_int3store(pos, blob_length);
-        break;
-      case 4:
-        mi_int4store(pos, blob_length);
-        break;
-      }
+      store_bigendian(blob_length, pos, packlength);
     }
     memcpy(&blob, ptr+packlength, sizeof(char*));
     
@@ -7728,8 +7371,7 @@ void Field_blob::sql_type(String &res) const
   }
 }
 
-uchar *Field_blob::pack(uchar *to, const uchar *from,
-                        uint max_length, bool low_byte_first)
+uchar *Field_blob::pack(uchar *to, const uchar *from, uint max_length)
 {
   uchar *save= ptr;
   ptr= (uchar*) from;
@@ -7740,7 +7382,7 @@ uchar *Field_blob::pack(uchar *to, const uchar *from,
     length given is smaller than the actual length of the blob, we
     just store the initial bytes of the blob.
   */
-  store_length(to, packlength, min(length, max_length), low_byte_first);
+  store_length(to, packlength, min(length, max_length));
 
   /*
     Store the actual blob data, which will occupy 'length' bytes.
@@ -7772,18 +7414,14 @@ uchar *Field_blob::pack(uchar *to, const uchar *from,
 
    @return  New pointer into memory based on from + length of the data
 */
-const uchar *Field_blob::unpack(uchar *to, 
-                                const uchar *from,
-                                uint param_data,
-                                bool low_byte_first)
+const uchar *Field_blob::unpack(uchar *to, const uchar *from, uint param_data)
 {
   DBUG_ENTER("Field_blob::unpack");
-  DBUG_PRINT("enter", ("to: 0x%lx; from: 0x%lx;"
-                       " param_data: %u; low_byte_first: %d",
-                       (ulong) to, (ulong) from, param_data, low_byte_first));
+  DBUG_PRINT("enter", ("to: 0x%lx; from: 0x%lx; param_data: %u",
+                       (ulong) to, (ulong) from, param_data));
   uint const master_packlength=
     param_data > 0 ? param_data & 0xFF : packlength;
-  uint32 const length= get_length(from, master_packlength, low_byte_first);
+  uint32 const length= get_length(from, master_packlength);
   DBUG_DUMP("packed", from, length + master_packlength);
   bitmap_set_bit(table->write_set, field_index);
   store(reinterpret_cast<const char*>(from) + master_packlength,
@@ -7792,6 +7430,7 @@ const uchar *Field_blob::unpack(uchar *to,
   DBUG_RETURN(from + master_packlength + length);
 }
 
+
 uint Field_blob::packed_col_length(const uchar *data_ptr, uint length)
 {
   if (length > 255)
@@ -7929,39 +7568,7 @@ enum ha_base_keytype Field_enum::key_type() const
 
 void Field_enum::store_type(ulonglong value)
 {
-  switch (packlength) {
-  case 1: ptr[0]= (uchar) value;  break;
-  case 2:
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int2store(ptr,(unsigned short) value);
-  }
-  else
-#endif
-    shortstore(ptr,(unsigned short) value);
-  break;
-  case 3: int3store(ptr,(long) value); break;
-  case 4:
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int4store(ptr,value);
-  }
-  else
-#endif
-    longstore(ptr,(long) value);
-  break;
-  case 8:
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int8store(ptr,value);
-  }
-  else
-#endif
-    longlongstore(ptr,value); break;
-  }
+  store_lowendian(value, ptr, packlength);
 }
 
 
@@ -7973,7 +7580,7 @@ void Field_enum::store_type(ulonglong value)
 
 int Field_enum::store(const char *from,uint length,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int err= 0;
   uint32 not_used;
   char buff[STRING_BUFFER_USUAL_SIZE];
@@ -8022,7 +7629,7 @@ int Field_enum::store(double nr)
 
 int Field_enum::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   if ((ulonglong) nr > typelib->count || nr == 0)
   {
@@ -8047,46 +7654,7 @@ double Field_enum::val_real(void)
 longlong Field_enum::val_int(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
-  switch (packlength) {
-  case 1:
-    return (longlong) ptr[0];
-  case 2:
-  {
-    uint16 tmp;
-#ifdef WORDS_BIGENDIAN
-    if (table->s->db_low_byte_first)
-      tmp=sint2korr(ptr);
-    else
-#endif
-      shortget(tmp,ptr);
-    return (longlong) tmp;
-  }
-  case 3:
-    return (longlong) uint3korr(ptr);
-  case 4:
-  {
-    uint32 tmp;
-#ifdef WORDS_BIGENDIAN
-    if (table->s->db_low_byte_first)
-      tmp=uint4korr(ptr);
-    else
-#endif
-      longget(tmp,ptr);
-    return (longlong) tmp;
-  }
-  case 8:
-  {
-    longlong tmp;
-#ifdef WORDS_BIGENDIAN
-    if (table->s->db_low_byte_first)
-      tmp=sint8korr(ptr);
-    else
-#endif
-      longlongget(tmp,ptr);
-    return tmp;
-  }
-  }
-  return 0;					// impossible
+  return read_lowendian(ptr, packlength);
 }
 
 
@@ -8191,7 +7759,7 @@ Field *Field_enum::new_field(MEM_ROOT *root, TABLE *new_table,
 
 int Field_set::store(const char *from,uint length,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   bool got_warning= 0;
   int err= 0;
   char *not_used;
@@ -8231,7 +7799,7 @@ int Field_set::store(const char *from,uint length,CHARSET_INFO *cs)
 
 int Field_set::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   ulonglong max_nr;
 
@@ -8395,51 +7963,20 @@ uint Field_enum::is_equal(Create_field *new_field)
 }
 
 
-uchar *Field_enum::pack(uchar *to, const uchar *from,
-                        uint max_length, bool low_byte_first)
+uchar *Field_enum::pack(uchar *to, const uchar *from, uint max_length)
 {
   DBUG_ENTER("Field_enum::pack");
   DBUG_PRINT("debug", ("packlength: %d", packlength));
   DBUG_DUMP("from", from, packlength);
-
-  switch (packlength)
-  {
-  case 1:
-    *to = *from;
-    DBUG_RETURN(to + 1);
-  case 2: DBUG_RETURN(pack_int16(to, from, low_byte_first));
-  case 3: DBUG_RETURN(pack_int24(to, from, low_byte_first));
-  case 4: DBUG_RETURN(pack_int32(to, from, low_byte_first));
-  case 8: DBUG_RETURN(pack_int64(to, from, low_byte_first));
-  default:
-    DBUG_ASSERT(0);
-  }
-  MY_ASSERT_UNREACHABLE();
-  DBUG_RETURN(NULL);
+  DBUG_RETURN(pack_int(to, from, packlength));
 }
 
-const uchar *Field_enum::unpack(uchar *to, const uchar *from,
-                                uint param_data, bool low_byte_first)
+const uchar *Field_enum::unpack(uchar *to, const uchar *from, uint param_data)
 {
   DBUG_ENTER("Field_enum::unpack");
   DBUG_PRINT("debug", ("packlength: %d", packlength));
   DBUG_DUMP("from", from, packlength);
-
-  switch (packlength)
-  {
-  case 1:
-    *to = *from;
-    DBUG_RETURN(from + 1);
-
-  case 2: DBUG_RETURN(unpack_int16(to, from, low_byte_first));
-  case 3: DBUG_RETURN(unpack_int24(to, from, low_byte_first));
-  case 4: DBUG_RETURN(unpack_int32(to, from, low_byte_first));
-  case 8: DBUG_RETURN(unpack_int64(to, from, low_byte_first));
-  default:
-    DBUG_ASSERT(0);
-  }
-  MY_ASSERT_UNREACHABLE();
-  DBUG_RETURN(NULL);
+  DBUG_RETURN(unpack_int(to, from, packlength));
 }
 
 
@@ -8602,10 +8139,11 @@ uint Field_bit::is_equal(Create_field *new_field)
                        
 int Field_bit::store(const char *from, uint length, CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int delta;
 
-  for (; length && !*from; from++, length--) ;         // skip left 0's
+  for (; length && !*from; from++, length--)          // skip left 0's
+    ;
   delta= bytes_in_rec - length;
 
   if (delta < -1 ||
@@ -8887,8 +8425,7 @@ void Field_bit::sql_type(String &res) const
 
 
 uchar *
-Field_bit::pack(uchar *to, const uchar *from, uint max_length,
-                bool low_byte_first __attribute__((unused)))
+Field_bit::pack(uchar *to, const uchar *from, uint max_length)
 {
   DBUG_ASSERT(max_length > 0);
   uint length;
@@ -8935,8 +8472,7 @@ Field_bit::pack(uchar *to, const uchar *from, uint max_length,
    @return  New pointer into memory based on from + length of the data
 */
 const uchar *
-Field_bit::unpack(uchar *to, const uchar *from, uint param_data,
-                  bool low_byte_first __attribute__((unused)))
+Field_bit::unpack(uchar *to, const uchar *from, uint param_data)
 {
   DBUG_ENTER("Field_bit::unpack");
   DBUG_PRINT("enter", ("to: %p, from: %p, param_data: 0x%x",
@@ -9027,11 +8563,12 @@ Field_bit_as_char::Field_bit_as_char(uchar *ptr_arg, uint32 len_arg,
 
 int Field_bit_as_char::store(const char *from, uint length, CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int delta;
   uchar bits= (uchar) (field_length & 7);
 
-  for (; length && !*from; from++, length--) ;         // skip left 0's
+  for (; length && !*from; from++, length--)          // skip left 0's
+    ;
   delta= bytes_in_rec - length;
 
   if (delta < 0 ||
@@ -9220,6 +8757,9 @@ void Create_field::init_for_tmp_table(enum_field_types sql_type_arg,
                        FLAGSTR(pack_flag, FIELDFLAG_BLOB),
                        FLAGSTR(pack_flag, FIELDFLAG_DECIMAL),
                        f_packtype(pack_flag)));
+  vcol_info= 0;
+  stored_in_db= TRUE;
+
   DBUG_VOID_RETURN;
 }
 
@@ -9240,6 +8780,7 @@ void Create_field::init_for_tmp_table(enum_field_types sql_type_arg,
   @param fld_interval_list     Interval list (if any)
   @param fld_charset           Field charset
   @param fld_geom_type         Field geometry type (if any)
+  @param fld_vcol_info         Virtual column data
 
   @retval
     FALSE on success
@@ -9252,17 +8793,20 @@ bool Create_field::init(THD *thd, char *fld_name, enum_field_types fld_type,
                         uint fld_type_modifier, Item *fld_default_value,
                         Item *fld_on_update_value, LEX_STRING *fld_comment,
                         char *fld_change, List<String> *fld_interval_list,
-                        CHARSET_INFO *fld_charset, uint fld_geom_type)
+                        CHARSET_INFO *fld_charset, uint fld_geom_type,
+			Virtual_column_info *fld_vcol_info,
+                        engine_option_value *create_opt)
 {
   uint sign_len, allowed_type_modifier= 0;
   ulong max_field_charlength= MAX_FIELD_CHARLENGTH;
 
   DBUG_ENTER("Create_field::init()");
-  
+
   field= 0;
   field_name= fld_name;
   def= fld_default_value;
   flags= fld_type_modifier;
+  option_list= create_opt;
   unireg_check= (fld_type_modifier & AUTO_INCREMENT_FLAG ?
                  Field::NEXT_NUMBER : Field::NONE);
   decimals= fld_decimals ? (uint)atoi(fld_decimals) : 0;
@@ -9283,6 +8827,33 @@ bool Create_field::init(THD *thd, char *fld_name, enum_field_types fld_type,
   interval_list.empty();
 
   comment= *fld_comment;
+  vcol_info= fld_vcol_info;
+  stored_in_db= TRUE;
+
+  /* Initialize data for a computed field */
+  if ((uchar)fld_type == (uchar)MYSQL_TYPE_VIRTUAL)
+  {
+    DBUG_ASSERT(vcol_info && vcol_info->expr_item);
+    stored_in_db= vcol_info->is_stored();
+    /*
+      Walk through the Item tree checking if all items are valid
+      to be part of the virtual column
+    */
+    if (vcol_info->expr_item->walk(&Item::check_vcol_func_processor, 0, NULL))
+    {
+      my_error(ER_VIRTUAL_COLUMN_FUNCTION_IS_NOT_ALLOWED, MYF(0), field_name);
+      DBUG_RETURN(TRUE);
+    }
+
+    /*
+      Make a field created for the real type.
+      Note that regular and computed fields differ from each other only by
+      Field::vcol_info. It is always NULL for a column that is not
+      computed.
+    */
+    sql_type= fld_type= vcol_info->get_real_type();
+  }
+
   /*
     Set NO_DEFAULT_VALUE_FLAG if this field doesn't have a default value and
     it is NOT NULL, not an AUTO_INCREMENT field and not a TIMESTAMP.
@@ -9452,28 +9023,14 @@ bool Create_field::init(THD *thd, char *fld_name, enum_field_types fld_type,
     }
     break;
   case MYSQL_TYPE_TIMESTAMP:
-    if (fld_length == NULL)
-    {
-      length= MAX_DATETIME_WIDTH;
-    }
-    else if (length != MAX_DATETIME_WIDTH)
+    if (length > MAX_DATETIME_PRECISION)
     {
-      /*
-        We support only even TIMESTAMP lengths less or equal than 14
-        and 19 as length of 4.1 compatible representation.  Silently 
-        shrink it to MAX_DATETIME_COMPRESSED_WIDTH.
-      */
-      DBUG_ASSERT(MAX_DATETIME_COMPRESSED_WIDTH < UINT_MAX);
-      if (length != UINT_MAX)  /* avoid overflow; is safe because of min() */
-        length= ((length+1)/2)*2;
-      length= min(length, MAX_DATETIME_COMPRESSED_WIDTH);
+      my_error(ER_TOO_BIG_PRECISION, MYF(0), length, fld_name,
+               MAX_DATETIME_PRECISION);
+      DBUG_RETURN(TRUE);
     }
-    flags|= ZEROFILL_FLAG | UNSIGNED_FLAG;
-    /*
-      Since we silently rewrite down to MAX_DATETIME_COMPRESSED_WIDTH bytes,
-      the parser should not raise errors unless bizzarely large. 
-     */
-    max_field_charlength= UINT_MAX;
+    length+= MAX_DATETIME_WIDTH + (length ? 1 : 0);
+    flags|= UNSIGNED_FLAG;
 
     if (fld_default_value)
     {
@@ -9514,17 +9071,29 @@ bool Create_field::init(THD *thd, char *fld_name, enum_field_types fld_type,
     }
     break;
   case MYSQL_TYPE_DATE:
-    /* Old date type. */
+    /* We don't support creation of MYSQL_TYPE_DATE anymore */
     sql_type= MYSQL_TYPE_NEWDATE;
     /* fall trough */
   case MYSQL_TYPE_NEWDATE:
     length= MAX_DATE_WIDTH;
     break;
   case MYSQL_TYPE_TIME:
-    length= 10;
+    if (length > MAX_DATETIME_PRECISION)
+    {
+      my_error(ER_TOO_BIG_PRECISION, MYF(0), length, fld_name,
+               MAX_DATETIME_PRECISION);
+      DBUG_RETURN(TRUE);
+    }
+    length+= MIN_TIME_WIDTH + (length ? 1 : 0);
     break;
   case MYSQL_TYPE_DATETIME:
-    length= MAX_DATETIME_WIDTH;
+    if (length > MAX_DATETIME_PRECISION)
+    {
+      my_error(ER_TOO_BIG_PRECISION, MYF(0), length, fld_name,
+               MAX_DATETIME_PRECISION);
+      DBUG_RETURN(TRUE);
+    }
+    length+= MAX_DATETIME_WIDTH + (length ? 1 : 0);
     break;
   case MYSQL_TYPE_SET:
     {
@@ -9572,7 +9141,7 @@ bool Create_field::init(THD *thd, char *fld_name, enum_field_types fld_type,
     }
   case MYSQL_TYPE_DECIMAL:
     DBUG_ASSERT(0); /* Was obsolete */
-  }
+ }
   /* Remember the value of length */
   char_length= length;
 
@@ -9644,14 +9213,22 @@ uint32 calc_pack_length(enum_field_types type,uint32 length)
   case MYSQL_TYPE_TINY	: return 1;
   case MYSQL_TYPE_SHORT : return 2;
   case MYSQL_TYPE_INT24:
-  case MYSQL_TYPE_NEWDATE:
-  case MYSQL_TYPE_TIME:   return 3;
+  case MYSQL_TYPE_NEWDATE: return 3;
+  case MYSQL_TYPE_TIME:   return length > MIN_TIME_WIDTH
+                            ? time_hires_bytes[length - 1 - MIN_TIME_WIDTH]
+                            : 3;
   case MYSQL_TYPE_TIMESTAMP:
+                          return length > MAX_DATETIME_WIDTH
+                            ? 4 + sec_part_bytes[length - 1 - MAX_DATETIME_WIDTH]
+                            : 4;
   case MYSQL_TYPE_DATE:
   case MYSQL_TYPE_LONG	: return 4;
   case MYSQL_TYPE_FLOAT : return sizeof(float);
   case MYSQL_TYPE_DOUBLE: return sizeof(double);
   case MYSQL_TYPE_DATETIME:
+                          return length > MAX_DATETIME_WIDTH
+                            ? datetime_hires_bytes[length - 1 - MAX_DATETIME_WIDTH]
+                            : 8;
   case MYSQL_TYPE_LONGLONG: return 8;	/* Don't crash if no longlong */
   case MYSQL_TYPE_NULL	: return 0;
   case MYSQL_TYPE_TINY_BLOB:	return 1+portable_sizeof_char_ptr;
@@ -9832,9 +9409,12 @@ Field *make_field(TABLE_SHARE *share, uchar *ptr, uint32 field_length,
 			      f_is_zerofill(pack_flag) != 0,
 			      f_is_dec(pack_flag) == 0);
   case MYSQL_TYPE_TIMESTAMP:
-    return new Field_timestamp(ptr,field_length, null_pos, null_bit,
-                               unireg_check, field_name, share,
-                               field_charset);
+  {
+    uint dec= field_length > MAX_DATETIME_WIDTH ?
+                       field_length - MAX_DATETIME_WIDTH - 1: 0;
+    return new_Field_timestamp(ptr, null_pos, null_bit, unireg_check,
+                               field_name, share, dec, field_charset);
+  }
   case MYSQL_TYPE_YEAR:
     return new Field_year(ptr,field_length,null_pos,null_bit,
 			  unireg_check, field_name);
@@ -9845,11 +9425,19 @@ Field *make_field(TABLE_SHARE *share, uchar *ptr, uint32 field_length,
     return new Field_newdate(ptr,null_pos,null_bit,
 			     unireg_check, field_name, field_charset);
   case MYSQL_TYPE_TIME:
-    return new Field_time(ptr,null_pos,null_bit,
-			  unireg_check, field_name, field_charset);
+  {
+    uint dec= field_length > MIN_TIME_WIDTH ?
+                       field_length - MIN_TIME_WIDTH - 1: 0;
+    return new_Field_time(ptr, null_pos, null_bit, unireg_check,
+                              field_name, dec, field_charset);
+  }
   case MYSQL_TYPE_DATETIME:
-    return new Field_datetime(ptr,null_pos,null_bit,
-			      unireg_check, field_name, field_charset);
+  {
+    uint dec= field_length > MAX_DATETIME_WIDTH ?
+                       field_length - MAX_DATETIME_WIDTH - 1: 0;
+    return new_Field_datetime(ptr, null_pos, null_bit, unireg_check,
+                              field_name, dec, field_charset);
+  }
   case MYSQL_TYPE_NULL:
     return new Field_null(ptr, field_length, unireg_check, field_name,
                           field_charset);
@@ -9882,6 +9470,10 @@ Create_field::Create_field(Field *old_field,Field *orig_field)
   charset=    old_field->charset();		// May be NULL ptr
   comment=    old_field->comment;
   decimals=   old_field->decimals();
+  vcol_info=  old_field->vcol_info;
+  stored_in_db= old_field->stored_in_db;
+  option_list= old_field->option_list;
+  option_struct= old_field->option_struct;
 
   /* Fix if the original table had 4 byte pointer blobs */
   if (flags & BLOB_FLAG)
@@ -9989,6 +9581,19 @@ uint32 Field_blob::char_length()
 
 
 /**
+  Makes a clone of this object for ALTER/CREATE TABLE
+
+  @param mem_root        MEM_ROOT where to clone the field
+*/
+
+Create_field *Create_field::clone(MEM_ROOT *mem_root) const
+{
+  /* Copy-construct on mem_root; the pointer is returned without a NULL check. */
+  return new (mem_root) Create_field(*this);
+}
+
+
+/**
   maximum possible display length for blob.
 
   @return
@@ -10031,14 +9636,9 @@ uint32 Field_blob::max_display_length()
 
     if count_cuted_fields == CHECK_FIELD_IGNORE then we ignore notes.
     This allows us to avoid notes in optimisation, like convert_constant_item().
-
-  @retval
-    1 if count_cuted_fields == CHECK_FIELD_IGNORE and error level is not NOTE
-  @retval
-    0 otherwise
 */
 
-bool 
+void 
 Field::set_warning(MYSQL_ERROR::enum_warning_level level, uint code,
                    int cuted_increment)
 {
@@ -10052,9 +9652,7 @@ Field::set_warning(MYSQL_ERROR::enum_warning_level level, uint code,
     thd->cuted_fields+= cuted_increment;
     push_warning_printf(thd, level, code, ER(code), field_name,
                         thd->warning_info->current_row_for_warning());
-    return 0;
   }
-  return level >= MYSQL_ERROR::WARN_LEVEL_WARN;
 }
 
 
@@ -10064,7 +9662,6 @@ Field::set_warning(MYSQL_ERROR::enum_warning_level level, uint code,
   @param level            level of message (Note/Warning/Error)
   @param code             error code of message to be produced
   @param str              string value which we tried to save
-  @param str_length       length of string which we tried to save
   @param ts_type          type of datetime value (datetime/date/time)
   @param cuted_increment  whenever we should increase cut fields count or not
 
@@ -10072,80 +9669,42 @@ Field::set_warning(MYSQL_ERROR::enum_warning_level level, uint code,
     This function will always produce some warning but won't increase cut
     fields counter if count_cuted_fields ==FIELD_CHECK_IGNORE for current
     thread.
-*/
-
-void 
-Field::set_datetime_warning(MYSQL_ERROR::enum_warning_level level, uint code, 
-                            const char *str, uint str_length, 
-                            timestamp_type ts_type, int cuted_increment)
-{
-  THD *thd= table ? table->in_use : current_thd;
-  if ((thd->really_abort_on_warning() &&
-       level >= MYSQL_ERROR::WARN_LEVEL_WARN) ||
-      set_warning(level, code, cuted_increment))
-    make_truncated_value_warning(thd, level, str, str_length, ts_type,
-                                 field_name);
-}
-
 
-/**
-  Produce warning or note about integer datetime value saved into field.
-
-  @param level            level of message (Note/Warning/Error)
-  @param code             error code of message to be produced
-  @param nr               numeric value which we tried to save
-  @param ts_type          type of datetime value (datetime/date/time)
-  @param cuted_increment  whenever we should increase cut fields count or not
+    See also bug#2336
 
-  @note
-    This function will always produce some warning but won't increase cut
-    fields counter if count_cuted_fields == FIELD_CHECK_IGNORE for current
-    thread.
 */
 
-void 
-Field::set_datetime_warning(MYSQL_ERROR::enum_warning_level level, uint code, 
-                            longlong nr, timestamp_type ts_type,
-                            int cuted_increment)
+void Field::set_datetime_warning(MYSQL_ERROR::enum_warning_level level,
+                                 uint code, const ErrConv *str,
+                                 timestamp_type ts_type, int cuted_increment)
 {
-  THD *thd= table ? table->in_use : current_thd;
-  if (thd->really_abort_on_warning() ||
-      set_warning(level, code, cuted_increment))
-  {
-    char str_nr[22];
-    char *str_end= longlong10_to_str(nr, str_nr, -10);
-    make_truncated_value_warning(thd, level, str_nr, (uint) (str_end - str_nr), 
-                                 ts_type, field_name);
-  }
+  THD *thd= table ? table->in_use : current_thd; /* table may be unset */
+  if (thd->really_abort_on_warning() && level >= MYSQL_ERROR::WARN_LEVEL_WARN)
+    make_truncated_value_warning(thd, level, str, ts_type, field_name);
+  else                          /* otherwise report through the field warning */
+    set_warning(level, code, cuted_increment);
 }
 
 
-/**
-  Produce warning or note about double datetime data saved into field.
-
-  @param level            level of message (Note/Warning/Error)
-  @param code             error code of message to be produced
-  @param nr               double value which we tried to save
-  @param ts_type          type of datetime value (datetime/date/time)
-
-  @note
-    This function will always produce some warning but won't increase cut
-    fields counter if count_cuted_fields == FIELD_CHECK_IGNORE for current
-    thread.
+/*
+  @brief
+  Return possible keys for a field
+
+  @details
+  Return bitmap of keys over this field which can be used by the range
+  optimizer. For a field of a generic table such keys are all keys that start
+  with this field. For a field of a materialized derived table/view such keys
+  are all keys in which this field takes part. This is less restrictive as
+  keys for a materialized derived table/view are generated on the fly from
+  present fields, thus the case when a field for the beginning of a key is
+  absent is impossible.
+
+  @return map of possible keys
 */
 
-void 
-Field::set_datetime_warning(MYSQL_ERROR::enum_warning_level level, uint code, 
-                            double nr, timestamp_type ts_type)
+key_map Field::get_possible_keys()
 {
-  THD *thd= table ? table->in_use : current_thd;
-  if (thd->really_abort_on_warning() ||
-      set_warning(level, code, 1))
-  {
-    /* DBL_DIG is enough to print '-[digits].E+###' */
-    char str_nr[DBL_DIG + 8];
-    uint str_len= sprintf(str_nr, "%g", nr);
-    make_truncated_value_warning(thd, level, str_nr, str_len, ts_type,
-                                 field_name);
-  }
+  DBUG_ASSERT(table->pos_in_table_list);
+  return (table->pos_in_table_list->is_materialized_derived() ?
+          part_of_key : key_start);
 }
diff --git a/sql/field.h b/sql/field.h
index 9c2cd52cfa5..8e7e5a6d854 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -1,7 +1,7 @@
 #ifndef FIELD_INCLUDED
 #define FIELD_INCLUDED
-
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011 Oracle and/or its affiliates.
+   Copyright (c) 2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -31,8 +31,6 @@
 #include "my_decimal.h"                         /* my_decimal */
 #include "sql_error.h"                          /* MYSQL_ERROR */
 
-#define DATETIME_DEC                     6
-
 class Send_field;
 class Protocol;
 class Create_field;
@@ -65,8 +63,13 @@ enum Derivation
 #define my_charset_numeric      my_charset_latin1
 #define MY_REPERTOIRE_NUMERIC   MY_REPERTOIRE_ASCII
 
+struct ha_field_option_struct;
+
 struct st_cache_field;
 int field_conv(Field *to,Field *from);
+int truncate_double(double *nr, uint field_length, uint dec,
+                    bool unsigned_flag, double max_value);
+longlong double_to_longlong(double nr, bool unsigned_flag, bool *error);
 
 inline uint get_enum_pack_length(int elements)
 {
@@ -79,6 +82,71 @@ inline uint get_set_pack_length(int elements)
   return len > 4 ? 8 : len;
 }
 
+/*
+  Virtual_column_info is the class to contain additional
+  characteristics that are specific to a virtual/computed
+  field such as:
+  - the defining expression that is evaluated to compute the value
+    of the field
+  - whether the field is to be stored in the database
+  - whether the field is used in a partitioning expression
+*/
+
+class Virtual_column_info: public Sql_alloc
+{
+private:
+  /*
+    The following data is only updated by the parser and read
+    when a Create_field object is created/initialized.
+  */
+  enum_field_types field_type;   /* Real field type */
+  /* Flag indicating that the field is physically stored in the database */
+  bool stored_in_db;
+  /* Flag indicating that the field is used in a partitioning expression */
+  bool in_partitioning_expr;
+
+public:
+  /* The expression to compute the value of the virtual column */
+  Item *expr_item;
+  /* Text representation of the defining expression */
+  LEX_STRING expr_str;
+
+  /* Starts as MYSQL_TYPE_VIRTUAL, not stored, with no expression attached. */
+  Virtual_column_info()
+  : field_type((enum enum_field_types)MYSQL_TYPE_VIRTUAL),
+    stored_in_db(FALSE), in_partitioning_expr(FALSE),
+    expr_item(NULL)
+  {
+    expr_str.str= NULL;
+    expr_str.length= 0;
+  }
+  ~Virtual_column_info() {}
+
+  /* Read-only accessors; const so they work through const pointers too. */
+  enum_field_types get_real_type() const { return field_type; }
+  bool is_stored() const { return stored_in_db; }
+  bool is_in_partitioning_expr() const { return in_partitioning_expr; }
+
+  /*
+    Records the real type of the column. Meant to be called only once;
+    this is a convention for callers and is not checked here.
+  */
+  void set_field_type(enum_field_types fld_type)
+  {
+    field_type= fld_type;
+  }
+  /* Sets whether the column value is physically stored in the database. */
+  void set_stored_in_db_flag(bool stored)
+  {
+    stored_in_db= stored;
+  }
+  /* One-way switch: this class offers no way to clear the flag again. */
+  void mark_as_in_partitioning_expr()
+  {
+    in_partitioning_expr= TRUE;
+  }
+};
+
 class Field
 {
   Field(const Item &);				/* Prevent use of these */
@@ -95,20 +163,24 @@ public:
   */
   uchar		*null_ptr;
   /*
-    Note that you can use table->in_use as replacement for current_thd member 
+    Note that you can use table->in_use as replacement for current_thd member
     only inside of val_*() and store() members (e.g. you can't use it in cons)
   */
   TABLE *table;                                 // Pointer for table
   TABLE *orig_table;                            // Pointer to original table
-  const char	**table_name, *field_name;
+  const char * const *table_name;
+  const char *field_name;
+  /** reference to the list of options or NULL */
+  engine_option_value *option_list;
+  ha_field_option_struct *option_struct;   /* structure with parsed options */
   LEX_STRING	comment;
   /* Field is part of the following keys */
   key_map	key_start, part_of_key, part_of_key_not_clustered;
   key_map       part_of_sortkey;
-  /* 
-    We use three additional unireg types for TIMESTAMP to overcome limitation 
-    of current binary format of .frm file. We'd like to be able to support 
-    NOW() as default and on update value for such fields but unable to hold 
+  /*
+    We use three additional unireg types for TIMESTAMP to overcome limitation
+    of current binary format of .frm file. We'd like to be able to support
+    NOW() as default and on update value for such fields but unable to hold
     this info anywhere except unireg_check field. This issue will be resolved
     in more clean way with transition to new text based .frm format.
     See also comment for Field_timestamp::Field_timestamp().
@@ -141,6 +213,19 @@ public:
    */
   bool is_created_from_null_item;
 
+  /* 
+    This is additional data provided for any computed (virtual) field.
+    In particular it includes a pointer to the item by which this field
+    can be computed from other fields.
+  */
+  Virtual_column_info *vcol_info;
+  /*
+    Flag indicating that the field is physically stored in tables
+    rather than just computed from other fields.
+    As of now, FALSE can be set only for computed virtual columns.
+  */
+  bool stored_in_db;
+
   Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg,
         uchar null_bit_arg, utype unireg_check_arg,
         const char *field_name_arg);
@@ -150,7 +235,9 @@ public:
   virtual int  store(double nr)=0;
   virtual int  store(longlong nr, bool unsigned_val)=0;
   virtual int  store_decimal(const my_decimal *d)=0;
-  virtual int store_time(MYSQL_TIME *ltime, timestamp_type t_type);
+  virtual int  store_time_dec(MYSQL_TIME *ltime, uint dec);
+  int store_time(MYSQL_TIME *ltime)
+  { return store_time_dec(ltime, TIME_SECOND_PART_DIGITS); }
   int store(const char *to, uint length, CHARSET_INFO *cs,
             enum_check_fields check_level);
   virtual double val_real(void)=0;
@@ -170,7 +257,7 @@ public:
      This trickery is used to decrease a number of malloc calls.
   */
   virtual String *val_str(String*,String *)=0;
-  String *val_int_as_str(String *val_buffer, my_bool unsigned_flag);
+  String *val_int_as_str(String *val_buffer, bool unsigned_flag);
   /*
    str_needs_quotes() returns TRUE if the value returned by val_str() needs
    to be quoted when used in constructing an SQL query.
@@ -178,7 +265,6 @@ public:
   virtual bool str_needs_quotes() { return FALSE; }
   virtual Item_result result_type () const=0;
   virtual Item_result cmp_type () const { return result_type(); }
-  virtual Item_result cast_to_int_type () const { return result_type(); }
   static bool type_can_have_key_part(enum_field_types);
   static enum_field_types field_type_merge(enum_field_types, enum_field_types);
   static Item_result result_merge_type(enum_field_types);
@@ -279,11 +365,11 @@ public:
     return test(record[(uint) (null_ptr -table->record[0])] &
 		null_bit);
   }
-  inline bool is_null_in_record_with_offset(my_ptrdiff_t offset)
+  inline bool is_null_in_record_with_offset(my_ptrdiff_t col_offset)
   {
     if (!null_ptr)
       return 0;
-    return test(null_ptr[offset] & null_bit);
+    return test(null_ptr[col_offset] & null_bit);
   }
   inline void set_null(my_ptrdiff_t row_offset= 0)
     { if (null_ptr) null_ptr[row_offset]|= null_bit; }
@@ -324,14 +410,6 @@ public:
   virtual void make_field(Send_field *);
   virtual void sort_string(uchar *buff,uint length)=0;
   virtual bool optimize_range(uint idx, uint part);
-  /*
-    This should be true for fields which, when compared with constant
-    items, can be casted to longlong. In this case we will at 'fix_fields'
-    stage cast the constant items to longlongs and at the execution stage
-    use field->val_int() for comparison.  Used to optimize clauses like
-    'a_column BETWEEN date_const, date_const'.
-  */
-  virtual bool can_be_compared_as_longlong() const { return FALSE; }
   virtual void free() {}
   virtual Field *new_field(MEM_ROOT *root, TABLE *new_table,
                            bool keep_type);
@@ -382,7 +460,7 @@ public:
       Number of copied bytes (excluding padded zero bytes -- see above).
   */
 
-  virtual uint get_key_image(uchar *buff, uint length, imagetype type)
+  virtual uint get_key_image(uchar *buff, uint length, imagetype type_arg)
   {
     get_image(buff, length, &my_charset_bin);
     return length;
@@ -415,27 +493,25 @@ public:
   }
   virtual bool send_binary(Protocol *protocol);
 
-  virtual uchar *pack(uchar *to, const uchar *from,
-                      uint max_length, bool low_byte_first);
+  virtual uchar *pack(uchar *to, const uchar *from, uint max_length);
   /**
-     @overload Field::pack(uchar*, const uchar*, uint, bool)
+     @overload Field::pack(uchar*, const uchar*, uint)
   */
   uchar *pack(uchar *to, const uchar *from)
   {
     DBUG_ENTER("Field::pack");
-    uchar *result= this->pack(to, from, UINT_MAX, table->s->db_low_byte_first);
+    uchar *result= this->pack(to, from, UINT_MAX);
     DBUG_RETURN(result);
   }
 
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first);
+  virtual const uchar *unpack(uchar* to, const uchar *from, uint param_data);
   /**
-     @overload Field::unpack(uchar*, const uchar*, uint, bool)
+     @overload Field::unpack(uchar*, const uchar*, uint)
   */
   const uchar *unpack(uchar* to, const uchar *from)
   {
     DBUG_ENTER("Field::unpack");
-    const uchar *result= unpack(to, from, 0U, table->s->db_low_byte_first);
+    const uchar *result= unpack(to, from, 0);
     DBUG_RETURN(result);
   }
 
@@ -451,7 +527,7 @@ public:
   void copy_from_tmp(int offset);
   uint fill_cache_field(struct st_cache_field *copy);
   virtual bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
-  virtual bool get_time(MYSQL_TIME *ltime);
+  bool get_time(MYSQL_TIME *ltime) { return get_date(ltime, TIME_TIME_ONLY); }
   virtual CHARSET_INFO *charset(void) const { return &my_charset_bin; }
   virtual CHARSET_INFO *charset_for_protocol(void) const
   { return binary() ? &my_charset_bin : charset(); }
@@ -462,25 +538,25 @@ public:
   { return DERIVATION_IMPLICIT; }
   virtual uint repertoire(void) const { return MY_REPERTOIRE_UNICODE30; }
   virtual void set_derivation(enum Derivation derivation_arg) { }
-  bool set_warning(MYSQL_ERROR::enum_warning_level, unsigned int code,
+  virtual int set_time() { return 1; }
+  void set_warning(MYSQL_ERROR::enum_warning_level, unsigned int code,
                    int cuted_increment);
   void set_datetime_warning(MYSQL_ERROR::enum_warning_level, uint code, 
-                            const char *str, uint str_len,
-                            timestamp_type ts_type, int cuted_increment);
-  void set_datetime_warning(MYSQL_ERROR::enum_warning_level, uint code, 
-                            longlong nr, timestamp_type ts_type,
+                            const ErrConv *str, timestamp_type ts_type,
                             int cuted_increment);
-  void set_datetime_warning(MYSQL_ERROR::enum_warning_level, const uint code, 
-                            double nr, timestamp_type ts_type);
   inline bool check_overflow(int op_result)
   {
     return (op_result == E_DEC_OVERFLOW);
   }
   int warn_if_overflow(int op_result);
+  void set_table_name(String *alias)
+  {
+    table_name= &alias->Ptr;
+  }
   void init(TABLE *table_arg)
   {
     orig_table= table= table_arg;
-    table_name= &table_arg->alias;
+    set_table_name(&table_arg->alias);
   }
 
   /* maximum possible display length */
@@ -509,8 +585,16 @@ public:
     DBUG_ASSERT(0);
     return GEOM_GEOMETRY;
   }
+
+  key_map get_possible_keys();
+
   /* Hash value */
   virtual void hash(ulong *nr, ulong *nr2);
+
+  /* Check whether the field can be used as a join attribute in hash join */
+  virtual bool hash_join_is_possible() { return TRUE; }
+  virtual bool eq_cmp_as_binary() { return TRUE; }
+
   friend int cre_myisam(char * name, register TABLE *form, uint options,
 			ulonglong auto_increment_value);
   friend class Copy_field;
@@ -554,143 +638,34 @@ private:
   { return 0; }
 
 protected:
-  static void handle_int16(uchar *to, const uchar *from,
-                           bool low_byte_first_from, bool low_byte_first_to)
-  {
-    int16 val;
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first_from)
-      val = sint2korr(from);
-    else
-#endif
-      shortget(val, from);
-
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first_to)
-      int2store(to, val);
-    else
-#endif
-      shortstore(to, val);
-  }
-
-  static void handle_int24(uchar *to, const uchar *from,
-                           bool low_byte_first_from, bool low_byte_first_to)
-  {
-    int32 val;
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first_from)
-      val = sint3korr(from);
-    else
-#endif
-      val= (from[0] << 16) + (from[1] << 8) + from[2];
-
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first_to)
-      int2store(to, val);
-    else
-#endif
-    {
-      to[0]= 0xFF & (val >> 16);
-      to[1]= 0xFF & (val >> 8);
-      to[2]= 0xFF & val;
-    }
-  }
-
-  /*
-    Helper function to pack()/unpack() int32 values
-  */
-  static void handle_int32(uchar *to, const uchar *from,
-                           bool low_byte_first_from, bool low_byte_first_to)
-  {
-    int32 val;
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first_from)
-      val = sint4korr(from);
-    else
-#endif
-      longget(val, from);
-
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first_to)
-      int4store(to, val);
-    else
-#endif
-      longstore(to, val);
-  }
-
-  /*
-    Helper function to pack()/unpack() int64 values
-  */
-  static void handle_int64(uchar* to, const uchar *from,
-                           bool low_byte_first_from, bool low_byte_first_to)
-  {
-    int64 val;
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first_from)
-      val = sint8korr(from);
-    else
-#endif
-      longlongget(val, from);
-
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first_to)
-      int8store(to, val);
-    else
-#endif
-      longlongstore(to, val);
-  }
-
-  uchar *pack_int16(uchar *to, const uchar *from, bool low_byte_first_to)
-  {
-    handle_int16(to, from, table->s->db_low_byte_first, low_byte_first_to);
-    return to  + sizeof(int16);
-  }
-
-  const uchar *unpack_int16(uchar* to, const uchar *from,
-                            bool low_byte_first_from)
-  {
-    handle_int16(to, from, low_byte_first_from, table->s->db_low_byte_first);
-    return from + sizeof(int16);
-  }
-
-  uchar *pack_int24(uchar *to, const uchar *from, bool low_byte_first_to)
+  uchar *pack_int(uchar *to, const uchar *from, size_t size)
   {
-    handle_int24(to, from, table->s->db_low_byte_first, low_byte_first_to);
-    return to + 3;
+    memcpy(to, from, size);
+    return to + size;
   }
 
-  const uchar *unpack_int24(uchar* to, const uchar *from,
-                            bool low_byte_first_from)
+  const uchar *unpack_int(uchar* to, const uchar *from, size_t size)
   {
-    handle_int24(to, from, low_byte_first_from, table->s->db_low_byte_first);
-    return from + 3;
+    memcpy(to, from, size);
+    return from + size;
   }
 
-  uchar *pack_int32(uchar *to, const uchar *from, bool low_byte_first_to)
-  {
-    handle_int32(to, from, table->s->db_low_byte_first, low_byte_first_to);
-    return to  + sizeof(int32);
-  }
-
-  const uchar *unpack_int32(uchar* to, const uchar *from,
-                            bool low_byte_first_from)
-  {
-    handle_int32(to, from, low_byte_first_from, table->s->db_low_byte_first);
-    return from + sizeof(int32);
-  }
-
-  uchar *pack_int64(uchar* to, const uchar *from, bool low_byte_first_to)
-  {
-    handle_int64(to, from, table->s->db_low_byte_first, low_byte_first_to);
-    return to + sizeof(int64);
-  }
-
-  const uchar *unpack_int64(uchar* to, const uchar *from,
-                            bool low_byte_first_from)
-  {
-    handle_int64(to, from, low_byte_first_from, table->s->db_low_byte_first);
-    return from + sizeof(int64);
-  }
+  uchar *pack_int16(uchar *to, const uchar *from)
+  { return pack_int(to, from, 2); }
+  const uchar *unpack_int16(uchar* to, const uchar *from)
+  { return unpack_int(to, from, 2); }
+  uchar *pack_int24(uchar *to, const uchar *from)
+  { return pack_int(to, from, 3); }
+  const uchar *unpack_int24(uchar* to, const uchar *from)
+  { return unpack_int(to, from, 3); }
+  uchar *pack_int32(uchar *to, const uchar *from)
+  { return pack_int(to, from, 4); }
+  const uchar *unpack_int32(uchar* to, const uchar *from)
+  { return unpack_int(to, from, 4); }
+  uchar *pack_int64(uchar* to, const uchar *from)
+  { return pack_int(to, from, 8); }
+  const uchar *unpack_int64(uchar* to, const uchar *from)
+  { return unpack_int(to, from, 8); }
 
   bool field_flags_are_binary()
   {
@@ -708,7 +683,7 @@ public:
 	    uchar null_bit_arg, utype unireg_check_arg,
 	    const char *field_name_arg,
             uint8 dec_arg, bool zero_arg, bool unsigned_arg);
-  Item_result result_type () const { return REAL_RESULT; }
+  enum Item_result result_type () const { return INT_RESULT; }
   enum Derivation derivation(void) const { return DERIVATION_NUMERIC; }
   uint repertoire(void) const { return MY_REPERTOIRE_NUMERIC; }
   CHARSET_INFO *charset(void) const { return &my_charset_numeric; }
@@ -729,6 +704,7 @@ public:
                           field_metadata, length));
     return length;
   }
+  int  store_time_dec(MYSQL_TIME *ltime, uint dec);
   int check_int(CHARSET_INFO *cs, const char *str, int length,
                 const char *int_end, int error);
   bool get_int(CHARSET_INFO *cs, const char *from, uint len, 
@@ -778,9 +754,9 @@ public:
   my_decimal *val_decimal(my_decimal *);
   virtual bool str_needs_quotes() { return TRUE; }
   uint is_equal(Create_field *new_field);
+  bool eq_cmp_as_binary() { return test(flags & BINARY_FLAG); }
 };
 
-
 /* base class for Field_string, Field_varstring and Field_blob */
 
 class Field_longstr :public Field_str
@@ -803,7 +779,7 @@ public:
 /* base class for float and double and decimal (old one) */
 class Field_real :public Field_num {
 public:
-  my_bool not_fixed;
+  bool not_fixed;
 
   Field_real(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg,
              uchar null_bit_arg, utype unireg_check_arg,
@@ -813,15 +789,13 @@ public:
                field_name_arg, dec_arg, zero_arg, unsigned_arg),
     not_fixed(dec_arg >= NOT_FIXED_DEC)
     {}
+  Item_result result_type () const { return REAL_RESULT; }
   int store_decimal(const my_decimal *);
+  int  store_time_dec(MYSQL_TIME *ltime, uint dec);
+  bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
   my_decimal *val_decimal(my_decimal *);
-  int truncate(double *nr, double max_length);
   uint32 max_display_length() { return field_length; }
   uint size_of() const { return sizeof(*this); }
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first);
-  virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length, bool low_byte_first);
 };
 
 
@@ -850,15 +824,13 @@ public:
   void overflow(bool negative);
   bool zero_pack() const { return 0; }
   void sql_type(String &str) const;
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first)
+  virtual const uchar *unpack(uchar* to, const uchar *from, uint param_data)
   {
-    return Field::unpack(to, from, param_data, low_byte_first);
+    return Field::unpack(to, from, param_data);
   }
-  virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length, bool low_byte_first)
+  virtual uchar *pack(uchar* to, const uchar *from, uint max_length)
   {
-    return Field::pack(to, from, max_length, low_byte_first);
+    return Field::pack(to, from, max_length);
   }
 };
 
@@ -893,7 +865,7 @@ public:
   int  store(const char *to, uint length, CHARSET_INFO *charset);
   int  store(double nr);
   int  store(longlong nr, bool unsigned_val);
-  int store_time(MYSQL_TIME *ltime, timestamp_type t_type);
+  int  store_time_dec(MYSQL_TIME *ltime, uint dec);
   int  store_decimal(const my_decimal *);
   double val_real(void);
   longlong val_int(void);
@@ -911,8 +883,7 @@ public:
   bool compatible_field_size(uint field_metadata, Relay_log_info *rli,
                              uint16 mflags, int *order_var);
   uint is_equal(Create_field *new_field);
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first);
+  virtual const uchar *unpack(uchar* to, const uchar *from, uint param_data);
   static Field *create_from_item (Item *);
 };
 
@@ -927,7 +898,6 @@ public:
 	       unireg_check_arg, field_name_arg,
 	       0, zero_arg,unsigned_arg)
     {}
-  enum Item_result result_type () const { return INT_RESULT; }
   enum_field_types type() const { return MYSQL_TYPE_TINY;}
   enum ha_base_keytype key_type() const
     { return unsigned_flag ? HA_KEYTYPE_BINARY : HA_KEYTYPE_INT8; }
@@ -945,15 +915,13 @@ public:
   void sql_type(String &str) const;
   uint32 max_display_length() { return 4; }
 
-  virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length, bool low_byte_first)
+  virtual uchar *pack(uchar* to, const uchar *from, uint max_length)
   {
     *to= *from;
     return to + 1;
   }
 
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first)
+  virtual const uchar *unpack(uchar* to, const uchar *from, uint param_data)
   {
     *to= *from;
     return from + 1;
@@ -976,7 +944,6 @@ public:
     :Field_num((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0,
 	       NONE, field_name_arg, 0, 0, unsigned_arg)
     {}
-  enum Item_result result_type () const { return INT_RESULT; }
   enum_field_types type() const { return MYSQL_TYPE_SHORT;}
   enum ha_base_keytype key_type() const
     { return unsigned_flag ? HA_KEYTYPE_USHORT_INT : HA_KEYTYPE_SHORT_INT;}
@@ -994,17 +961,11 @@ public:
   void sql_type(String &str) const;
   uint32 max_display_length() { return 6; }
 
-  virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length, bool low_byte_first)
-  {
-    return pack_int16(to, from, low_byte_first);
-  }
+  virtual uchar *pack(uchar* to, const uchar *from, uint max_length)
+  { return pack_int16(to, from); }
 
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first)
-  {
-    return unpack_int16(to, from, low_byte_first);
-  }
+  virtual const uchar *unpack(uchar* to, const uchar *from, uint param_data)
+  { return unpack_int16(to, from); }
 };
 
 class Field_medium :public Field_num {
@@ -1017,7 +978,6 @@ public:
 	       unireg_check_arg, field_name_arg,
 	       0, zero_arg,unsigned_arg)
     {}
-  enum Item_result result_type () const { return INT_RESULT; }
   enum_field_types type() const { return MYSQL_TYPE_INT24;}
   enum ha_base_keytype key_type() const
     { return unsigned_flag ? HA_KEYTYPE_UINT24 : HA_KEYTYPE_INT24; }
@@ -1035,16 +995,14 @@ public:
   void sql_type(String &str) const;
   uint32 max_display_length() { return 8; }
 
-  virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length, bool low_byte_first)
+  virtual uchar *pack(uchar* to, const uchar *from, uint max_length)
   {
-    return Field::pack(to, from, max_length, low_byte_first);
+    return Field::pack(to, from, max_length);
   }
 
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first)
+  virtual const uchar *unpack(uchar* to, const uchar *from, uint param_data)
   {
-    return Field::unpack(to, from, param_data, low_byte_first);
+    return Field::unpack(to, from, param_data);
   }
 };
 
@@ -1064,7 +1022,6 @@ public:
     :Field_num((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0,
 	       NONE, field_name_arg,0,0,unsigned_arg)
     {}
-  enum Item_result result_type () const { return INT_RESULT; }
   enum_field_types type() const { return MYSQL_TYPE_LONG;}
   enum ha_base_keytype key_type() const
     { return unsigned_flag ? HA_KEYTYPE_ULONG_INT : HA_KEYTYPE_LONG_INT; }
@@ -1082,21 +1039,18 @@ public:
   void sql_type(String &str) const;
   uint32 max_display_length() { return MY_INT32_NUM_DECIMAL_DIGITS; }
   virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length __attribute__((unused)),
-                      bool low_byte_first)
+                      uint max_length __attribute__((unused)))
   {
-    return pack_int32(to, from, low_byte_first);
+    return pack_int32(to, from);
   }
   virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data __attribute__((unused)),
-                              bool low_byte_first)
+                              uint param_data __attribute__((unused)))
   {
-    return unpack_int32(to, from, low_byte_first);
+    return unpack_int32(to, from);
   }
 };
 
 
-#ifdef HAVE_LONG_LONG
 class Field_longlong :public Field_num {
 public:
   Field_longlong(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg,
@@ -1113,7 +1067,6 @@ public:
     :Field_num((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0,
 	       NONE, field_name_arg,0,0,unsigned_arg)
     {}
-  enum Item_result result_type () const { return INT_RESULT; }
   enum_field_types type() const { return MYSQL_TYPE_LONGLONG;}
   enum ha_base_keytype key_type() const
     { return unsigned_flag ? HA_KEYTYPE_ULONGLONG : HA_KEYTYPE_LONGLONG; }
@@ -1133,22 +1086,18 @@ public:
   void sort_string(uchar *buff,uint length);
   uint32 pack_length() const { return 8; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
   uint32 max_display_length() { return 20; }
   virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length  __attribute__((unused)),
-                      bool low_byte_first)
+                      uint max_length  __attribute__((unused)))
   {
-    return pack_int64(to, from, low_byte_first);
+    return pack_int64(to, from);
   }
   virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data __attribute__((unused)),
-                              bool low_byte_first)
+                              uint param_data __attribute__((unused)))
   {
-    return unpack_int64(to, from, low_byte_first);
+    return unpack_int64(to, from);
   }
 };
-#endif
 
 
 class Field_float :public Field_real {
@@ -1202,7 +1151,7 @@ public:
                 NONE, field_name_arg, dec_arg, 0, 0)
     {}
   Field_double(uint32 len_arg, bool maybe_null_arg, const char *field_name_arg,
-	       uint8 dec_arg, my_bool not_fixed_arg)
+	       uint8 dec_arg, bool not_fixed_arg)
     :Field_real((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "" : 0, (uint) 0,
                 NONE, field_name_arg, dec_arg, 0, 0)
     {not_fixed= not_fixed_arg; }
@@ -1256,10 +1205,14 @@ public:
   void sql_type(String &str) const;
   uint size_of() const { return sizeof(*this); }
   uint32 max_display_length() { return 4; }
+  void move_field_offset(my_ptrdiff_t ptr_diff) {}
 };
 
 
 class Field_timestamp :public Field_str {
+protected:
+  int store_TIME_with_warning(THD *, MYSQL_TIME *, const ErrConv *,
+                              bool, bool);
 public:
   Field_timestamp(uchar *ptr_arg, uint32 len_arg,
                   uchar *null_ptr_arg, uchar null_bit_arg,
@@ -1270,7 +1223,7 @@ public:
   enum_field_types type() const { return MYSQL_TYPE_TIMESTAMP;}
   bool match_collation_to_optimize_range() const { return FALSE; }
   enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONG_INT; }
-  enum Item_result cmp_type () const { return INT_RESULT; }
+  enum Item_result cmp_type () const { return TIME_RESULT; }
   enum Derivation derivation(void) const { return DERIVATION_NUMERIC; }
   uint repertoire(void) const { return MY_REPERTOIRE_NUMERIC; }
   CHARSET_INFO *charset(void) const { return &my_charset_numeric; }
@@ -1278,7 +1231,7 @@ public:
   int  store(const char *to,uint length,CHARSET_INFO *charset);
   int  store(double nr);
   int  store(longlong nr, bool unsigned_val);
-  int  reset(void) { ptr[0]=ptr[1]=ptr[2]=ptr[3]=0; return 0; }
+  int  store_time_dec(MYSQL_TIME *ltime, uint dec);
   double val_real(void);
   longlong val_int(void);
   String *val_str(String*,String *);
@@ -1287,9 +1240,9 @@ public:
   void sort_string(uchar *buff,uint length);
   uint32 pack_length() const { return 4; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
   bool zero_pack() const { return 0; }
-  void set_time();
+  uint decimals() const { return 0; }
+  virtual int set_time();
   virtual void set_default()
   {
     if (table->timestamp_field == this &&
@@ -1299,43 +1252,62 @@ public:
       Field::set_default();
   }
   /* Get TIMESTAMP field value as seconds since begging of Unix Epoch */
-  inline long get_timestamp(my_bool *null_value)
+  virtual my_time_t get_timestamp(ulong *sec_part) const;
+  virtual void store_TIME(my_time_t timestamp, ulong sec_part)
   {
-    if ((*null_value= is_null()))
-      return 0;
-#ifdef WORDS_BIGENDIAN
-    if (table && table->s->db_low_byte_first)
-      return sint4korr(ptr);
-#endif
-    long tmp;
-    longget(tmp,ptr);
-    return tmp;
-  }
-  inline void store_timestamp(my_time_t timestamp)
-  {
-#ifdef WORDS_BIGENDIAN
-    if (table && table->s->db_low_byte_first)
-    {
-      int4store(ptr,timestamp);
-    }
-    else
-#endif
-      longstore(ptr,(uint32) timestamp);
+    int4store(ptr,timestamp);
   }
   bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
-  bool get_time(MYSQL_TIME *ltime);
   timestamp_auto_set_type get_auto_set_type() const;
   uchar *pack(uchar *to, const uchar *from,
-              uint max_length __attribute__((unused)), bool low_byte_first)
+              uint max_length __attribute__((unused)))
   {
-    return pack_int32(to, from, low_byte_first);
+    return pack_int32(to, from);
   }
   const uchar *unpack(uchar* to, const uchar *from,
-                      uint param_data __attribute__((unused)),
-                      bool low_byte_first)
+                      uint param_data __attribute__((unused)))
+  {
+    return unpack_int32(to, from);
+  }
+};
+
+
+class Field_timestamp_hires :public Field_timestamp {
+  uint dec;
+public:
+  Field_timestamp_hires(uchar *ptr_arg,
+                  uchar *null_ptr_arg, uchar null_bit_arg,
+                  enum utype unireg_check_arg, const char *field_name_arg,
+                  TABLE_SHARE *share, uint dec_arg, CHARSET_INFO *cs) :
+  Field_timestamp(ptr_arg, MAX_DATETIME_WIDTH + dec_arg + 1, null_ptr_arg,
+                  null_bit_arg, unireg_check_arg, field_name_arg, share, cs),
+  dec(dec_arg)
   {
-    return unpack_int32(to, from, low_byte_first);
+    DBUG_ASSERT(dec);
+    DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);
   }
+  void sql_type(String &str) const;
+  my_time_t get_timestamp(ulong *sec_part) const;
+  void store_TIME(my_time_t timestamp, ulong sec_part);
+  int store_decimal(const my_decimal *d);
+  double val_real(void);
+  String *val_str(String*,String *);
+  my_decimal* val_decimal(my_decimal*);
+  bool send_binary(Protocol *protocol);
+  int cmp(const uchar *,const uchar *);
+  void sort_string(uchar *buff,uint length);
+  uint decimals() const { return dec; }
+  int set_time();
+  enum ha_base_keytype key_type() const { return HA_KEYTYPE_BINARY; }
+  void make_field(Send_field *field);
+  uint32 pack_length() const;
+  uchar *pack(uchar *to, const uchar *from, uint max_length)
+  { return Field::pack(to, from, max_length); }
+  const uchar *unpack(uchar* to, const uchar *from, uint param_data)
+  { return Field::unpack(to, from, param_data); }
+  uint size_of() const { return sizeof(*this); }
+  bool eq_def(Field *field)
+  { return Field_str::eq_def(field) && dec == field->decimals(); }
 };
 
 
@@ -1351,90 +1323,94 @@ public:
   int  store(const char *to,uint length,CHARSET_INFO *charset);
   int  store(double nr);
   int  store(longlong nr, bool unsigned_val);
+  int  store_time_dec(MYSQL_TIME *ltime, uint dec);
   double val_real(void);
   longlong val_int(void);
   String *val_str(String*,String *);
+  bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
   bool send_binary(Protocol *protocol);
+  uint32 max_display_length() { return field_length; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
 };
 
 
-class Field_date :public Field_str {
+class Field_temporal: public Field_str {
+protected:
+  int store_TIME_with_warning(MYSQL_TIME *ltime, const ErrConv *str,
+                              int was_cut, int have_smth_to_conv);
+  virtual void store_TIME(MYSQL_TIME *ltime) = 0;
 public:
-  Field_date(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
-	     enum utype unireg_check_arg, const char *field_name_arg,
-	     CHARSET_INFO *cs)
-    :Field_str(ptr_arg, MAX_DATE_WIDTH, null_ptr_arg, null_bit_arg,
-	       unireg_check_arg, field_name_arg, cs)
+  Field_temporal(uchar *ptr_arg,uint32 len_arg, uchar *null_ptr_arg,
+                 uchar null_bit_arg, utype unireg_check_arg,
+                 const char *field_name_arg, CHARSET_INFO *charset_arg)
+    :Field_str(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, unireg_check_arg,
+               field_name_arg, charset_arg)
     { flags|= BINARY_FLAG; }
-  Field_date(bool maybe_null_arg, const char *field_name_arg,
-             CHARSET_INFO *cs)
-    :Field_str((uchar*) 0, MAX_DATE_WIDTH, maybe_null_arg ? (uchar*) "": 0,0,
-	       NONE, field_name_arg, cs) { flags|= BINARY_FLAG; }
-  enum_field_types type() const { return MYSQL_TYPE_DATE;}
-  bool match_collation_to_optimize_range() const { return FALSE; }
-  enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONG_INT; }
-  enum Item_result cmp_type () const { return INT_RESULT; }
   enum Derivation derivation(void) const { return DERIVATION_NUMERIC; }
   uint repertoire(void) const { return MY_REPERTOIRE_NUMERIC; }
   CHARSET_INFO *charset(void) const { return &my_charset_numeric; }
   bool binary() const { return 1; }
-  int store(const char *to,uint length,CHARSET_INFO *charset);
-  int store(double nr);
-  int store(longlong nr, bool unsigned_val);
+  bool match_collation_to_optimize_range() const { return FALSE; }
+  enum Item_result cmp_type () const { return TIME_RESULT; }
+  int  store(const char *to,uint length,CHARSET_INFO *charset);
+  int  store(double nr);
+  int  store(longlong nr, bool unsigned_val);
+  int  store_time_dec(MYSQL_TIME *ltime, uint dec);
+  my_decimal *val_decimal(my_decimal*);
+  bool eq_def(Field *field)
+  {
+    return (Field_str::eq_def(field) && decimals() == field->decimals());
+  }
+};
+
+class Field_date :public Field_temporal {
+  void store_TIME(MYSQL_TIME *ltime);
+public:
+  Field_date(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
+	     enum utype unireg_check_arg, const char *field_name_arg,
+	     CHARSET_INFO *cs)
+    :Field_temporal(ptr_arg, MAX_DATE_WIDTH, null_ptr_arg, null_bit_arg,
+                    unireg_check_arg, field_name_arg, cs) {}
+  enum_field_types type() const { return MYSQL_TYPE_DATE;}
+  enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONG_INT; }
   int reset(void) { ptr[0]=ptr[1]=ptr[2]=ptr[3]=0; return 0; }
   double val_real(void);
   longlong val_int(void);
   String *val_str(String*,String *);
-  bool get_time(MYSQL_TIME *ltime);
+  uint decimals() const { return 0; }
   bool send_binary(Protocol *protocol);
   int cmp(const uchar *,const uchar *);
   void sort_string(uchar *buff,uint length);
   uint32 pack_length() const { return 4; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
   bool zero_pack() const { return 1; }
   uchar *pack(uchar* to, const uchar *from,
-              uint max_length __attribute__((unused)), bool low_byte_first)
+              uint max_length __attribute__((unused)))
   {
-    return pack_int32(to, from, low_byte_first);
+    return pack_int32(to, from);
   }
   const uchar *unpack(uchar* to, const uchar *from,
-                      uint param_data __attribute__((unused)),
-                      bool low_byte_first)
+                      uint param_data __attribute__((unused)))
   {
-    return unpack_int32(to, from, low_byte_first);
+    return unpack_int32(to, from);
   }
 };
 
 
-class Field_newdate :public Field_str {
+class Field_newdate :public Field_temporal {
+  void store_TIME(MYSQL_TIME *ltime);
 public:
   Field_newdate(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
 		enum utype unireg_check_arg, const char *field_name_arg,
 		CHARSET_INFO *cs)
-    :Field_str(ptr_arg, 10, null_ptr_arg, null_bit_arg,
-	       unireg_check_arg, field_name_arg, cs)
-    { flags|= BINARY_FLAG; }
-  Field_newdate(bool maybe_null_arg, const char *field_name_arg,
-                CHARSET_INFO *cs)
-    :Field_str((uchar*) 0,10, maybe_null_arg ? (uchar*) "": 0,0,
-               NONE, field_name_arg, cs) { flags|= BINARY_FLAG; }
+    :Field_temporal(ptr_arg, MAX_DATE_WIDTH, null_ptr_arg, null_bit_arg,
+                    unireg_check_arg, field_name_arg, cs)
+    {}
   enum_field_types type() const { return MYSQL_TYPE_DATE;}
   enum_field_types real_type() const { return MYSQL_TYPE_NEWDATE; }
-  bool match_collation_to_optimize_range() const { return FALSE; }
   enum ha_base_keytype key_type() const { return HA_KEYTYPE_UINT24; }
-  enum Item_result cmp_type () const { return INT_RESULT; }
-  enum Derivation derivation(void) const { return DERIVATION_NUMERIC; }
-  uint repertoire(void) const { return MY_REPERTOIRE_NUMERIC; }
-  CHARSET_INFO *charset(void) const { return &my_charset_numeric; }
-  bool binary() const { return 1; }
-  int  store(const char *to,uint length,CHARSET_INFO *charset);
-  int  store(double nr);
-  int  store(longlong nr, bool unsigned_val);
-  int store_time(MYSQL_TIME *ltime, timestamp_type type);
   int reset(void) { ptr[0]=ptr[1]=ptr[2]=0; return 0; }
+  uint decimals() const { return 0; }
   double val_real(void);
   longlong val_int(void);
   String *val_str(String*,String *);
@@ -1443,85 +1419,85 @@ public:
   void sort_string(uchar *buff,uint length);
   uint32 pack_length() const { return 3; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
   bool zero_pack() const { return 1; }
   bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
-  bool get_time(MYSQL_TIME *ltime);
 };
 
 
-class Field_time :public Field_str {
+class Field_time :public Field_temporal {
+  void store_TIME(MYSQL_TIME *ltime);
 public:
-  Field_time(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
-	     enum utype unireg_check_arg, const char *field_name_arg,
-	     CHARSET_INFO *cs)
-    :Field_str(ptr_arg, 8, null_ptr_arg, null_bit_arg,
-	       unireg_check_arg, field_name_arg, cs)
-    { flags|= BINARY_FLAG; }
-  Field_time(bool maybe_null_arg, const char *field_name_arg,
-             CHARSET_INFO *cs)
-    :Field_str((uchar*) 0,8, maybe_null_arg ? (uchar*) "": 0,0,
-	       NONE, field_name_arg, cs) { flags|= BINARY_FLAG; }
+  Field_time(uchar *ptr_arg, uint length_arg, uchar *null_ptr_arg,
+             uchar null_bit_arg, enum utype unireg_check_arg,
+             const char *field_name_arg, CHARSET_INFO *cs)
+    :Field_temporal(ptr_arg, length_arg, null_ptr_arg, null_bit_arg,
+                    unireg_check_arg, field_name_arg, cs)
+    {}
   enum_field_types type() const { return MYSQL_TYPE_TIME;}
-  bool match_collation_to_optimize_range() const { return FALSE; }
   enum ha_base_keytype key_type() const { return HA_KEYTYPE_INT24; }
-  enum Item_result cmp_type () const { return INT_RESULT; }
-  enum Derivation derivation(void) const { return DERIVATION_NUMERIC; }
-  uint repertoire(void) const { return MY_REPERTOIRE_NUMERIC; }
-  CHARSET_INFO *charset(void) const { return &my_charset_numeric; }
-  bool binary() const { return 1; }
-  int store_time(MYSQL_TIME *ltime, timestamp_type type);
+  int store_time_dec(MYSQL_TIME *ltime, uint dec);
   int store(const char *to,uint length,CHARSET_INFO *charset);
   int store(double nr);
   int store(longlong nr, bool unsigned_val);
-  int reset(void) { ptr[0]=ptr[1]=ptr[2]=0; return 0; }
+  uint decimals() const { return 0; }
   double val_real(void);
   longlong val_int(void);
   String *val_str(String*,String *);
   bool get_date(MYSQL_TIME *ltime, uint fuzzydate);
   bool send_binary(Protocol *protocol);
-  bool get_time(MYSQL_TIME *ltime);
   int cmp(const uchar *,const uchar *);
   void sort_string(uchar *buff,uint length);
   uint32 pack_length() const { return 3; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
   bool zero_pack() const { return 1; }
 };
 
+class Field_time_hires :public Field_time {
+  uint dec;
+  longlong zero_point;
+  void store_TIME(MYSQL_TIME *ltime);
+public:
+  Field_time_hires(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
+             enum utype unireg_check_arg, const char *field_name_arg,
+             uint dec_arg, CHARSET_INFO *cs)
+    :Field_time(ptr_arg, MIN_TIME_WIDTH + dec_arg + 1, null_ptr_arg,
+                null_bit_arg, unireg_check_arg, field_name_arg, cs),
+     dec(dec_arg)
+  {
+    DBUG_ASSERT(dec);
+    DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);
+    zero_point= sec_part_shift(
+                   ((TIME_MAX_VALUE_SECONDS+1LL)*TIME_SECOND_PART_FACTOR), dec);
+  }
+  enum ha_base_keytype key_type() const { return HA_KEYTYPE_BINARY; }
+  uint decimals() const { return dec; }
+  int store_decimal(const my_decimal *d);
+  longlong val_int(void);
+  double val_real(void);
+  String *val_str(String*,String *);
+  int reset(void);
+  bool get_date(MYSQL_TIME *ltime, uint fuzzydate);
+  bool send_binary(Protocol *protocol);
+  int cmp(const uchar *,const uchar *);
+  void sort_string(uchar *buff,uint length);
+  uint32 pack_length() const;
+  void sql_type(String &str) const;
+  void make_field(Send_field *);
+  uint size_of() const { return sizeof(*this); }
+};
 
-class Field_datetime :public Field_str {
+class Field_datetime :public Field_temporal {
+  void store_TIME(MYSQL_TIME *ltime);
 public:
-  Field_datetime(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
-		 enum utype unireg_check_arg, const char *field_name_arg,
-		 CHARSET_INFO *cs)
-    :Field_str(ptr_arg, MAX_DATETIME_WIDTH, null_ptr_arg, null_bit_arg,
-	       unireg_check_arg, field_name_arg, cs)
-    { flags|= BINARY_FLAG; }
-  Field_datetime(bool maybe_null_arg, const char *field_name_arg,
-		 CHARSET_INFO *cs)
-    :Field_str((uchar*) 0, MAX_DATETIME_WIDTH, maybe_null_arg ? (uchar*) "": 0,0,
-	       NONE, field_name_arg, cs) { flags|= BINARY_FLAG; }
+  Field_datetime(uchar *ptr_arg, uint length_arg, uchar *null_ptr_arg,
+                 uchar null_bit_arg, enum utype unireg_check_arg,
+                 const char *field_name_arg, CHARSET_INFO *cs)
+    :Field_temporal(ptr_arg, length_arg, null_ptr_arg, null_bit_arg,
+                    unireg_check_arg, field_name_arg, cs)
+    {}
   enum_field_types type() const { return MYSQL_TYPE_DATETIME;}
-  bool match_collation_to_optimize_range() const { return FALSE; }
-#ifdef HAVE_LONG_LONG
   enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONGLONG; }
-#endif
-  enum Item_result cmp_type () const { return INT_RESULT; }
-  enum Derivation derivation(void) const { return DERIVATION_NUMERIC; }
-  uint repertoire(void) const { return MY_REPERTOIRE_NUMERIC; }
-  CHARSET_INFO *charset(void) const { return &my_charset_numeric; }
-  bool binary() const { return 1; }
-  uint decimals() const { return DATETIME_DEC; }
-  int  store(const char *to,uint length,CHARSET_INFO *charset);
-  int  store(double nr);
-  int  store(longlong nr, bool unsigned_val);
-  int store_time(MYSQL_TIME *ltime, timestamp_type type);
-  int reset(void)
-  {
-    ptr[0]=ptr[1]=ptr[2]=ptr[3]=ptr[4]=ptr[5]=ptr[6]=ptr[7]=0;
-    return 0;
-  }
+  uint decimals() const { return 0; }
   double val_real(void);
   longlong val_int(void);
   String *val_str(String*,String *);
@@ -1530,24 +1506,98 @@ public:
   void sort_string(uchar *buff,uint length);
   uint32 pack_length() const { return 8; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
   bool zero_pack() const { return 1; }
   bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
-  bool get_time(MYSQL_TIME *ltime);
   uchar *pack(uchar* to, const uchar *from,
-              uint max_length __attribute__((unused)), bool low_byte_first)
+              uint max_length __attribute__((unused)))
   {
-    return pack_int64(to, from, low_byte_first);
+    return pack_int64(to, from);
   }
   const uchar *unpack(uchar* to, const uchar *from,
-                      uint param_data __attribute__((unused)),
-                      bool low_byte_first)
+                      uint param_data __attribute__((unused)))
   {
-    return unpack_int64(to, from, low_byte_first);
+    return unpack_int64(to, from);
   }
 };
 
 
+class Field_datetime_hires :public Field_datetime {
+  void store_TIME(MYSQL_TIME *ltime);
+  uint dec;
+public:
+  Field_datetime_hires(uchar *ptr_arg, uchar *null_ptr_arg,
+                       uchar null_bit_arg, enum utype unireg_check_arg,
+                       const char *field_name_arg, uint dec_arg,
+                       CHARSET_INFO *cs)
+    :Field_datetime(ptr_arg, MAX_DATETIME_WIDTH + dec_arg + 1,
+                    null_ptr_arg, null_bit_arg, unireg_check_arg,
+                    field_name_arg, cs), dec(dec_arg)
+  {
+    DBUG_ASSERT(dec);
+    DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);
+  }
+  enum ha_base_keytype key_type() const { return HA_KEYTYPE_BINARY; }
+  uint decimals() const { return dec; }
+  void make_field(Send_field *field);
+  int store_decimal(const my_decimal *d);
+  double val_real(void);
+  longlong val_int(void);
+  String *val_str(String*,String *);
+  bool send_binary(Protocol *protocol);
+  int cmp(const uchar *,const uchar *);
+  void sort_string(uchar *buff,uint length);
+  uint32 pack_length() const;
+  void sql_type(String &str) const;
+  bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
+  uchar *pack(uchar *to, const uchar *from, uint max_length)
+  { return Field::pack(to, from, max_length); }
+  const uchar *unpack(uchar* to, const uchar *from, uint param_data)
+  { return Field::unpack(to, from, param_data); }
+  uint size_of() const { return sizeof(*this); }
+};
+
+static inline Field_timestamp *
+new_Field_timestamp(uchar *ptr, uchar *null_ptr, uchar null_bit,
+                    enum Field::utype unireg_check, const char *field_name,
+                    TABLE_SHARE *share, uint dec, CHARSET_INFO *cs)
+{
+  if (dec==0)
+    return new Field_timestamp(ptr, MAX_DATETIME_WIDTH, null_ptr, null_bit,
+                                unireg_check, field_name, share, cs);
+  if (dec == NOT_FIXED_DEC)
+    dec= MAX_DATETIME_PRECISION;
+  return new Field_timestamp_hires(ptr, null_ptr, null_bit, unireg_check,
+                                   field_name, share, dec, cs);
+}
+
+static inline Field_time *
+new_Field_time(uchar *ptr, uchar *null_ptr, uchar null_bit,
+               enum Field::utype unireg_check, const char *field_name,
+               uint dec, CHARSET_INFO *cs)
+{
+  if (dec == 0)
+    return new Field_time(ptr, MIN_TIME_WIDTH, null_ptr, null_bit,
+                          unireg_check, field_name, cs);
+  if (dec == NOT_FIXED_DEC)
+    dec= MAX_DATETIME_PRECISION;
+  return new Field_time_hires(ptr, null_ptr, null_bit,
+                                  unireg_check, field_name, dec, cs);
+}
+
+static inline Field_datetime *
+new_Field_datetime(uchar *ptr, uchar *null_ptr, uchar null_bit,
+                   enum Field::utype unireg_check,
+                   const char *field_name, uint dec, CHARSET_INFO *cs)
+{
+  if (dec == 0)
+    return new Field_datetime(ptr, MAX_DATETIME_WIDTH, null_ptr, null_bit,
+                              unireg_check, field_name, cs);
+  if (dec == NOT_FIXED_DEC)
+    dec= MAX_DATETIME_PRECISION;
+  return new Field_datetime_hires(ptr, null_ptr, null_bit,
+                                  unireg_check, field_name, dec, cs);
+}
+
 class Field_string :public Field_longstr {
 public:
   bool can_alter_field_type;
@@ -1593,9 +1643,8 @@ public:
   void sort_string(uchar *buff,uint length);
   void sql_type(String &str) const;
   virtual uchar *pack(uchar *to, const uchar *from,
-                      uint max_length, bool low_byte_first);
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first);
+                      uint max_length);
+  virtual const uchar *unpack(uchar* to, const uchar *from, uint param_data);
   uint pack_length_from_metadata(uint field_metadata)
   {
     DBUG_PRINT("debug", ("field_metadata: 0x%04x", field_metadata));
@@ -1607,8 +1656,8 @@ public:
                              uint16 mflags, int *order_var);
   uint row_pack_length() { return field_length; }
   int pack_cmp(const uchar *a,const uchar *b,uint key_length,
-               my_bool insert_or_update);
-  int pack_cmp(const uchar *b,uint key_length,my_bool insert_or_update);
+               bool insert_or_update);
+  int pack_cmp(const uchar *b,uint key_length,bool insert_or_update);
   uint packed_col_length(const uchar *to, uint length);
   uint max_packed_col_length(uint max_length);
   uint size_of() const { return sizeof(*this); }
@@ -1681,10 +1730,8 @@ public:
   uint get_key_image(uchar *buff,uint length, imagetype type);
   void set_key_image(const uchar *buff,uint length);
   void sql_type(String &str) const;
-  virtual uchar *pack(uchar *to, const uchar *from,
-                      uint max_length, bool low_byte_first);
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first);
+  uchar *pack(uchar *to, const uchar *from, uint max_length);
+  const uchar *unpack(uchar* to, const uchar *from, uint param_data);
   int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0L);
   int key_cmp(const uchar *,const uchar*);
   int key_cmp(const uchar *str, uint length);
@@ -1790,14 +1837,7 @@ public:
   int reset(void) { bzero(ptr, packlength+sizeof(uchar*)); return 0; }
   void reset_fields() { bzero((uchar*) &value,sizeof(value)); }
   uint32 get_field_buffer_size(void) { return value.alloced_length(); }
-#ifndef WORDS_BIGENDIAN
-  static
-#endif
-  void store_length(uchar *i_ptr, uint i_packlength, uint32 i_number, bool low_byte_first);
-  void store_length(uchar *i_ptr, uint i_packlength, uint32 i_number)
-  {
-    store_length(i_ptr, i_packlength, i_number, table->s->db_low_byte_first);
-  }
+  void store_length(uchar *i_ptr, uint i_packlength, uint32 i_number);
   inline void store_length(uint32 number)
   {
     store_length(ptr, packlength, number);
@@ -1811,15 +1851,14 @@ public:
 
      @returns The length in the row plus the size of the data.
   */
-  uint32 get_packed_size(const uchar *ptr_arg, bool low_byte_first)
-    {return packlength + get_length(ptr_arg, packlength, low_byte_first);}
+  uint32 get_packed_size(const uchar *ptr_arg)
+    {return packlength + get_length(ptr_arg, packlength);}
 
   inline uint32 get_length(uint row_offset= 0)
-  { return get_length(ptr+row_offset, this->packlength, table->s->db_low_byte_first); }
-  uint32 get_length(const uchar *ptr, uint packlength, bool low_byte_first);
+  { return get_length(ptr+row_offset, this->packlength); }
+  uint32 get_length(const uchar *ptr, uint packlength);
   uint32 get_length(const uchar *ptr_arg)
-  { return get_length(ptr_arg, this->packlength, table->s->db_low_byte_first); }
-  void put_length(uchar *pos, uint32 length);
+  { return get_length(ptr_arg, this->packlength); }
   inline void get_ptr(uchar **str)
     {
       memcpy(str, ptr+packlength, sizeof(uchar*));
@@ -1840,9 +1879,9 @@ public:
       memcpy(ptr_ofs+packlength, &data, sizeof(char*));
     }
   inline void set_ptr(uint32 length, uchar *data)
-    {
-      set_ptr_offset(0, length, data);
-    }
+  {
+    set_ptr_offset(0, length, data);
+  }
   uint get_key_image(uchar *buff,uint length, imagetype type);
   void set_key_image(const uchar *buff,uint length);
   void sql_type(String &str) const;
@@ -1859,10 +1898,8 @@ public:
     memcpy(ptr+packlength, &tmp, sizeof(char*));
     return 0;
   }
-  virtual uchar *pack(uchar *to, const uchar *from,
-                      uint max_length, bool low_byte_first);
-  virtual const uchar *unpack(uchar *to, const uchar *from,
-                              uint param_data, bool low_byte_first);
+  uchar *pack(uchar *to, const uchar *from, uint max_length);
+  const uchar *unpack(uchar *to, const uchar *from, uint param_data);
   uint packed_col_length(const uchar *col_ptr, uint length);
   uint max_packed_col_length(uint max_length);
   void free() { value.free(); }
@@ -1939,7 +1976,6 @@ public:
   enum_field_types type() const { return MYSQL_TYPE_STRING; }
   bool match_collation_to_optimize_range() const { return FALSE; }
   enum Item_result cmp_type () const { return INT_RESULT; }
-  enum Item_result cast_to_int_type () const { return INT_RESULT; }
   enum ha_base_keytype key_type() const;
   int  store(const char *to,uint length,CHARSET_INFO *charset);
   int  store(double nr);
@@ -1964,10 +2000,8 @@ public:
   /* enum and set are sorted as integers */
   CHARSET_INFO *sort_charset(void) const { return &my_charset_bin; }
 
-  virtual uchar *pack(uchar *to, const uchar *from,
-                      uint max_length, bool low_byte_first);
-  virtual const uchar *unpack(uchar *to, const uchar *from,
-                              uint param_data, bool low_byte_first);
+  virtual uchar *pack(uchar *to, const uchar *from, uint max_length);
+  virtual const uchar *unpack(uchar *to, const uchar *from, uint param_data);
 
 private:
   int do_save_field_metadata(uchar *first_byte);
@@ -2078,10 +2112,8 @@ public:
   bool compatible_field_size(uint metadata, Relay_log_info *rli,
                              uint16 mflags, int *order_var);
   void sql_type(String &str) const;
-  virtual uchar *pack(uchar *to, const uchar *from,
-                      uint max_length, bool low_byte_first);
-  virtual const uchar *unpack(uchar *to, const uchar *from,
-                              uint param_data, bool low_byte_first);
+  virtual uchar *pack(uchar *to, const uchar *from, uint max_length);
+  virtual const uchar *unpack(uchar *to, const uchar *from, uint param_data);
   virtual void set_default();
 
   Field *new_key_field(MEM_ROOT *root, TABLE *new_table,
@@ -2166,14 +2198,31 @@ public:
   CHARSET_INFO *charset;
   Field::geometry_type geom_type;
   Field *field;				// For alter table
+  engine_option_value *option_list;
+  /** structure with parsed options (for comparing fields in ALTER TABLE) */
+  ha_field_option_struct *option_struct;
 
   uint8 row,col,sc_length,interval_id;	// For rea_create_table
   uint	offset,pack_flag;
-  Create_field() :after(0) {}
+
+    /* 
+    This is additional data provided for any computed (virtual) field.
+    In particular it includes a pointer to the item by which this field
+    can be computed from other fields.
+  */
+  Virtual_column_info *vcol_info;
+  /*
+    Flag indicating that the field is physically stored in tables
+    rather than just computed from other fields.
+    As of now, FALSE can be set only for computed virtual columns.
+  */
+  bool stored_in_db;
+
+  Create_field() :after(0), option_list(NULL), option_struct(NULL)
+  {}
   Create_field(Field *field, Field *orig_field);
   /* Used to make a clone of this object for ALTER/CREATE TABLE */
-  Create_field *clone(MEM_ROOT *mem_root) const
-    { return new (mem_root) Create_field(*this); }
+  Create_field *clone(MEM_ROOT *mem_root) const;
   void create_length_to_internal_length(void);
 
   /* Init for a tmp table field. To be extended if need be. */
@@ -2186,7 +2235,8 @@ public:
             char *decimals, uint type_modifier, Item *default_value,
             Item *on_update_value, LEX_STRING *comment, char *change,
             List<String> *interval_list, CHARSET_INFO *cs,
-            uint uint_geom_type);
+            uint uint_geom_type, Virtual_column_info *vcol_info,
+            engine_option_value *option_list);
 
   bool field_flags_are_binary()
   {
@@ -2225,7 +2275,7 @@ class Copy_field :public Sql_alloc {
 public:
   uchar *from_ptr,*to_ptr;
   uchar *from_null_ptr,*to_null_ptr;
-  my_bool *null_row;
+  bool *null_row;
   uint	from_bit,to_bit;
   uint from_length,to_length;
   Field *from_field,*to_field;
diff --git a/sql/field_conv.cc b/sql/field_conv.cc
index 1a3ac9de08b..8808d73f622 100644
--- a/sql/field_conv.cc
+++ b/sql/field_conv.cc
@@ -208,6 +208,14 @@ static void do_skip(Copy_field *copy __attribute__((unused)))
 }
 
 
+/* 
+  Copy: (NULLable field) -> (NULLable field) 
+
+  Note: if the record we're copying from is NULL-complemented (i.e.
+  from_field->table->null_row==1), it will also have all its NULLable columns
+  set to NULL, so we don't need to check table->null_row here.
+*/
+
 static void do_copy_null(Copy_field *copy)
 {
   if (*copy->from_null_ptr & copy->from_bit)
@@ -222,6 +230,10 @@ static void do_copy_null(Copy_field *copy)
   }
 }
 
+/*
+  Copy: (not-NULL field in table that can be NULL-complemented) -> (NULLable 
+     field)
+*/
 
 static void do_outer_field_null(Copy_field *copy)
 {
@@ -239,6 +251,7 @@ static void do_outer_field_null(Copy_field *copy)
 }
 
 
+/* Copy: (NULL-able field) -> (not NULL-able field) */
 static void do_copy_not_null(Copy_field *copy)
 {
   if (*copy->from_null_ptr & copy->from_bit)
@@ -252,6 +265,7 @@ static void do_copy_not_null(Copy_field *copy)
 }
 
 
+/* Copy: (non-NULLable field) -> (NULLable field) */
 static void do_copy_maybe_null(Copy_field *copy)
 {
   *copy->to_null_ptr&= ~copy->to_bit;
@@ -369,6 +383,14 @@ static void do_field_decimal(Copy_field *copy)
 }
 
 
+static void do_field_temporal(Copy_field *copy)
+{
+  MYSQL_TIME ltime;
+  copy->from_field->get_date(&ltime, TIME_FUZZY_DATE);
+  copy->to_field->store_time_dec(&ltime, copy->from_field->decimals());
+}
+
+
 /**
   string copy for single byte characters set when to string is shorter than
   from string.
@@ -453,7 +475,8 @@ static void do_varstring1(Copy_field *copy)
   if (length > copy->to_length- 1)
   {
     length=copy->to_length - 1;
-    if (copy->from_field->table->in_use->count_cuted_fields)
+    if (copy->from_field->table->in_use->count_cuted_fields &&
+        copy->to_field)
       copy->to_field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
                                   WARN_DATA_TRUNCATED, 1);
   }
@@ -489,7 +512,8 @@ static void do_varstring2(Copy_field *copy)
   if (length > copy->to_length- HA_KEY_BLOB_LENGTH)
   {
     length=copy->to_length-HA_KEY_BLOB_LENGTH;
-    if (copy->from_field->table->in_use->count_cuted_fields)
+    if (copy->from_field->table->in_use->count_cuted_fields &&
+        copy->to_field)
       copy->to_field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
                                   WARN_DATA_TRUNCATED, 1);
   }
@@ -553,9 +577,9 @@ void Copy_field::set(uchar *to,Field *from)
       do_copy=	  do_field_to_null_str;
   }
   else
-  {
+  { 
     to_null_ptr=  0;				// For easy debugging
-    do_copy=	  do_field_eq;
+    do_copy= do_field_eq;
   }
 }
 
@@ -563,7 +587,7 @@ void Copy_field::set(uchar *to,Field *from)
 /*
   To do: 
 
-  If 'save\ is set to true and the 'from' is a blob field, do_copy is set to
+  If 'save' is set to true and the 'from' is a blob field, do_copy is set to
   do_save_blob rather than do_conv_blob.  The only differences between them
   appears to be:
 
@@ -640,13 +664,11 @@ void Copy_field::set(Field *to,Field *from,bool save)
 Copy_field::Copy_func *
 Copy_field::get_copy_func(Field *to,Field *from)
 {
-  bool compatible_db_low_byte_first= (to->table->s->db_low_byte_first ==
-                                     from->table->s->db_low_byte_first);
   if (to->flags & BLOB_FLAG)
   {
     if (!(from->flags & BLOB_FLAG) || from->charset() != to->charset())
       return do_conv_blob;
-    if (from_length != to_length || !compatible_db_low_byte_first)
+    if (from_length != to_length)
     {
       // Correct pointer to point at char pointer
       to_ptr+=   to_length - to->table->s->blob_ptr_size;
@@ -661,6 +683,16 @@ Copy_field::get_copy_func(Field *to,Field *from)
       return do_field_int;
     if (to->result_type() == DECIMAL_RESULT)
       return do_field_decimal;
+    if (from->cmp_type() == TIME_RESULT)
+    {
+      /* If types are not 100 % identical then convert through get_date() */
+      if (!to->eq_def(from) ||
+          ((to->table->in_use->variables.sql_mode &
+            (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE)) &&
+             mysql_type_to_time_type(to->type()) != MYSQL_TIMESTAMP_TIME))
+        return do_field_temporal;
+      /* Do binary copy */
+    }
     // Check if identical fields
     if (from->result_type() == STRING_RESULT)
     {
@@ -673,16 +705,7 @@ Copy_field::get_copy_func(Field *to,Field *from)
           to->type() == MYSQL_TYPE_VARCHAR && !to->has_charset())
         return do_field_varbinary_pre50;
 
-      /*
-        If we are copying date or datetime's we have to check the dates
-        if we don't allow 'all' dates.
-      */
-      if (to->real_type() != from->real_type() ||
-          !compatible_db_low_byte_first ||
-          (((to->table->in_use->variables.sql_mode &
-            (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE | MODE_INVALID_DATES)) &&
-           to->type() == MYSQL_TYPE_DATE) ||
-           to->type() == MYSQL_TYPE_DATETIME))
+      if (to->real_type() != from->real_type())
       {
 	if (from->real_type() == MYSQL_TYPE_ENUM ||
 	    from->real_type() == MYSQL_TYPE_SET)
@@ -698,8 +721,7 @@ Copy_field::get_copy_func(Field *to,Field *from)
           if (from->real_type() == MYSQL_TYPE_ENUM &&
               to->real_type() == MYSQL_TYPE_ENUM)
             return do_field_enum;
-          else
-            return do_field_string;
+          return do_field_string;
         }
       }
       else if (to->charset() != from->charset())
@@ -715,6 +737,9 @@ Copy_field::get_copy_func(Field *to,Field *from)
                                                     do_varstring1_mb) :
                   (from->charset()->mbmaxlen == 1 ? do_varstring2 :
                                                     do_varstring2_mb));
+        else 
+          return  (((Field_varstring*) from)->length_bytes == 1 ?
+                    do_varstring1 : do_varstring2);
       }
       else if (to_length < from_length)
 	return (from->charset()->mbmaxlen == 1 ?
@@ -723,14 +748,11 @@ Copy_field::get_copy_func(Field *to,Field *from)
       {
         if (to->charset() == &my_charset_bin)
           return do_expand_binary;
-        else
-          return do_expand_string;
+        return do_expand_string;
       }
-
     }
     else if (to->real_type() != from->real_type() ||
-	     to_length != from_length ||
-             !compatible_db_low_byte_first)
+	     to_length != from_length)
     {
       if (to->real_type() == MYSQL_TYPE_DECIMAL ||
 	  to->result_type() == STRING_RESULT)
@@ -741,7 +763,7 @@ Copy_field::get_copy_func(Field *to,Field *from)
     }
     else
     {
-      if (!to->eq_def(from) || !compatible_db_low_byte_first)
+      if (!to->eq_def(from))
       {
 	if (to->real_type() == MYSQL_TYPE_DECIMAL)
 	  return do_field_string;
@@ -752,7 +774,7 @@ Copy_field::get_copy_func(Field *to,Field *from)
       }
     }
   }
-    /* Eq fields */
+  /* Identical field types */
   switch (to_length) {
   case 1: return do_field_1;
   case 2: return do_field_2;
@@ -774,14 +796,13 @@ int field_conv(Field *to,Field *from)
   {
     if (to->pack_length() == from->pack_length() &&
         !(to->flags & UNSIGNED_FLAG && !(from->flags & UNSIGNED_FLAG)) &&
+        to->decimals() == from->decimals() &&
 	to->real_type() != MYSQL_TYPE_ENUM &&
 	to->real_type() != MYSQL_TYPE_SET &&
         to->real_type() != MYSQL_TYPE_BIT &&
         (to->real_type() != MYSQL_TYPE_NEWDECIMAL ||
-         (to->field_length == from->field_length &&
-          (((Field_num*)to)->dec == ((Field_num*)from)->dec))) &&
+         to->field_length == from->field_length) &&
         from->charset() == to->charset() &&
-	to->table->s->db_low_byte_first == from->table->s->db_low_byte_first &&
         (!(to->table->in_use->variables.sql_mode &
            (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE | MODE_INVALID_DATES)) ||
          (to->type() != MYSQL_TYPE_DATE &&
@@ -790,8 +811,13 @@ int field_conv(Field *to,Field *from)
          ((Field_varstring*)from)->length_bytes ==
           ((Field_varstring*)to)->length_bytes))
     {						// Identical fields
-      // to->ptr==from->ptr may happen if one does 'UPDATE ... SET x=x'
-      memmove(to->ptr, from->ptr, to->pack_length());
+      /*
+        to->ptr == from->ptr may happen if one does 'UPDATE ... SET x=x'.
+        The test is here mostly for valgrind, but can also be relevant
+        if memcpy() is implemented with prefetch-write
+       */
+      if (to->ptr != from->ptr)
+        memcpy(to->ptr,from->ptr,to->pack_length());
       return 0;
     }
   }
@@ -817,7 +843,22 @@ int field_conv(Field *to,Field *from)
     ((Field_enum *)(to))->store_type(0);
     return 0;
   }
-  else if ((from->result_type() == STRING_RESULT &&
+  if (from->result_type() == REAL_RESULT)
+    return to->store(from->val_real());
+  if (from->result_type() == DECIMAL_RESULT)
+  {
+    my_decimal buff;
+    return to->store_decimal(from->val_decimal(&buff));
+  }
+  if (from->cmp_type() == TIME_RESULT)
+  {
+    MYSQL_TIME ltime;
+    if (from->get_date(&ltime, TIME_FUZZY_DATE))
+      return to->reset();
+    else
+      return to->store_time_dec(&ltime, from->decimals());
+  }
+  if ((from->result_type() == STRING_RESULT &&
             (to->result_type() == STRING_RESULT ||
              (from->real_type() != MYSQL_TYPE_ENUM &&
               from->real_type() != MYSQL_TYPE_SET))) ||
@@ -834,13 +875,5 @@ int field_conv(Field *to,Field *from)
     */
     return to->store(result.c_ptr_quick(),result.length(),from->charset());
   }
-  else if (from->result_type() == REAL_RESULT)
-    return to->store(from->val_real());
-  else if (from->result_type() == DECIMAL_RESULT)
-  {
-    my_decimal buff;
-    return to->store_decimal(from->val_decimal(&buff));
-  }
-  else
-    return to->store(from->val_int(), test(from->flags & UNSIGNED_FLAG));
+  return to->store(from->val_int(), test(from->flags & UNSIGNED_FLAG));
 }
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 88e9353abe3..ebef3b2716b 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -30,8 +30,10 @@
 #include <m_ctype.h>
 #include "sql_sort.h"
 #include "probes_mysql.h"
+#include "sql_base.h"                           // update_virtual_fields
 #include "sql_test.h"                           // TEST_filesort
 #include "opt_range.h"                          // SQL_SELECT
+#include "log_slow.h"
 #include "debug_sync.h"
 
 /// How to write record_ref.
@@ -52,10 +54,6 @@ static int write_keys(SORTPARAM *param,uchar * *sort_keys,
 		      uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile);
 static void make_sortkey(SORTPARAM *param,uchar *to, uchar *ref_pos);
 static void register_used_fields(SORTPARAM *param);
-static int merge_index(SORTPARAM *param,uchar *sort_buffer,
-		       BUFFPEK *buffpek,
-		       uint maxbuffer,IO_CACHE *tempfile,
-		       IO_CACHE *outfile);
 static bool save_index(SORTPARAM *param,uchar **sort_keys, uint count, 
                        FILESORT_INFO *table_sort);
 static uint suffix_length(ulong string_length);
@@ -148,8 +146,6 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
   bzero((char*) &param,sizeof(param));
   param.sort_length= sortlength(thd, sortorder, s_length, &multi_byte_charset);
   param.ref_length= table->file->ref_length;
-  param.addon_field= 0;
-  param.addon_length= 0;
   if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
       !table->fulltext_searched && !sort_positions)
   {
@@ -193,6 +189,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
   {
     status_var_increment(thd->status_var.filesort_scan_count);
   }
+  thd->query_plan_flags|= QPLAN_FILESORT;
 #ifdef CAN_TRUST_RANGE
   if (select && select->quick && select->quick->records > 0L)
   {
@@ -239,7 +236,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
     goto err;
   }
   if (open_cached_file(&buffpek_pointers,mysql_tmpdir,TEMP_PREFIX,
-		       DISK_BUFFER_SIZE, MYF(MY_WME)))
+		       DISK_BUFFER_SIZE, MYF(ME_ERROR | MY_WME)))
     goto err;
 
   param.keys--;  			/* TODO: check why we do this */
@@ -258,6 +255,8 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
   }
   else
   {
+    thd->query_plan_flags|= QPLAN_FILESORT_DISK;
+
     /* filesort cannot handle zero-length records during merge. */
     DBUG_ASSERT(param.sort_length != 0);
 
@@ -276,7 +275,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
 	/* Open cached file if it isn't open */
     if (! my_b_inited(outfile) &&
 	open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
-			  MYF(MY_WME)))
+			  MYF(ME_ERROR | MY_WME)))
       goto err;
     if (reinit_io_cache(outfile,WRITE_CACHE,0L,0,0))
       goto err;
@@ -332,7 +331,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
     DBUG_ASSERT(thd->is_error() || kill_errno);
     my_printf_error(ER_FILSORT_ABORT,
                     "%s: %s",
-                    MYF(ME_ERROR + ME_WAITTANG),
+                    MYF(0),
                     ER_THD(thd, ER_FILSORT_ABORT),
                     kill_errno ? ER(kill_errno) : thd->stmt_da->message());
                     
@@ -529,8 +528,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   THD *thd= current_thd;
   volatile THD::killed_state *killed= &thd->killed;
   handler *file;
-  MY_BITMAP *save_read_set, *save_write_set;
-  bool skip_record;
+  MY_BITMAP *save_read_set, *save_write_set, *save_vcol_set;
   DBUG_ENTER("find_all_keys");
   DBUG_PRINT("info",("using: %s",
                      (select ? select->quick ? "ranges" : "where":
@@ -544,7 +542,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   ref_pos= ref_buff;
   quick_select=select && select->quick;
   record=0;
-  flag= ((!indexfile && file->ha_table_flags() & HA_REC_NOT_IN_SEQ)
+  flag= ((!indexfile && (file->ha_table_flags() & HA_REC_NOT_IN_SEQ))
 	 || quick_select);
   if (indexfile || flag)
     ref_pos= &file->ref[0];
@@ -552,20 +550,17 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   if (! indexfile && ! quick_select)
   {
     next_pos=(uchar*) 0;			/* Find records in sequence */
-    file->ha_rnd_init(1);
+    if (file->ha_rnd_init_with_error(1))
+      DBUG_RETURN(HA_POS_ERROR);
     file->extra_opt(HA_EXTRA_CACHE,
 		    current_thd->variables.read_buff_size);
   }
 
-  if (quick_select)
-  {
-    if (select->quick->reset())
-      DBUG_RETURN(HA_POS_ERROR);
-  }
 
   /* Remember original bitmaps */
   save_read_set=  sort_form->read_set;
   save_write_set= sort_form->write_set;
+  save_vcol_set= sort_form->vcol_set;
   /* Set up temporary column read map for columns used by sort */
   bitmap_clear_all(&sort_form->tmp_set);
   /* Temporary set for register_used_fields and register_field_in_read_map */
@@ -574,7 +569,18 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   if (select && select->cond)
     select->cond->walk(&Item::register_field_in_read_map, 1,
                        (uchar*) sort_form);
-  sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set);
+  if (select && select->pre_idx_push_select_cond)
+    select->pre_idx_push_select_cond->walk(&Item::register_field_in_read_map,
+                                           1, (uchar*) sort_form);
+  sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set, 
+                                &sort_form->tmp_set);
+
+
+  if (quick_select)
+  {
+    if (select->quick->reset())
+      DBUG_RETURN(HA_POS_ERROR);
+  }
 
   for (;;)
   {
@@ -582,6 +588,8 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
     {
       if ((error= select->quick->get_next()))
         break;
+      if (!error)
+        update_virtual_fields(thd, sort_form);
       file->position(sort_form->record[0]);
       DBUG_EXECUTE_IF("debug_filesort", dbug_print_record(sort_form, TRUE););
     }
@@ -594,11 +602,13 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
 	  error= my_errno ? my_errno : -1;		/* Abort */
 	  break;
 	}
-	error=file->rnd_pos(sort_form->record[0],next_pos);
+	error=file->ha_rnd_pos(sort_form->record[0],next_pos);
       }
       else
       {
-	error=file->rnd_next(sort_form->record[0]);
+	error=file->ha_rnd_next(sort_form->record[0]);
+	if (!error)
+	  update_virtual_fields(thd, sort_form);
 	if (!flag)
 	{
 	  my_store_ptr(ref_pos,ref_length,record); // Position to row
@@ -621,10 +631,34 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
       }
       DBUG_RETURN(HA_POS_ERROR);		/* purecov: inspected */
     }
+
+    bool write_record= false;
     if (error == 0)
+    {
       param->examined_rows++;
-    if (!error && (!select ||
-                   (!select->skip_record(thd, &skip_record) && !skip_record)))
+      if (select && select->cond)
+      {
+        /*
+          If the condition 'select->cond' contains a subquery, restore the
+          original read/write/vcol sets of the table 'sort_form' while
+          SQL_SELECT::skip_record evaluates this condition: it may include a
+          correlated subquery predicate, such that some field in the subquery
+          refers to 'sort_form'.
+        */
+        if (select->cond->with_subselect)
+          sort_form->column_bitmaps_set(save_read_set, save_write_set,
+                                        save_vcol_set);
+        write_record= (select->skip_record(thd) > 0);
+        if (select->cond->with_subselect)
+          sort_form->column_bitmaps_set(&sort_form->tmp_set,
+                                        &sort_form->tmp_set,
+                                        &sort_form->tmp_set);
+      }
+      else
+        write_record= true;
+    }
+
+    if (write_record)
     {
       if (idx == param->keys)
       {
@@ -637,6 +671,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
     }
     else
       file->unlock_row();
+
     /* It does not make sense to read more keys in case of a fatal error */
     if (thd->is_error())
       break;
@@ -652,7 +687,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
     DBUG_RETURN(HA_POS_ERROR);
   
   /* Signal we should use orignal column read and write maps */
-  sort_form->column_bitmaps_set(save_read_set, save_write_set);
+  sort_form->column_bitmaps_set(save_read_set, save_write_set, save_vcol_set);
 
   DBUG_PRINT("test",("error: %d  indexpos: %d",error,indexpos));
   if (error != HA_ERR_END_OF_FILE)
@@ -858,25 +893,28 @@ static void make_sortkey(register SORTPARAM *param,
         break;
       }
       case INT_RESULT:
+      case TIME_RESULT:
 	{
-          longlong value= item->val_int_result();
+          longlong UNINIT_VAR(value);
+          if (sort_field->result_type == INT_RESULT)
+            value= item->val_int_result();
+          else
+          {
+            MYSQL_TIME buf;
+            if (item->get_date_result(&buf, TIME_FUZZY_DATE | TIME_INVALID_DATES))
+              DBUG_ASSERT(maybe_null && item->null_value);
+            else
+              value= pack_time(&buf);
+          }
           if (maybe_null)
           {
-	    *to++=1;				/* purecov: inspected */
             if (item->null_value)
             {
-              if (maybe_null)
-                bzero((char*) to-1,sort_field->length+1);
-              else
-              {
-                DBUG_PRINT("warning",
-                           ("Got null on something that shouldn't be null"));
-                bzero((char*) to,sort_field->length);
-              }
+              bzero((char*) to++, sort_field->length+1);
               break;
             }
+	    *to++=1;				/* purecov: inspected */
           }
-#if SIZEOF_LONG_LONG > 4
 	  to[7]= (uchar) value;
 	  to[6]= (uchar) (value >> 8);
 	  to[5]= (uchar) (value >> 16);
@@ -888,15 +926,6 @@ static void make_sortkey(register SORTPARAM *param,
             to[0]= (uchar) (value >> 56);
           else
             to[0]= (uchar) (value >> 56) ^ 128;	/* Reverse signbit */
-#else
-	  to[3]= (uchar) value;
-	  to[2]= (uchar) (value >> 8);
-	  to[1]= (uchar) (value >> 16);
-          if (item->unsigned_flag)                    /* Fix sign */
-            to[0]= (uchar) (value >> 24);
-          else
-            to[0]= (uchar) (value >> 24) ^ 128;	/* Reverse signbit */
-#endif
 	  break;
 	}
       case DECIMAL_RESULT:
@@ -906,8 +935,7 @@ static void make_sortkey(register SORTPARAM *param,
           {
             if (item->null_value)
             { 
-              bzero((char*)to, sort_field->length+1);
-              to++;
+              bzero((char*) to++, sort_field->length+1);
               break;
             }
             *to++=1;
@@ -1008,7 +1036,14 @@ static void register_used_fields(SORTPARAM *param)
     if ((field= sort_field->field))
     {
       if (field->table == table)
-      bitmap_set_bit(bitmap, field->field_index);
+      {
+        if (field->vcol_info)
+	{
+          Item *vcol_item= field->vcol_info->expr_item;
+          vcol_item->walk(&Item::register_field_in_read_map, 1, (uchar *) 0);
+        }                   
+        bitmap_set_bit(bitmap, field->field_index);
+      }
     }
     else
     {						// Item
@@ -1151,7 +1186,9 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
 void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length)
 {
   uchar *reuse_end= reuse->base + reuse->max_keys * key_length;
-  for (uint i= 0; i < queue->elements; ++i)
+  for (uint i= queue_first_element(queue);
+       i <= queue_last_element(queue);
+       i++)
   {
     BUFFPEK *bp= (BUFFPEK *) queue_element(queue, i);
     if (bp->base + bp->max_keys * key_length == reuse->base)
@@ -1204,11 +1241,15 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
   QUEUE queue;
   qsort2_cmp cmp;
   void *first_cmp_arg;
-  volatile THD::killed_state *killed= &current_thd->killed;
+  element_count dupl_count= 0;
+  uchar *src;
   THD::killed_state not_killable;
+  uchar *unique_buff= param->unique_buff;
+  volatile THD::killed_state *killed= &current_thd->killed;
   DBUG_ENTER("merge_buffers");
 
   status_var_increment(current_thd->status_var.filesort_merge_passes);
+  current_thd->query_plan_fsort_passes++;
   if (param->not_killable)
   {
     killed= &not_killable;
@@ -1219,16 +1260,22 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
   rec_length= param->rec_length;
   res_length= param->res_length;
   sort_length= param->sort_length;
-  offset= rec_length-res_length;
+  uint dupl_count_ofs= rec_length-sizeof(element_count);
+  uint min_dupl_count= param->min_dupl_count;
+  bool check_dupl_count= flag && min_dupl_count;
+  offset= (rec_length-
+           (flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length);
+  uint wr_len= flag ? res_length : rec_length;
+  uint wr_offset= flag ? offset : 0;
   maxcount= (ulong) (param->keys/((uint) (Tb-Fb) +1));
   to_start_filepos= my_b_tell(to_file);
-  strpos= (uchar*) sort_buffer;
+  strpos= sort_buffer;
   org_max_rows=max_rows= param->max_rows;
 
   /* The following will fire if there is not enough space in sort_buffer */
   DBUG_ASSERT(maxcount!=0);
   
-  if (param->unique_buff)
+  if (unique_buff)
   {
     cmp= param->compare;
     first_cmp_arg= (void *) &param->cmp_context;
@@ -1239,44 +1286,45 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
     first_cmp_arg= (void*) &sort_length;
   }
   if (init_queue(&queue, (uint) (Tb-Fb)+1, offsetof(BUFFPEK,key), 0,
-                 (queue_compare) cmp, first_cmp_arg))
+                 (queue_compare) cmp, first_cmp_arg, 0, 0))
     DBUG_RETURN(1);                                /* purecov: inspected */
   for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
   {
     buffpek->base= strpos;
     buffpek->max_keys= maxcount;
     strpos+= (uint) (error= (int) read_to_buffer(from_file, buffpek,
-                                                                         rec_length));
+                                                 rec_length));
     if (error == -1)
       goto err;					/* purecov: inspected */
     buffpek->max_keys= buffpek->mem_count;	// If less data in buffers than expected
     queue_insert(&queue, (uchar*) buffpek);
   }
 
-  if (param->unique_buff)
+  if (unique_buff)
   {
     /* 
        Called by Unique::get()
-       Copy the first argument to param->unique_buff for unique removal.
+       Copy the first argument to unique_buff for unique removal.
        Store it also in 'to_file'.
-
-       This is safe as we know that there is always more than one element
-       in each block to merge (This is guaranteed by the Unique:: algorithm
     */
     buffpek= (BUFFPEK*) queue_top(&queue);
-    memcpy(param->unique_buff, buffpek->key, rec_length);
-    if (my_b_write(to_file, (uchar*) buffpek->key, rec_length))
-    {
-      error=1; goto err;                        /* purecov: inspected */
-    }
+    memcpy(unique_buff, buffpek->key, rec_length);
+    if (min_dupl_count)
+      memcpy(&dupl_count, unique_buff+dupl_count_ofs, 
+             sizeof(dupl_count));
     buffpek->key+= rec_length;
-    buffpek->mem_count--;
-    if (!--max_rows)
+    if (! --buffpek->mem_count)
     {
-      error= 0;                                       /* purecov: inspected */
-      goto end;                                       /* purecov: inspected */
+      if (!(error= (int) read_to_buffer(from_file, buffpek,
+                                        rec_length)))
+      {
+        (void) queue_remove_top(&queue);  /* Top buffer is exhausted */
+        reuse_freed_buff(&queue, buffpek, rec_length);
+      }
+      else if (error == -1)
+        goto err;                        /* purecov: inspected */
     }
-    queue_replaced(&queue);                        // Top element has been used
+    queue_replace_top(&queue);            // Top element has been used
   }
   else
     cmp= 0;                                        // Not unique
@@ -1290,27 +1338,50 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
     for (;;)
     {
       buffpek= (BUFFPEK*) queue_top(&queue);
+      src= buffpek->key;
       if (cmp)                                        // Remove duplicates
       {
-        if (!(*cmp)(first_cmp_arg, &(param->unique_buff),
+        if (!(*cmp)(first_cmp_arg, &unique_buff,
                     (uchar**) &buffpek->key))
-              goto skip_duplicate;
-            memcpy(param->unique_buff, (uchar*) buffpek->key, rec_length);
-      }
-      if (flag == 0)
-      {
-        if (my_b_write(to_file,(uchar*) buffpek->key, rec_length))
-        {
-          error=1; goto err;                        /* purecov: inspected */
+	{
+          if (min_dupl_count)
+	  {
+            element_count cnt;
+            memcpy(&cnt, (uchar *) buffpek->key+dupl_count_ofs, sizeof(cnt));
+            dupl_count+= cnt;
+          }
+          goto skip_duplicate;
+        }
+        if (min_dupl_count)
+	{
+          memcpy(unique_buff+dupl_count_ofs, &dupl_count,
+                 sizeof(dupl_count));
         }
+	src= unique_buff;
       }
-      else
+        
+      /* 
+        Do not write into the output file if this is the final merge called
+        for a Unique object used for intersection and dupl_count is less
+        than min_dupl_count.
+        If the Unique object is used to intersect N sets of unique elements
+        then for any element:
+        dupl_count >= N <=> the element occurs in each of these N sets.
+      */          
+      if (!check_dupl_count || dupl_count >= min_dupl_count)
       {
-        if (my_b_write(to_file, (uchar*) buffpek->key+offset, res_length))
+        if (my_b_write(to_file, src+wr_offset, wr_len))
         {
           error=1; goto err;                        /* purecov: inspected */
         }
       }
+      if (cmp)
+      {   
+        memcpy(unique_buff, (uchar*) buffpek->key, rec_length);
+        if (min_dupl_count)
+          memcpy(&dupl_count, unique_buff+dupl_count_ofs, 
+                 sizeof(dupl_count));
+      }
       if (!--max_rows)
       {
         error= 0;                               /* purecov: inspected */
@@ -1321,21 +1392,21 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
       buffpek->key+= rec_length;
       if (! --buffpek->mem_count)
       {
-        if (!(error= (int) read_to_buffer(from_file,buffpek,
+        if (!(error= (int) read_to_buffer(from_file, buffpek,
                                           rec_length)))
         {
-          (void) queue_remove(&queue,0);
+          (void) queue_remove_top(&queue);
           reuse_freed_buff(&queue, buffpek, rec_length);
           break;                        /* One buffer have been removed */
         }
         else if (error == -1)
           goto err;                        /* purecov: inspected */
       }
-      queue_replaced(&queue);              /* Top element has been replaced */
+      queue_replace_top(&queue);   	/* Top element has been replaced */
     }
   }
   buffpek= (BUFFPEK*) queue_top(&queue);
-  buffpek->base= sort_buffer;
+  buffpek->base= (uchar*) sort_buffer;
   buffpek->max_keys= param->keys;
 
   /*
@@ -1344,11 +1415,35 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
   */
   if (cmp)
   {
-    if (!(*cmp)(first_cmp_arg, &(param->unique_buff), (uchar**) &buffpek->key))
+    if (!(*cmp)(first_cmp_arg, &unique_buff, (uchar**) &buffpek->key))
     {
-      buffpek->key+= rec_length;         // Remove duplicate
+      if (min_dupl_count)
+      {
+        element_count cnt;
+        memcpy(&cnt, (uchar *) buffpek->key+dupl_count_ofs, sizeof(cnt));
+        dupl_count+= cnt;
+      }
+      buffpek->key+= rec_length;         
       --buffpek->mem_count;
     }
+
+    if (min_dupl_count)
+      memcpy(unique_buff+dupl_count_ofs, &dupl_count,
+             sizeof(dupl_count));
+
+    if (!check_dupl_count || dupl_count >= min_dupl_count)
+    {
+      src= unique_buff;
+      if (my_b_write(to_file, src+wr_offset, wr_len))
+      {
+        error=1; goto err;                        /* purecov: inspected */
+      }
+      if (!--max_rows)
+      {
+        error= 0;                               
+        goto end;                             
+      }
+    }   
   }
 
   do
@@ -1361,7 +1456,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
     max_rows-= buffpek->mem_count;
     if (flag == 0)
     {
-      if (my_b_write(to_file,(uchar*) buffpek->key,
+      if (my_b_write(to_file, (uchar*) buffpek->key,
                      (rec_length*buffpek->mem_count)))
       {
         error= 1; goto err;                        /* purecov: inspected */
@@ -1370,19 +1465,25 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
     else
     {
       register uchar *end;
-      strpos= buffpek->key+offset;
-      for (end= strpos+buffpek->mem_count*rec_length ;
-           strpos != end ;
-           strpos+= rec_length)
-      {     
-        if (my_b_write(to_file, (uchar *) strpos, res_length))
+      src= buffpek->key+offset;
+      for (end= src+buffpek->mem_count*rec_length ;
+           src != end ;
+           src+= rec_length)
+      {
+        if (check_dupl_count)
+        {
+          memcpy((uchar *) &dupl_count, src+dupl_count_ofs, sizeof(dupl_count)); 
+          if (dupl_count < min_dupl_count)
+	    continue;
+        }
+        if (my_b_write(to_file, src, wr_len))
         {
           error=1; goto err;                        
         }
       }
     }
   }
-  while ((error=(int) read_to_buffer(from_file,buffpek, rec_length))
+  while ((error=(int) read_to_buffer(from_file, buffpek, rec_length))
          != -1 && error != 0);
 
 end:
@@ -1396,7 +1497,7 @@ err:
 
 	/* Do a merge to output-file (save only positions) */
 
-static int merge_index(SORTPARAM *param, uchar *sort_buffer,
+int merge_index(SORTPARAM *param, uchar *sort_buffer,
 		       BUFFPEK *buffpek, uint maxbuffer,
 		       IO_CACHE *tempfile, IO_CACHE *outfile)
 {
@@ -1468,9 +1569,7 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
     }
     else
     {
-      sortorder->result_type= sortorder->item->result_type();
-      if (sortorder->item->result_as_longlong())
-        sortorder->result_type= INT_RESULT;
+      sortorder->result_type= sortorder->item->cmp_type();
       switch (sortorder->result_type) {
       case STRING_RESULT:
 	sortorder->length=sortorder->item->max_length;
@@ -1488,12 +1587,9 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
           sortorder->length+= sortorder->suffix_length;
         }
 	break;
+      case TIME_RESULT:
       case INT_RESULT:
-#if SIZEOF_LONG_LONG > 4
 	sortorder->length=8;			// Size of intern longlong
-#else
-	sortorder->length=4;
-#endif
 	break;
       case DECIMAL_RESULT:
         sortorder->length=
@@ -1568,6 +1664,7 @@ get_addon_fields(THD *thd, Field **ptabfield, uint sortlength, uint *plength)
     Actually we need only the fields referred in the
     result set. And for some of them it makes sense to use 
     the values directly from sorted fields.
+    But beware the case when item->cmp_type() != item->result_type()
   */
   *plength= 0;
 
@@ -1702,3 +1799,4 @@ void change_double_for_sort(double nr,uchar *to)
     }
   }
 }
+
diff --git a/sql/filesort.h b/sql/filesort.h
index 0ac48d8f889..8ee8999d055 100644
--- a/sql/filesort.h
+++ b/sql/filesort.h
@@ -31,6 +31,8 @@ ha_rows filesort(THD *thd, TABLE *table, st_sort_field *sortorder,
                  ha_rows max_rows, bool sort_positions,
                  ha_rows *examined_rows);
 void filesort_free_buffers(TABLE *table, bool full);
+double get_merge_many_buffs_cost(uint *buffer, uint last_n_elems,
+                                 int elem_size);
 void change_double_for_sort(double nr,uchar *to);
 
 #endif /* FILESORT_INCLUDED */
diff --git a/sql/gstream.h b/sql/gstream.h
index 110fed68bad..533fafa28f3 100644
--- a/sql/gstream.h
+++ b/sql/gstream.h
@@ -21,9 +21,6 @@
 #include "my_sys.h"                             /* MY_ALLOW_ZERO_PTR */
 #include "m_ctype.h"           /* my_charset_latin1, my_charset_bin */
 
-typedef struct charset_info_st CHARSET_INFO;
-typedef struct st_mysql_lex_string LEX_STRING;
-
 class Gis_read_stream
 {
 public:
diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc
index 610425ab735..7a62efb1917 100644
--- a/sql/ha_ndbcluster.cc
+++ b/sql/ha_ndbcluster.cc
@@ -4404,8 +4404,8 @@ void ha_ndbcluster::start_bulk_insert(ha_rows rows)
 int ha_ndbcluster::end_bulk_insert()
 {
   int error= 0;
-
   DBUG_ENTER("end_bulk_insert");
+
   // Check if last inserts need to be flushed
   if (m_bulk_insert_not_flushed)
   {
@@ -4757,7 +4757,7 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type)
   Thd_ndb *thd_ndb= get_thd_ndb(thd);
   Ndb *ndb= thd_ndb->ndb;
 
-  DBUG_PRINT("enter", ("this: 0x%lx  thd: 0x%lx  thd_ndb: %lx  "
+  DBUG_PRINT("enter", ("this: 0x%lx  thd: 0x%lx  thd_ndb: 0x%lx  "
                        "thd_ndb->lock_count: %d",
                        (long) this, (long) thd, (long) thd_ndb,
                        thd_ndb->lock_count));
@@ -8979,6 +8979,8 @@ ha_ndbcluster::null_value_index_search(KEY_MULTI_RANGE *ranges,
   DBUG_RETURN(FALSE);
 }
 
+#if 0 
+/* MRR/NDB is disabled, for details see method declarations in ha_ndbcluster.h */
 int
 ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
                                       KEY_MULTI_RANGE *ranges, 
@@ -9391,6 +9393,7 @@ found_next:
   m_multi_range_result_ptr += reclength;
   DBUG_RETURN(0);
 }
+#endif 
 
 int
 ha_ndbcluster::setup_recattr(const NdbRecAttr* curr)
@@ -11027,5 +11030,24 @@ mysql_declare_plugin(ndbcluster)
   0,                          /* flags                           */
 }
 mysql_declare_plugin_end;
+maria_declare_plugin(ndbcluster)
+{
+  MYSQL_STORAGE_ENGINE_PLUGIN,
+  &ndbcluster_storage_engine,
+  ndbcluster_hton_name,
+  "MySQL AB",
+  "Clustered, fault-tolerant tables",
+  PLUGIN_LICENSE_GPL,
+  ndbcluster_init, /* Plugin Init */
+  NULL, /* Plugin Deinit */
+  0x0100 /* 1.0 */,
+  ndb_status_variables_export,/* status variables                */
+  NULL,                       /* system variables                */
+  "1.0",                      /* string version */
+  MariaDB_PLUGIN_MATURITY_GAMMA /* maturity */
+}
+maria_declare_plugin_end;
 
+#else
+int Sun_ar_require_a_symbol_here= 0;
 #endif
diff --git a/sql/ha_ndbcluster.h b/sql/ha_ndbcluster.h
index 787b3eeaaea..f3c9553f524 100644
--- a/sql/ha_ndbcluster.h
+++ b/sql/ha_ndbcluster.h
@@ -263,10 +263,20 @@ class ha_ndbcluster: public handler
   /**
    * Multi range stuff
    */
+#if 0 
+  /*
+    MRR/NDB is disabled in MariaDB. This is because in MariaDB, we've
+    backported
+     - the latest version of MRR interface (BKA needs this)
+     - the latest version of DS-MRR implementation
+    but didn't backport the latest version of the MRR/NDB implementation.
+
+  */
   int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
                              KEY_MULTI_RANGE*ranges, uint range_count,
                              bool sorted, HANDLER_BUFFER *buffer);
   int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+#endif  
   bool null_value_index_search(KEY_MULTI_RANGE *ranges,
 			       KEY_MULTI_RANGE *end_range,
 			       HANDLER_BUFFER *buffer);
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index 82bd39220a9..c580ae86439 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -55,6 +55,7 @@
 
 #include "sql_priv.h"
 #include "sql_parse.h"                          // append_file_to_dir
+#include "create_options.h"
 
 #ifdef WITH_PARTITION_STORAGE_ENGINE
 #include "ha_partition.h"
@@ -167,6 +168,7 @@ ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share)
   :handler(hton, share)
 {
   DBUG_ENTER("ha_partition::ha_partition(table)");
+  init_alloc_root(&m_mem_root, 512, 512);
   init_handler_variables();
   DBUG_VOID_RETURN;
 }
@@ -188,6 +190,7 @@ ha_partition::ha_partition(handlerton *hton, partition_info *part_info)
 {
   DBUG_ENTER("ha_partition::ha_partition(part_info)");
   DBUG_ASSERT(part_info);
+  init_alloc_root(&m_mem_root, 512, 512);
   init_handler_variables();
   m_part_info= part_info;
   m_create_handler= TRUE;
@@ -214,6 +217,7 @@ ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share,
   :handler(hton, share)
 {
   DBUG_ENTER("ha_partition::ha_partition(clone)");
+  init_alloc_root(&m_mem_root, 512, 512);
   init_handler_variables();
   m_part_info= part_info_arg;
   m_create_handler= TRUE;
@@ -241,6 +245,7 @@ void ha_partition::init_handler_variables()
   m_file_buffer= NULL;
   m_name_buffer_ptr= NULL;
   m_engine_array= NULL;
+  m_connect_string= NULL;
   m_file= NULL;
   m_file_tot_parts= 0;
   m_reorged_file= NULL;
@@ -264,7 +269,6 @@ void ha_partition::init_handler_variables()
   m_extra_prepare_for_update= FALSE;
   m_extra_cache_part_id= NO_CURRENT_PART_ID;
   m_handler_status= handler_not_initialized;
-  m_low_byte_first= 1;
   m_part_field_array= NULL;
   m_ordered_rec_buffer= NULL;
   m_top_entry= NO_CURRENT_PART_ID;
@@ -320,8 +324,12 @@ ha_partition::~ha_partition()
       delete m_file[i];
   }
   my_free(m_ordered_rec_buffer);
+  m_ordered_rec_buffer= NULL;
 
   clear_handler_file();
+
+  free_root(&m_mem_root, MYF(0));
+
   DBUG_VOID_RETURN;
 }
 
@@ -366,7 +374,7 @@ ha_partition::~ha_partition()
      The flag HA_READ_ORDER will be reset for the time being to indicate no
      ordered output is available from partition handler indexes. Later a merge
      sort will be performed using the underlying handlers.
-  5) primary_key_is_clustered, has_transactions and low_byte_first is
+  5) primary_key_is_clustered and has_transactions are
      calculated here.
 
 */
@@ -402,24 +410,17 @@ bool ha_partition::initialize_partition(MEM_ROOT *mem_root)
     We create all underlying table handlers here. We do it in this special
     method to be able to report allocation errors.
 
-    Set up low_byte_first, primary_key_is_clustered and
+    Set up primary_key_is_clustered and
     has_transactions since they are called often in all kinds of places,
     other parameters are calculated on demand.
     Verify that all partitions have the same table_flags.
   */
   check_table_flags= m_file[0]->ha_table_flags();
-  m_low_byte_first= m_file[0]->low_byte_first();
   m_pkey_is_clustered= TRUE;
   file_array= m_file;
   do
   {
     file= *file_array;
-    if (m_low_byte_first != file->low_byte_first())
-    {
-      // Cannot have handlers with different endian
-      my_error(ER_MIX_HANDLER_ERROR, MYF(0));
-      DBUG_RETURN(1);
-    }
     if (!file->primary_key_is_clustered())
       m_pkey_is_clustered= FALSE;
     if (check_table_flags != file->ha_table_flags())
@@ -589,6 +590,13 @@ int ha_partition::create(const char *name, TABLE *table_arg,
   char t_name[FN_REFLEN];
   DBUG_ENTER("ha_partition::create");
 
+  if (create_info->used_fields & HA_CREATE_USED_CONNECTION)
+  {
+    my_error(ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0),
+             "CONNECTION not valid for partition");
+    DBUG_RETURN(1);
+  }
+
   strmov(t_name, name);
   DBUG_ASSERT(*fn_rext((char*)name) == '\0');
   if (del_ren_cre_table(t_name, NULL, table_arg, create_info))
@@ -1079,8 +1087,8 @@ static bool print_admin_msg(THD* thd, const char* msg_type,
   va_list args;
   Protocol *protocol= thd->protocol;
   uint length, msg_length;
-  char msgbuf[MI_MAX_MSG_BUF];
-  char name[NAME_LEN*2+2];
+  char msgbuf[MYSQL_ERRMSG_SIZE];
+  char name[SAFE_NAME_LEN*2+2];
 
   va_start(args, fmt);
   msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
@@ -1090,7 +1098,7 @@ static bool print_admin_msg(THD* thd, const char* msg_type,
 
   if (!thd->vio_ok())
   {
-    sql_print_error("%s", msgbuf);
+    sql_print_error("%s", msgbuf);     /* msgbuf is already formatted */
     return TRUE;
   }
 
@@ -1173,7 +1181,8 @@ int ha_partition::handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt,
                 error != HA_ADMIN_ALREADY_DONE &&
                 error != HA_ADMIN_TRY_ALTER)
             {
-              print_admin_msg(thd, "error", table_share->db.str, table->alias,
+              print_admin_msg(thd, "error", table_share->db.str,
+                              table->alias.c_ptr(),
                               opt_op_name[flag],
                               "Subpartition %s returned error", 
                               sub_elem->partition_name);
@@ -1199,7 +1208,8 @@ int ha_partition::handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt,
               error != HA_ADMIN_ALREADY_DONE &&
               error != HA_ADMIN_TRY_ALTER)
           {
-            print_admin_msg(thd, "error", table_share->db.str, table->alias,
+            print_admin_msg(thd, "error", table_share->db.str,
+                            table->alias.c_ptr(),
                             opt_op_name[flag], "Partition %s returned error", 
                             part_elem->partition_name);
           }
@@ -1308,6 +1318,7 @@ int ha_partition::prepare_new_partition(TABLE *tbl,
   if ((error= set_up_table_before_create(tbl, part_name, create_info,
                                          0, p_elem)))
     goto error_create;
+  tbl->s->connect_string = p_elem->connect_string;
   if ((error= file->ha_create(part_name, tbl, create_info)))
   {
     /*
@@ -1337,7 +1348,7 @@ int ha_partition::prepare_new_partition(TABLE *tbl,
 
   DBUG_RETURN(0);
 error_external_lock:
-  (void) file->close();
+  (void) file->ha_close();
 error_open:
   (void) file->ha_delete_table(part_name);
 error_create:
@@ -1383,7 +1394,7 @@ void ha_partition::cleanup_new_partition(uint part_count)
     while ((part_count > 0) && (*file))
     {
       (*file)->ha_external_lock(thd, F_UNLCK);
-      (*file)->close();
+      (*file)->ha_close();
 
       /* Leave the (*file)->ha_delete_table(part_name) to the ddl-log */
 
@@ -1748,11 +1759,11 @@ int ha_partition::copy_partitions(ulonglong * const copied,
     uint32 new_part;
 
     late_extra_cache(reorg_part);
-    if ((result= file->ha_rnd_init(1)))
+    if ((result= file->ha_rnd_init_with_error(1)))
       goto error;
     while (TRUE)
     {
-      if ((result= file->rnd_next(m_rec0)))
+      if ((result= file->ha_rnd_next(m_rec0)))
       {
         if (result == HA_ERR_RECORD_DELETED)
           continue;                              //Probably MyISAM
@@ -1828,6 +1839,8 @@ void ha_partition::update_create_info(HA_CREATE_INFO *create_info)
     create_info->auto_increment_value= stats.auto_increment_value;
 
   create_info->data_file_name= create_info->index_file_name = NULL;
+  create_info->connect_string.str= NULL;
+  create_info->connect_string.length= 0;
   return;
 }
 
@@ -1974,6 +1987,8 @@ uint ha_partition::del_ren_cre_table(const char *from,
     {
       if ((error= set_up_table_before_create(table_arg, from_buff,
                                              create_info, i, NULL)) ||
+          (error= parse_engine_table_options(ha_thd(), (*file)->ht,
+                                             (*file)->table_share)) ||
           ((error= (*file)->ha_create(from_buff, table_arg, create_info))))
         goto create_error;
     }
@@ -2114,6 +2129,10 @@ int ha_partition::set_up_table_before_create(TABLE *tbl,
   }
   info->index_file_name= part_elem->index_file_name;
   info->data_file_name= part_elem->data_file_name;
+  info->connect_string= part_elem->connect_string;
+  if (info->connect_string.length)
+    info->used_fields|= HA_CREATE_USED_CONNECTION;
+  tbl->s->connect_string= part_elem->connect_string;
   DBUG_RETURN(0);
 }
 
@@ -2228,8 +2247,10 @@ bool ha_partition::create_handler_file(const char *name)
   /* 4 static words (tot words, checksum, tot partitions, name length) */
   tot_len_words= 4 + tot_partition_words + tot_name_words;
   tot_len_byte= PAR_WORD_SIZE * tot_len_words;
-  if (!(file_buffer= (uchar *) my_malloc(tot_len_byte, MYF(MY_ZEROFILL))))
+  file_buffer= (uchar *) my_alloca(tot_len_byte);
+  if (!file_buffer)
     DBUG_RETURN(TRUE);
+  bzero(file_buffer, tot_len_byte);
   engine_array= (file_buffer + PAR_ENGINES_OFFSET);
   name_buffer_ptr= (char*) (engine_array + tot_partition_words * PAR_WORD_SIZE
                             + PAR_WORD_SIZE);
@@ -2289,11 +2310,28 @@ bool ha_partition::create_handler_file(const char *name)
   {
     result= mysql_file_write(file, (uchar *) file_buffer, tot_len_byte,
                              MYF(MY_WME | MY_NABP)) != 0;
+
+    /* Write connection information (for federatedx engine) */
+    part_it.rewind();
+    for (i= 0; i < num_parts && !result; i++)
+    {
+      uchar buffer[4];
+      part_elem= part_it++;
+      uint length = part_elem->connect_string.length;
+      int4store(buffer, length);
+      if (my_write(file, buffer, 4, MYF(MY_WME | MY_NABP)) ||
+          my_write(file, (uchar *) part_elem->connect_string.str, length,
+                   MYF(MY_WME | MY_NABP)))
+      {
+        result= TRUE;
+        break;
+      }
+    }
     (void) mysql_file_close(file, MYF(0));
   }
   else
     result= TRUE;
-  my_free(file_buffer);
+  my_afree((char*) file_buffer);
   DBUG_RETURN(result);
 }
 
@@ -2306,10 +2344,10 @@ void ha_partition::clear_handler_file()
 {
   if (m_engine_array)
     plugin_unlock_list(NULL, m_engine_array, m_tot_parts);
-  my_free(m_file_buffer);
-  my_free(m_engine_array);
+  free_root(&m_mem_root, MYF(MY_KEEP_PREALLOC));
   m_file_buffer= NULL;
   m_engine_array= NULL;
+  m_connect_string= NULL;
 }
 
 
@@ -2466,7 +2504,7 @@ bool ha_partition::read_par_file(const char *name)
   len_bytes= PAR_WORD_SIZE * len_words;
   if (mysql_file_seek(file, 0, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR)
     goto err1;
-  if (!(file_buffer= (char*) my_malloc(len_bytes, MYF(0))))
+  if (!(file_buffer= (char*) alloc_root(&m_mem_root, len_bytes)))
     goto err1;
   if (mysql_file_read(file, (uchar *) file_buffer, len_bytes, MYF(MY_NABP)))
     goto err2;
@@ -2490,14 +2528,37 @@ bool ha_partition::read_par_file(const char *name)
   */
   if (len_words != (tot_partition_words + tot_name_words + 4))
     goto err2;
-  (void) mysql_file_close(file, MYF(0));
   m_file_buffer= file_buffer;          // Will be freed in clear_handler_file()
   m_name_buffer_ptr= tot_name_len_offset + PAR_WORD_SIZE;
 
+  if (!(m_connect_string= (LEX_STRING*)
+        alloc_root(&m_mem_root, m_tot_parts * sizeof(LEX_STRING))))
+    goto err2;
+  bzero(m_connect_string, m_tot_parts * sizeof(LEX_STRING));
+
+  /* Read connection arguments (for federated X engine) */
+  for (i= 0; i < m_tot_parts; i++)
+  {
+    LEX_STRING connect_string;
+    uchar buffer[4];
+    if (my_read(file, buffer, 4, MYF(MY_NABP)))
+    {
+      /* No extra options; Probably not a federatedx engine */
+      break;
+    }
+    connect_string.length= uint4korr(buffer);
+    connect_string.str= (char*) alloc_root(&m_mem_root, connect_string.length+1);
+    if (!connect_string.str || my_read(file, (uchar*) connect_string.str,
+                                       connect_string.length, MYF(MY_NABP)))
+      break;
+    connect_string.str[connect_string.length]= 0;
+    m_connect_string[i]= connect_string;
+  }
+
+  (void) mysql_file_close(file, MYF(0));
   DBUG_RETURN(false);
 
 err2:
-  my_free(file_buffer);
 err1:
   (void) mysql_file_close(file, MYF(0));
   DBUG_RETURN(true);
@@ -2536,13 +2597,13 @@ bool ha_partition::setup_engine_array(MEM_ROOT *mem_root)
       goto err;
   }
   if (!(m_engine_array= (plugin_ref*)
-                my_malloc(m_tot_parts * sizeof(plugin_ref), MYF(MY_WME))))
+        alloc_root(&m_mem_root, m_tot_parts * sizeof(plugin_ref))))
     goto err;
 
   for (i= 0; i < m_tot_parts; i++)
     m_engine_array[i]= ha_lock_engine(NULL, engine_array[i]);
 
-  my_afree((gptr) engine_array);
+  my_afree(engine_array);
     
   if (create_handlers(mem_root))
   {
@@ -2553,7 +2614,7 @@ bool ha_partition::setup_engine_array(MEM_ROOT *mem_root)
   DBUG_RETURN(false);
 
 err:
-  my_afree((gptr) engine_array);
+  my_afree(engine_array);
   DBUG_RETURN(true);
 }
 
@@ -2653,7 +2714,7 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked)
   name_buffer_ptr= m_name_buffer_ptr;
   m_start_key.length= 0;
   m_rec0= table->record[0];
-  m_rec_length= table_share->reclength;
+  m_rec_length= table_share->stored_rec_length;
   alloc_len= m_tot_parts * (m_rec_length + PARTITION_BYTES_IN_POS);
   alloc_len+= table_share->max_key_length;
   if (!m_ordered_rec_buffer)
@@ -2731,8 +2792,10 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked)
    {
       create_partition_name(name_buff, name, name_buffer_ptr, NORMAL_PART_NAME,
                             FALSE);
+      table->s->connect_string = m_connect_string[(uint)(file-m_file)];
       if ((error= (*file)->ha_open(table, name_buff, mode, test_if_locked)))
         goto err_handler;
+      bzero(&table->s->connect_string, sizeof(LEX_STRING));
       m_num_locks+= (*file)->lock_count();
       name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
     } while (*(++file));
@@ -2779,7 +2842,7 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked)
     Initialize priority queue, initialized to reading forward.
   */
   if ((error= init_queue(&m_queue, m_tot_parts, (uint) PARTITION_BYTES_IN_POS,
-                         0, key_rec_cmp, (void*)this)))
+                         0, key_rec_cmp, (void*)this, 0, 0)))
     goto err_handler;
 
   /*
@@ -2828,7 +2891,7 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked)
 err_handler:
   DEBUG_SYNC(ha_thd(), "partition_open_error");
   while (file-- != m_file)
-    (*file)->close();
+    (*file)->ha_close();
 err_alloc:
   bitmap_free(&m_bulk_insert_started);
   if (!m_is_clone_of)
@@ -2914,7 +2977,7 @@ int ha_partition::close(void)
 repeat:
   do
   {
-    (*file)->close();
+    (*file)->ha_close();
   } while (*(++file));
 
   if (first && m_added_file && m_added_file[0])
@@ -3793,7 +3856,7 @@ ha_rows ha_partition::guess_bulk_insert_rows()
     0                       Success
 
   Note: end_bulk_insert can be called without start_bulk_insert
-        being called, see bug¤44108.
+        being called, see bug#44108.
 
 */
 
@@ -4006,6 +4069,7 @@ int ha_partition::rnd_next(uchar *buf)
   int result= HA_ERR_END_OF_FILE;
   uint part_id= m_part_spec.start_part;
   DBUG_ENTER("ha_partition::rnd_next");
+  decrement_statistics(&SSV::ha_read_rnd_next_count);
 
   if (NO_CURRENT_PART_ID == part_id)
   {
@@ -4021,7 +4085,7 @@ int ha_partition::rnd_next(uchar *buf)
   
   while (TRUE)
   {
-    result= file->rnd_next(buf);
+    result= file->ha_rnd_next(buf);
     if (!result)
     {
       m_last_part= part_id;
@@ -4147,6 +4211,7 @@ int ha_partition::rnd_pos(uchar * buf, uchar *pos)
   uint part_id;
   handler *file;
   DBUG_ENTER("ha_partition::rnd_pos");
+  decrement_statistics(&SSV::ha_read_rnd_count);
 
   part_id= uint2korr((const uchar *) pos);
   DBUG_ASSERT(part_id < m_tot_parts);
@@ -4233,6 +4298,7 @@ int ha_partition::index_init(uint inx, bool sorted)
   m_part_spec.start_part= NO_CURRENT_PART_ID;
   m_start_key.length= 0;
   m_ordered= sorted;
+  m_ordered_scan_ongoing= FALSE;
   m_curr_key_info[0]= table->key_info+inx;
   if (m_pkey_is_clustered && table->s->primary_key != MAX_KEY)
   {
@@ -4359,6 +4425,7 @@ int ha_partition::index_read_map(uchar *buf, const uchar *key,
                                  enum ha_rkey_function find_flag)
 {
   DBUG_ENTER("ha_partition::index_read_map");
+  decrement_statistics(&SSV::ha_read_key_count);
   end_range= 0;
   m_index_scan_type= partition_index_read;
   m_start_key.key= key;
@@ -4486,6 +4553,7 @@ int ha_partition::common_index_read(uchar *buf, bool have_start_key)
 int ha_partition::index_first(uchar * buf)
 {
   DBUG_ENTER("ha_partition::index_first");
+  decrement_statistics(&SSV::ha_read_first_count);
 
   end_range= 0;
   m_index_scan_type= partition_index_first;
@@ -4517,6 +4585,7 @@ int ha_partition::index_first(uchar * buf)
 int ha_partition::index_last(uchar * buf)
 {
   DBUG_ENTER("ha_partition::index_last");
+  decrement_statistics(&SSV::ha_read_last_count);
 
   m_index_scan_type= partition_index_last;
   DBUG_RETURN(common_first_last(buf));
@@ -4545,39 +4614,6 @@ int ha_partition::common_first_last(uchar *buf)
 
 
 /*
-  Read last using key
-
-  SYNOPSIS
-    index_read_last_map()
-    buf                   Read row in MySQL Row Format
-    key                   Key
-    keypart_map           Which part of key is used
-
-  RETURN VALUE
-    >0                    Error code
-    0                     Success
-
-  DESCRIPTION
-    This is used in join_read_last_key to optimise away an ORDER BY.
-    Can only be used on indexes supporting HA_READ_ORDER
-*/
-
-int ha_partition::index_read_last_map(uchar *buf, const uchar *key,
-                                      key_part_map keypart_map)
-{
-  DBUG_ENTER("ha_partition::index_read_last");
-
-  m_ordered= TRUE;				// Safety measure
-  end_range= 0;
-  m_index_scan_type= partition_index_read_last;
-  m_start_key.key= key;
-  m_start_key.keypart_map= keypart_map;
-  m_start_key.flag= HA_READ_PREFIX_LAST;
-  DBUG_RETURN(common_index_read(buf, TRUE));
-}
-
-
-/*
   Optimization of the default implementation to take advantage of dynamic
   partition pruning.
 */
@@ -4653,6 +4689,7 @@ int ha_partition::index_read_idx_map(uchar *buf, uint index,
 int ha_partition::index_next(uchar * buf)
 {
   DBUG_ENTER("ha_partition::index_next");
+  decrement_statistics(&SSV::ha_read_next_count);
 
   /*
     TODO(low priority):
@@ -4689,6 +4726,7 @@ int ha_partition::index_next(uchar * buf)
 int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen)
 {
   DBUG_ENTER("ha_partition::index_next_same");
+  decrement_statistics(&SSV::ha_read_next_count);
 
   DBUG_ASSERT(keylen == m_start_key.length);
   DBUG_ASSERT(m_index_scan_type != partition_index_last);
@@ -4716,6 +4754,7 @@ int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen)
 int ha_partition::index_prev(uchar * buf)
 {
   DBUG_ENTER("ha_partition::index_prev");
+  decrement_statistics(&SSV::ha_read_prev_count);
 
   /* TODO: read comment in index_next */
   DBUG_ASSERT(m_index_scan_type != partition_index_first);
@@ -4928,8 +4967,8 @@ int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
   }
   else if (is_next_same)
   {
-    if (!(error= file->index_next_same(buf, m_start_key.key,
-                                       m_start_key.length)))
+    if (!(error= file->ha_index_next_same(buf, m_start_key.key,
+                                          m_start_key.length)))
     {
       m_last_part= m_part_spec.start_part;
       DBUG_RETURN(0);
@@ -4937,7 +4976,7 @@ int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
   }
   else 
   {
-    if (!(error= file->index_next(buf)))
+    if (!(error= file->ha_index_next(buf)))
     {
       m_last_part= m_part_spec.start_part;
       DBUG_RETURN(0);                           // Row was in range
@@ -4992,13 +5031,13 @@ int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
       break;
     case partition_index_read:
       DBUG_PRINT("info", ("index_read on partition %d", i));
-      error= file->index_read_map(buf, m_start_key.key,
-                                  m_start_key.keypart_map,
-                                  m_start_key.flag);
+      error= file->ha_index_read_map(buf, m_start_key.key,
+                                     m_start_key.keypart_map,
+                                     m_start_key.flag);
       break;
     case partition_index_first:
       DBUG_PRINT("info", ("index_first on partition %d", i));
-      error= file->index_first(buf);
+      error= file->ha_index_first(buf);
       break;
     case partition_index_first_unordered:
       /*
@@ -5061,7 +5100,7 @@ int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
 int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
 {
   uint i;
-  uint j= 0;
+  uint j= queue_first_element(&m_queue);
   bool found= FALSE;
   DBUG_ENTER("ha_partition::handle_ordered_index_scan");
 
@@ -5077,25 +5116,25 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
     int error;
     handler *file= m_file[i];
 
+    /*
+      Reset null bits to avoid valgrind warnings and to give a default
+      value to null fields that are not read.
+    */
+    bfill(rec_buf_ptr, table->s->null_bytes, 255);
+
     switch (m_index_scan_type) {
     case partition_index_read:
-      error= file->index_read_map(rec_buf_ptr,
-                                  m_start_key.key,
-                                  m_start_key.keypart_map,
-                                  m_start_key.flag);
+      error= file->ha_index_read_map(rec_buf_ptr,
+                                     m_start_key.key,
+                                     m_start_key.keypart_map,
+                                     m_start_key.flag);
       break;
     case partition_index_first:
-      error= file->index_first(rec_buf_ptr);
+      error= file->ha_index_first(rec_buf_ptr);
       reverse_order= FALSE;
       break;
     case partition_index_last:
-      error= file->index_last(rec_buf_ptr);
-      reverse_order= TRUE;
-      break;
-    case partition_index_read_last:
-      error= file->index_read_last_map(rec_buf_ptr,
-                                       m_start_key.key,
-                                       m_start_key.keypart_map);
+      error= file->ha_index_last(rec_buf_ptr);
       reverse_order= TRUE;
       break;
     case partition_read_range:
@@ -5135,7 +5174,7 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
     */
     queue_set_max_at_top(&m_queue, reverse_order);
     queue_set_cmp_arg(&m_queue, (void*)m_curr_key_info);
-    m_queue.elements= j;
+    m_queue.elements= j - queue_first_element(&m_queue);
     queue_fix(&m_queue);
     return_top_record(buf);
     table->status= 0;
@@ -5197,16 +5236,16 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
     memcpy(rec_buf(part_id), table->record[0], m_rec_length);
   }
   else if (!is_next_same)
-    error= file->index_next(rec_buf(part_id));
+    error= file->ha_index_next(rec_buf(part_id));
   else
-    error= file->index_next_same(rec_buf(part_id), m_start_key.key,
-				 m_start_key.length);
+    error= file->ha_index_next_same(rec_buf(part_id), m_start_key.key,
+                                    m_start_key.length);
   if (error)
   {
     if (error == HA_ERR_END_OF_FILE)
     {
       /* Return next buffered row */
-      queue_remove(&m_queue, (uint) 0);
+      queue_remove_top(&m_queue);
       if (m_queue.elements)
       {
          DBUG_PRINT("info", ("Record returned from partition %u (2)",
@@ -5218,7 +5257,7 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
     }
     DBUG_RETURN(error);
   }
-  queue_replaced(&m_queue);
+  queue_replace_top(&m_queue);
   return_top_record(buf);
   DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
   DBUG_RETURN(0);
@@ -5245,11 +5284,11 @@ int ha_partition::handle_ordered_prev(uchar *buf)
   handler *file= m_file[part_id];
   DBUG_ENTER("ha_partition::handle_ordered_prev");
 
-  if ((error= file->index_prev(rec_buf(part_id))))
+  if ((error= file->ha_index_prev(rec_buf(part_id))))
   {
     if (error == HA_ERR_END_OF_FILE)
     {
-      queue_remove(&m_queue, (uint) 0);
+      queue_remove_top(&m_queue);
       if (m_queue.elements)
       {
 	return_top_record(buf);
@@ -5261,7 +5300,7 @@ int ha_partition::handle_ordered_prev(uchar *buf)
     }
     DBUG_RETURN(error);
   }
-  queue_replaced(&m_queue);
+  queue_replace_top(&m_queue);
   return_top_record(buf);
   DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
   DBUG_RETURN(0);
@@ -5582,7 +5621,7 @@ void ha_partition::get_dynamic_partition_info(PARTITION_STATS *stat_info,
   stat_info->update_time=          file->stats.update_time;
   stat_info->check_time=           file->stats.check_time;
   stat_info->check_sum= 0;
-  if (file->ha_table_flags() & HA_HAS_CHECKSUM)
+  if (file->ha_table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM))
     stat_info->check_sum= file->checksum();
   return;
 }
@@ -5909,6 +5948,7 @@ int ha_partition::extra(enum ha_extra_function operation)
   case HA_EXTRA_KEYREAD:
   case HA_EXTRA_NO_KEYREAD:
   case HA_EXTRA_FLUSH:
+  case HA_EXTRA_PREPARE_FOR_FORCED_CLOSE:
     DBUG_RETURN(loop_extra(operation));
 
     /* Category 2), used by non-MyISAM handlers */
@@ -5945,9 +5985,7 @@ int ha_partition::extra(enum ha_extra_function operation)
   case HA_EXTRA_PREPARE_FOR_DROP:
   case HA_EXTRA_FLUSH_CACHE:
   {
-    if (m_myisam)
-      DBUG_RETURN(loop_extra(operation));
-    break;
+    DBUG_RETURN(loop_extra(operation));
   }
   case HA_EXTRA_NO_READCHECK:
   {
@@ -6222,6 +6260,7 @@ void ha_partition::late_extra_cache(uint partition_id)
   }
   if (m_extra_prepare_for_update)
   {
+    DBUG_ASSERT(m_extra_cache);
     (void) file->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
   }
   m_extra_cache_part_id= partition_id;
@@ -7187,5 +7226,22 @@ mysql_declare_plugin(partition)
   0,                          /* flags                           */
 }
 mysql_declare_plugin_end;
+maria_declare_plugin(partition)
+{
+  MYSQL_STORAGE_ENGINE_PLUGIN,
+  &partition_storage_engine,
+  "partition",
+  "Mikael Ronstrom, MySQL AB",
+  "Partition Storage Engine Helper",
+  PLUGIN_LICENSE_GPL,
+  partition_initialize, /* Plugin Init */
+  NULL, /* Plugin Deinit */
+  0x0100, /* 1.0 */
+  NULL,                       /* status variables                */
+  NULL,                       /* system variables                */
+  "1.0",                      /* string version                  */
+  MariaDB_PLUGIN_MATURITY_STABLE /* maturity                     */
+}
+maria_declare_plugin_end;
 
 #endif
diff --git a/sql/ha_partition.h b/sql/ha_partition.h
index 3cc65bbde64..46a8924c883 100644
--- a/sql/ha_partition.h
+++ b/sql/ha_partition.h
@@ -30,7 +30,6 @@ enum partition_keywords
   PKW_COLUMNS
 };
 
-
 #define PARTITION_BYTES_IN_POS 2
 #define PARTITION_ENABLED_TABLE_FLAGS (HA_FILE_BASED | HA_REC_NOT_IN_SEQ)
 #define PARTITION_DISABLED_TABLE_FLAGS (HA_CAN_GEOMETRY | \
@@ -57,21 +56,22 @@ private:
     partition_index_first= 1,
     partition_index_first_unordered= 2,
     partition_index_last= 3,
-    partition_index_read_last= 4,
-    partition_read_range = 5,
-    partition_no_index_scan= 6
+    partition_read_range = 4,
+    partition_no_index_scan= 5
   };
   /* Data for the partition handler */
   int  m_mode;                          // Open mode
   uint m_open_test_lock;                // Open test_if_locked
   char *m_file_buffer;                  // Content of the .par file 
   char *m_name_buffer_ptr;		// Pointer to first partition name
+  MEM_ROOT m_mem_root;
   plugin_ref *m_engine_array;           // Array of types of the handlers
   handler **m_file;                     // Array of references to handler inst.
   uint m_file_tot_parts;                // Debug
   handler **m_new_file;                 // Array of references to new handlers
   handler **m_reorged_file;             // Reorganised partitions
   handler **m_added_file;               // Added parts kept for errors
+  LEX_STRING *m_connect_string;
   partition_info *m_part_info;          // local reference to partition
   Field **m_part_field_array;           // Part field array locally to save acc
   uchar *m_ordered_rec_buffer;          // Row and key buffer for ord. idx scan
@@ -92,7 +92,6 @@ private:
     for this since the MySQL Server sometimes allocating the handler object
     without freeing them.
   */
-  ulong m_low_byte_first;
   enum enum_handler_status
   {
     handler_not_initialized= 0,
@@ -479,8 +478,6 @@ public:
   virtual int index_first(uchar * buf);
   virtual int index_last(uchar * buf);
   virtual int index_next_same(uchar * buf, const uchar * key, uint keylen);
-  virtual int index_read_last_map(uchar * buf, const uchar * key,
-                                  key_part_map keypart_map);
 
   /*
     read_first_row is virtual method but is only implemented by
@@ -868,6 +865,10 @@ public:
   */
   virtual ulong index_flags(uint inx, uint part, bool all_parts) const
   {
+    /*
+      The following code is not safe if you are using different
+      storage engines or different index types per partition.
+    */
     return m_file[0]->index_flags(inx, part, all_parts);
   }
 
@@ -894,12 +895,6 @@ public:
   virtual uint max_supported_key_part_length() const;
 
   /*
-    All handlers in a partitioned table must have the same low_byte_first
-  */
-  virtual bool low_byte_first() const
-  { return m_low_byte_first; }
-
-  /*
     The extra record buffer length is the maximum needed by all handlers.
     The minimum record length is the maximum of all involved handlers.
   */
@@ -1133,6 +1128,29 @@ public:
     -------------------------------------------------------------------------
     virtual void append_create_info(String *packet)
   */
+
+  /*
+    the following heavily relies on the fact that all partitions
+    are in the same storage engine.
+
+    When this limitation is lifted, the following hack should go away,
+    and a proper interface for engines needs to be introduced:
+
+      a PARTITION_SHARE structure that has a pointer to the TABLE_SHARE.
+      is given to engines everywhere where TABLE_SHARE is used now
+      has members like option_struct, ha_data
+      perhaps TABLE needs to be split the same way too...
+
+    this can also be done before partition will support a mix of engines,
+    but preferably together with other incompatible API changes.
+  */
+  virtual handlerton *partition_ht() const
+  {
+    handlerton *const first_ht= m_file[0]->ht;
+    for (uint part= 1; part < m_tot_parts; part++)
+      DBUG_ASSERT(m_file[part]->ht == first_ht);
+    return first_ht;
+  }
 };
 
 #endif /* HA_PARTITION_INCLUDED */
diff --git a/sql/handler.cc b/sql/handler.cc
index 6f7cf2c3456..b2736e4559d 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -27,6 +28,7 @@
 #include "unireg.h"
 #include "rpl_handler.h"
 #include "sql_cache.h"                   // query_cache, query_cache_*
+#include "sql_connect.h"                 // global_table_stats
 #include "key.h"     // key_copy, key_unpack, key_cmp_if_same, key_cmp
 #include "sql_table.h"                   // build_table_filename
 #include "sql_parse.h"                          // check_stack_overrun
@@ -34,10 +36,11 @@
 #include "sql_base.h"           // free_io_cache
 #include "discover.h"           // writefrm
 #include "log_event.h"          // *_rows_log_event
+#include "create_options.h"
 #include "rpl_filter.h"
 #include <myisampack.h>
 #include "transaction.h"
-#include <errno.h>
+#include "myisam.h"
 #include "probes_mysql.h"
 #include "debug_sync.h"         // DEBUG_SYNC
 
@@ -45,6 +48,10 @@
 #include "ha_partition.h"
 #endif
 
+#ifdef WITH_ARIA_STORAGE_ENGINE
+#include "../storage/maria/ha_maria.h"
+#endif
+
 /*
   While we have legacy_db_type, we have this array to
   check for dups and to find handlerton from legacy_db_type.
@@ -72,12 +79,13 @@ static const LEX_STRING sys_table_aliases[]=
   { C_STRING_WITH_LEN("NDB") },       { C_STRING_WITH_LEN("NDBCLUSTER") },
   { C_STRING_WITH_LEN("HEAP") },      { C_STRING_WITH_LEN("MEMORY") },
   { C_STRING_WITH_LEN("MERGE") },     { C_STRING_WITH_LEN("MRG_MYISAM") },
+  { C_STRING_WITH_LEN("Maria") },      { C_STRING_WITH_LEN("Aria") },
   {NullS, 0}
 };
 
 const char *ha_row_type[] = {
   "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT",
-  /* Reserved to be "PAGE" in future versions */ "?",
+  "PAGE",
   "?","?","?"
 };
 
@@ -90,13 +98,14 @@ TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
 static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
 uint known_extensions_id= 0;
 
-
+static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans,
+                              bool is_real_trans);
 
 static plugin_ref ha_default_plugin(THD *thd)
 {
   if (thd->variables.table_plugin)
     return thd->variables.table_plugin;
-  return my_plugin_lock(thd, &global_system_variables.table_plugin);
+  return my_plugin_lock(thd, global_system_variables.table_plugin);
 }
 
 
@@ -177,13 +186,8 @@ plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
 {
   if (hton)
   {
-    st_plugin_int **plugin= hton2plugin + hton->slot;
-    
-#ifdef DBUG_OFF
-    return my_plugin_lock(thd, plugin);
-#else
-    return my_plugin_lock(thd, &plugin);
-#endif
+    st_plugin_int *plugin= hton2plugin[hton->slot];
+    return my_plugin_lock(thd, plugin_int_to_ref(plugin));
   }
   return NULL;
 }
@@ -358,7 +362,8 @@ int ha_init_errors(void)
   SETMSG(HA_ERR_AUTOINC_ERANGE,         ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
   SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
   SETMSG(HA_ERR_INDEX_COL_TOO_LONG,	ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
-  SETMSG(HA_ERR_INDEX_CORRUPT,		ER_DEFAULT(ER_INDEX_CORRUPT));
+  SETMSG(HA_ERR_INDEX_CORRUPT,         ER_DEFAULT(ER_INDEX_CORRUPT));
+  SETMSG(HA_ERR_DISK_FULL,              ER_DEFAULT(ER_DISK_FULL));
 
   /* Register the error messages for use with my_error(). */
   return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
@@ -394,8 +399,7 @@ int ha_finalize_handlerton(st_plugin_int *plugin)
   if (!hton)
     goto end;
 
-  switch (hton->state)
-  {
+  switch (hton->state) {
   case SHOW_OPTION_NO:
   case SHOW_OPTION_DISABLED:
     break;
@@ -464,8 +468,8 @@ int ha_initialize_handlerton(st_plugin_int *plugin)
   if (plugin->plugin->init && plugin->plugin->init(hton))
   {
     sql_print_error("Plugin '%s' init function returned error.",
-                    plugin->name.str);
-    goto err;  
+		    plugin->name.str);
+    goto err;
   }
 
   /*
@@ -492,13 +496,17 @@ int ha_initialize_handlerton(st_plugin_int *plugin)
         if (idx == (int) DB_TYPE_DEFAULT)
         {
           sql_print_warning("Too many storage engines!");
-          goto err_deinit;
+	  goto err_deinit;
         }
         if (hton->db_type != DB_TYPE_UNKNOWN)
           sql_print_warning("Storage engine '%s' has conflicting typecode. "
                             "Assigning value %d.", plugin->plugin->name, idx);
         hton->db_type= (enum legacy_db_type) idx;
       }
+      installed_htons[hton->db_type]= hton;
+      tmp= hton->savepoint_offset;
+      hton->savepoint_offset= savepoint_alloc_size;
+      savepoint_alloc_size+= tmp;
 
       /*
         In case a plugin is uninstalled and re-installed later, it should
@@ -623,6 +631,23 @@ void ha_drop_database(char* path)
 }
 
 
+static my_bool checkpoint_state_handlerton(THD *unused1, plugin_ref plugin,
+                                           void *disable)
+{
+  handlerton *engine= plugin_data(plugin, handlerton *);
+  if (engine->checkpoint_state && engine->state == SHOW_OPTION_YES)
+    engine->checkpoint_state(engine, (int) *(bool*) disable);
+  return FALSE;
+}
+
+
+void ha_checkpoint_state(bool disable)
+{
+  plugin_foreach(NULL, checkpoint_state_handlerton,
+                 MYSQL_STORAGE_ENGINE_PLUGIN, &disable);
+}
+
+
 static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
                                    void *unused)
 {
@@ -1104,7 +1129,7 @@ ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
 */
 int ha_commit_trans(THD *thd, bool all)
 {
-  int error= 0, cookie= 0;
+  int error= 0, cookie;
   /*
     'all' means that this is either an explicit commit issued by
     user, or an implicit commit issued by a DDL.
@@ -1119,9 +1144,16 @@ int ha_commit_trans(THD *thd, bool all)
   */
   bool is_real_trans= all || thd->transaction.all.ha_list == 0;
   Ha_trx_info *ha_info= trans->ha_list;
-  my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
+  bool need_prepare_ordered, need_commit_ordered;
+  my_xid xid;
   DBUG_ENTER("ha_commit_trans");
 
+  /* Just a random warning to test warnings pushed during autocommit. */
+  DBUG_EXECUTE_IF("warn_during_ha_commit_trans",
+    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                 ER_WARNING_NOT_COMPLETE_ROLLBACK,
+                 ER(ER_WARNING_NOT_COMPLETE_ROLLBACK)););
+
   /*
     We must not commit the normal transaction if a statement
     transaction is pending. Otherwise statement transaction
@@ -1152,115 +1184,142 @@ int ha_commit_trans(THD *thd, bool all)
     DBUG_RETURN(2);
   }
 
-  if (ha_info)
-  {
-    uint rw_ha_count;
-    bool rw_trans;
-    MDL_request mdl_request;
-
-    DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
+#ifdef WITH_ARIA_STORAGE_ENGINE
+    ha_maria::implicit_commit(thd, FALSE);
+#endif
 
-    /* Close all cursors that can not survive COMMIT */
-    if (is_real_trans)                          /* not a statement commit */
-      thd->stmt_map.close_transient_cursors();
+  if (!ha_info)
+  {
+    /* Free resources and perform other cleanup even for 'empty' transactions. */
+    if (is_real_trans)
+      thd->transaction.cleanup();
+    DBUG_RETURN(0);
+  }
 
-    rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
-    /* rw_trans is TRUE when we in a transaction changing data */
-    rw_trans= is_real_trans && (rw_ha_count > 0);
+  DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
 
-    if (rw_trans)
-    {
-      /*
-        Acquire a metadata lock which will ensure that COMMIT is blocked
-        by an active FLUSH TABLES WITH READ LOCK (and vice versa:
-        COMMIT in progress blocks FTWRL).
+  /* Close all cursors that can not survive COMMIT */
+  if (is_real_trans)                          /* not a statement commit */
+    thd->stmt_map.close_transient_cursors();
 
-        We allow the owner of FTWRL to COMMIT; we assume that it knows
-        what it does.
-      */
-      mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
-                       MDL_EXPLICIT);
+  uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
+  /* rw_trans is TRUE when we are in a transaction changing data */
+  bool rw_trans= is_real_trans && (rw_ha_count > 0);
+  MDL_request mdl_request;
 
-      if (thd->mdl_context.acquire_lock(&mdl_request,
-                                        thd->variables.lock_wait_timeout))
-      {
-        ha_rollback_trans(thd, all);
-        DBUG_RETURN(1);
-      }
+  if (rw_trans)
+  {
+    /*
+      Acquire a metadata lock which will ensure that COMMIT is blocked
+      by an active FLUSH TABLES WITH READ LOCK (and vice versa:
+      COMMIT in progress blocks FTWRL).
 
-      DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
-    }
+      We allow the owner of FTWRL to COMMIT; we assume that it knows
+      what it does.
+    */
+    mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
+                     MDL_EXPLICIT);
 
-    if (rw_trans &&
-        opt_readonly &&
-        !(thd->security_ctx->master_access & SUPER_ACL) &&
-        !thd->slave_thread)
+    if (thd->mdl_context.acquire_lock(&mdl_request,
+                                      thd->variables.lock_wait_timeout))
     {
-      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
       ha_rollback_trans(thd, all);
-      error= 1;
-      goto end;
+      DBUG_RETURN(1);
     }
 
-    if (!trans->no_2pc && (rw_ha_count > 1))
-    {
-      for (; ha_info && !error; ha_info= ha_info->next())
-      {
-        int err;
-        handlerton *ht= ha_info->ht();
-        /*
-          Do not call two-phase commit if this particular
-          transaction is read-only. This allows for simpler
-          implementation in engines that are always read-only.
-        */
-        if (! ha_info->is_trx_read_write())
-          continue;
-        /*
-          Sic: we know that prepare() is not NULL since otherwise
-          trans->no_2pc would have been set.
-        */
-        if ((err= ht->prepare(ht, thd, all)))
-        {
-          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
-          error= 1;
-        }
-        status_var_increment(thd->status_var.ha_prepare_count);
-      }
-      DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
-      if (error || (is_real_trans && xid &&
-                    (error= !(cookie= tc_log->log_xid(thd, xid)))))
-      {
-        ha_rollback_trans(thd, all);
-        error= 1;
-        goto end;
-      }
-      DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
-    }
-    error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
-    DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
-    if (cookie)
-      if(tc_log->unlog(cookie, xid))
-      {
-        error= 2;
-        goto end;
-      }
-    DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
-    RUN_HOOK(transaction, after_commit, (thd, FALSE));
+    DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
+  }
+
+  if (rw_trans &&
+      opt_readonly &&
+      !(thd->security_ctx->master_access & SUPER_ACL) &&
+      !thd->slave_thread)
+  {
+    my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
+    goto err;
+  }
+
+  if (trans->no_2pc || (rw_ha_count <= 1))
+  {
+    error= ha_commit_one_phase(thd, all);
+    goto done;
+  }
+
+  need_prepare_ordered= FALSE;
+  need_commit_ordered= FALSE;
+  xid= thd->transaction.xid_state.xid.get_my_xid();
+
+  for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
+  {
+    int err;
+    handlerton *ht= hi->ht();
+    /*
+      Do not call two-phase commit if this particular
+      transaction is read-only. This allows for simpler
+      implementation in engines that are always read-only.
+    */
+    if (! hi->is_trx_read_write())
+      continue;
+    /*
+      Sic: we know that prepare() is not NULL since otherwise
+      trans->no_2pc would have been set.
+    */
+    err= ht->prepare(ht, thd, all);
+    status_var_increment(thd->status_var.ha_prepare_count);
+    if (err)
+      my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+
+    if (err)
+      goto err;
+
+    need_prepare_ordered|= (ht->prepare_ordered != NULL);
+    need_commit_ordered|= (ht->commit_ordered != NULL);
+  }
+  DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
+
+  if (!is_real_trans)
+  {
+    error= commit_one_phase_2(thd, all, trans, is_real_trans);
+    goto done;
+  }
+
+  cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered,
+                                need_commit_ordered);
+  if (!cookie)
+    goto err;
+
+  DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
+
+  error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0;
+
+  DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
+  if (tc_log->unlog(cookie, xid))
+  {
+    error= 2;                                /* Error during commit */
+    goto end;
+  }
+
+done:
+  DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
+  RUN_HOOK(transaction, after_commit, (thd, FALSE));
+  goto end;
+
+  /* Come here if error and we need to rollback. */
+err:
+  error= 1;                                  /* Transaction was rolled back */
+  ha_rollback_trans(thd, all);
+
 end:
-    if (rw_trans && mdl_request.ticket)
-    {
-      /*
-        We do not always immediately release transactional locks
-        after ha_commit_trans() (see uses of ha_enable_transaction()),
-        thus we release the commit blocker lock as soon as it's
-        not needed.
-      */
-      thd->mdl_context.release_lock(mdl_request.ticket);
-    }
+  if (rw_trans && mdl_request.ticket)
+  {
+    /*
+      We do not always immediately release transactional locks
+      after ha_commit_trans() (see uses of ha_enable_transaction()),
+      thus we release the commit blocker lock as soon as it's
+      not needed.
+    */
+    thd->mdl_context.release_lock(mdl_request.ticket);
   }
-  /* Free resources and perform other cleanup even for 'empty' transactions. */
-  else if (is_real_trans)
-    thd->transaction.cleanup();
   DBUG_RETURN(error);
 }
 
@@ -1277,7 +1336,6 @@ end:
 
 int ha_commit_one_phase(THD *thd, bool all)
 {
-  int error=0;
   THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
   /*
     "real" is a nick name for a transaction for which a commit will
@@ -1293,9 +1351,18 @@ int ha_commit_one_phase(THD *thd, bool all)
     transaction.all.ha_list, see why in trans_register_ha()).
   */
   bool is_real_trans=all || thd->transaction.all.ha_list == 0;
-  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
   DBUG_ENTER("ha_commit_one_phase");
+  int res= commit_one_phase_2(thd, all, trans, is_real_trans);
+  DBUG_RETURN(res);
+}
+
 
+static int
+commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
+{
+  int error= 0;
+  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
+  DBUG_ENTER("commit_one_phase_2");
   if (ha_info)
   {
     for (; ha_info; ha_info= ha_info_next)
@@ -1307,6 +1374,7 @@ int ha_commit_one_phase(THD *thd, bool all)
         my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
         error=1;
       }
+      /* Should this be done only if is_real_trans is set ? */
       status_var_increment(thd->status_var.ha_commit_count);
       ha_info_next= ha_info->next();
       ha_info->reset(); /* keep it conveniently zero-filled */
@@ -1317,7 +1385,7 @@ int ha_commit_one_phase(THD *thd, bool all)
     {
 #ifdef HAVE_QUERY_CACHE
       if (thd->transaction.changed_tables)
-        query_cache.invalidate(thd->transaction.changed_tables);
+        query_cache.invalidate(thd, thd->transaction.changed_tables);
 #endif
     }
   }
@@ -1556,7 +1624,7 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
     while ((got= hton->recover(hton, info->list, info->len)) > 0 )
     {
       sql_print_information("Found %d prepared transaction(s) in %s",
-                            got, ha_resolve_storage_engine_name(hton));
+                            got, hton_name(hton)->str);
       for (int i=0; i < got; i ++)
       {
         my_xid x=info->list[i].get_my_xid();
@@ -1623,16 +1691,6 @@ int ha_recover(HASH *commit_list)
   if (info.commit_list)
     sql_print_information("Starting crash recovery...");
 
-#ifndef WILL_BE_DELETED_LATER
-  /*
-    for now, only InnoDB supports 2pc. It means we can always safely
-    rollback all pending transactions, without risking inconsistent data
-  */
-  DBUG_ASSERT(total_ha_2pc == (ulong) opt_bin_log+1); // only InnoDB and binlog
-  tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
-  info.dry_run=FALSE;
-#endif
-
   for (info.len= MAX_XID_LIST_SIZE ; 
        info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2)
   {
@@ -1889,7 +1947,16 @@ int ha_start_consistent_snapshot(THD *thd)
 {
   bool warn= true;
 
+  /*
+    Holding the LOCK_commit_ordered mutex ensures that we get the same
+    snapshot for all engines (including the binary log).  This allows us
+    among other things to do backups with
+    START TRANSACTION WITH CONSISTENT SNAPSHOT and
+    have a consistent binlog position.
+  */
+  mysql_mutex_lock(&LOCK_commit_ordered);
   plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
+  mysql_mutex_unlock(&LOCK_commit_ordered);
 
   /*
     Same idea as when one wants to CREATE TABLE in one engine which does not
@@ -2056,7 +2123,8 @@ int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
     dummy_share.db.length= strlen(db);
     dummy_share.table_name.str= (char*) alias;
     dummy_share.table_name.length= strlen(alias);
-    dummy_table.alias= alias;
+    dummy_table.alias.set(alias, dummy_share.table_name.length,
+                          table_alias_charset);
 
     file->change_table_ptr(&dummy_table, &dummy_share);
 
@@ -2082,33 +2150,48 @@ int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
 handler *handler::clone(const char *name, MEM_ROOT *mem_root)
 {
   handler *new_handler= get_new_handler(table->s, mem_root, ht);
+  if (! new_handler)
+    return NULL;
+
   /*
     Allocate handler->ref here because otherwise ha_open will allocate it
     on this->table->mem_root and we will not be able to reclaim that memory 
     when the clone handler object is destroyed.
   */
-  if (new_handler &&
-     !(new_handler->ref= (uchar*) alloc_root(mem_root,
-                                             ALIGN_SIZE(ref_length)*2)))
-    new_handler= NULL;
+
+  if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
+                                              ALIGN_SIZE(ref_length)*2)))
+    return NULL;
+
   /*
     TODO: Implement a more efficient way to have more than one index open for
     the same table instance. The ha_open call is not cachable for clone.
+
+    This is not critical as the engines already have the table open
+    and should be able to use the original instance of the table.
   */
-  if (new_handler && new_handler->ha_open(table,
-                                          name,
-                                          table->db_stat,
-                                          HA_OPEN_IGNORE_IF_LOCKED))
-    new_handler= NULL;
+  if (new_handler->ha_open(table, name, table->db_stat,
+                           HA_OPEN_IGNORE_IF_LOCKED))
+    return NULL;
 
   return new_handler;
 }
 
 
-
-void handler::ha_statistic_increment(ulong SSV::*offset) const
+double handler::keyread_time(uint index, uint ranges, ha_rows rows)
 {
-  status_var_increment(table->in_use->status_var.*offset);
+  /*
+    Cost model: we assume that the whole key range is read through and that
+    all key blocks are half full (normally things are much better). We also
+    assume that every read of the next key from the index makes the handler
+    do a random seek, so the cost is proportional to the number of blocks
+    read. Clustered indexes are not taken into account - engines that
+    support them (e.g. InnoDB) may want to override this method.
+    The 'ranges' argument is not used by this default implementation.
+  */
+  uint key_entry_length= table->key_info[index].key_length + ref_length;
+  double keys_per_block= stats.block_size / 2.0 / key_entry_length + 1;
+  return (rows + keys_per_block - 1) / keys_per_block;
 }
 
 void **handler::ha_data(THD *thd) const
@@ -2135,7 +2218,7 @@ PSI_table_share *handler::ha_table_share_psi(const TABLE_SHARE *share) const
     Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set
 */
 int handler::ha_open(TABLE *table_arg, const char *name, int mode,
-                     int test_if_locked)
+                     uint test_if_locked)
 {
   int error;
   DBUG_ENTER("handler::ha_open");
@@ -2179,9 +2262,34 @@ int handler::ha_open(TABLE *table_arg, const char *name, int mode,
       dup_ref=ref+ALIGN_SIZE(ref_length);
     cached_table_flags= table_flags();
   }
+  reset_statistics();
+  internal_tmp_table= test(test_if_locked & HA_OPEN_INTERNAL_TABLE);
   DBUG_RETURN(error);
 }
 
+int handler::ha_close()
+{
+  DBUG_ENTER("ha_close");
+  /*
+    Add this handler's rows_tmp_read counter to the owning thread's status.
+    in_use is 0 for tables that were closed from the table cache.
+  */
+  if (THD *owner= table->in_use)
+    status_var_add(owner->status_var.rows_tmp_read, rows_tmp_read);
+  DBUG_RETURN(close());
+}
+
+/* Initialize handler for random reading, with error handling */
+
+int handler::ha_rnd_init_with_error(bool scan)
+{
+  const int rc= ha_rnd_init(scan);
+  /* On failure, report the error through the table's handler */
+  if (rc)
+    table->file->print_error(rc, MYF(0));
+  return rc;
+}
+
 
 /**
   Read first row (only) from a table.
@@ -2194,8 +2302,6 @@ int handler::read_first_row(uchar * buf, uint primary_key)
   register int error;
   DBUG_ENTER("handler::read_first_row");
 
-  ha_statistic_increment(&SSV::ha_read_first_count);
-
   /*
     If there is very few deleted rows in the table, find the first row by
     scanning the table.
@@ -2204,15 +2310,20 @@ int handler::read_first_row(uchar * buf, uint primary_key)
   if (stats.deleted < 10 || primary_key >= MAX_KEY ||
       !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
   {
-    (void) ha_rnd_init(1);
-    while ((error= rnd_next(buf)) == HA_ERR_RECORD_DELETED) ;
-    (void) ha_rnd_end();
+    if ((!(error= ha_rnd_init(1))))
+    {
+      while ((error= ha_rnd_next(buf)) == HA_ERR_RECORD_DELETED) ;
+      (void) ha_rnd_end();
+    }
   }
   else
   {
     /* Find the first row through the primary key */
-    (void) ha_index_init(primary_key, 0);
-    error=index_first(buf);
-    (void) ha_index_end();
+    if (!(error= ha_index_init(primary_key, 0)))
+    {
+      /* Only end the index scan if it was successfully started */
+      error= ha_index_first(buf);
+      (void) ha_index_end();
+    }
   }
   DBUG_RETURN(error);
@@ -2399,7 +2507,7 @@ int handler::update_auto_increment()
 
   if ((nr= table->next_number_field->val_int()) != 0 ||
       (table->auto_increment_field_not_null &&
-      thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
+       thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
   {
     /*
       Update next_insert_id if we had already generated a value in this
@@ -2562,8 +2670,9 @@ int handler::update_auto_increment()
 void handler::column_bitmaps_signal()
 {
   DBUG_ENTER("column_bitmaps_signal");
-  DBUG_PRINT("info", ("read_set: 0x%lx  write_set: 0x%lx", (long) table->read_set,
-                      (long) table->write_set));
+  if (table)
+    DBUG_PRINT("info", ("read_set: 0x%lx  write_set: 0x%lx",
+                        (long) table->read_set, (long) table->write_set));
   DBUG_VOID_RETURN;
 }
 
@@ -2597,10 +2706,10 @@ void handler::get_auto_increment(ulonglong offset, ulonglong increment,
   table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
                                         table->read_set);
   column_bitmaps_signal();
-  index_init(table->s->next_number_index, 1);
+  ha_index_init(table->s->next_number_index, 1);
   if (table->s->next_number_keypart == 0)
   {						// Autoincrement at key-start
-    error=index_last(table->record[1]);
+    error=ha_index_last(table->record[1]);
     /*
       MySQL implicitely assumes such method does locking (as MySQL decides to
       use nr+increment without checking again with the handler, in
@@ -2614,9 +2723,10 @@ void handler::get_auto_increment(ulonglong offset, ulonglong increment,
     key_copy(key, table->record[0],
              table->key_info + table->s->next_number_index,
              table->s->next_number_key_offset);
-    error= index_read_map(table->record[1], key,
-                          make_prev_keypart_map(table->s->next_number_keypart),
-                          HA_READ_PREFIX_LAST);
+    error= ha_index_read_map(table->record[1], key,
+                             make_prev_keypart_map(table->s->
+                                                   next_number_keypart),
+                             HA_READ_PREFIX_LAST);
     /*
       MySQL needs to call us for next row: assume we are inserting ("a",null)
       here, we return 3, and next this statement will want to insert
@@ -2631,7 +2741,7 @@ void handler::get_auto_increment(ulonglong offset, ulonglong increment,
   else
     nr= ((ulonglong) table->next_number_field->
          val_int_offset(table->s->rec_buff_length)+1);
-  index_end();
+  ha_index_end();
   (void) extra(HA_EXTRA_NO_KEYREAD);
   *first_value= nr;
 }
@@ -2639,6 +2749,7 @@ void handler::get_auto_increment(ulonglong offset, ulonglong increment,
 
 void handler::ha_release_auto_increment()
 {
+  DBUG_ENTER("ha_release_auto_increment");
   release_auto_increment();
   insert_id_for_cur_row= 0;
   auto_inc_interval_for_cur_row.replace(0, 0, 0);
@@ -2652,6 +2763,7 @@ void handler::ha_release_auto_increment()
     */
     table->in_use->auto_inc_intervals_forced.empty();
   }
+  DBUG_VOID_RETURN;
 }
 
 
@@ -2692,8 +2804,12 @@ void handler::print_keydup_error(uint key_nr, const char *msg)
     - table->s->path
     - table->alias
 */
+
+#define SET_FATAL_ERROR fatal_error=1
+
 void handler::print_error(int error, myf errflag)
 {
+  bool fatal_error= 0;
   DBUG_ENTER("handler::print_error");
   DBUG_PRINT("enter",("error: %d",error));
 
@@ -2708,22 +2824,43 @@ void handler::print_error(int error, myf errflag)
   case ENOENT:
     textno=ER_FILE_NOT_FOUND;
     break;
+  case ENOSPC:
+  case HA_ERR_DISK_FULL:
+    textno= ER_DISK_FULL;
+    SET_FATAL_ERROR;                            // Ensure error is logged
+    break;
   case HA_ERR_KEY_NOT_FOUND:
   case HA_ERR_NO_ACTIVE_RECORD:
   case HA_ERR_RECORD_DELETED:
   case HA_ERR_END_OF_FILE:
+    /*
+      These errors are not normally fatal (for example for reads). However
+      if you get one during an update or delete, then it is fatal.
+      As the user is calling print_error() (which is not done on read), we
+      assume something went wrong with the update or delete.
+    */
+    SET_FATAL_ERROR;
     textno=ER_KEY_NOT_FOUND;
     break;
+  case HA_ERR_ABORTED_BY_USER:
+  {
+    DBUG_ASSERT(table->in_use->killed);
+    table->in_use->send_kill_message();
+    DBUG_VOID_RETURN;
+  }
   case HA_ERR_WRONG_MRG_TABLE_DEF:
     textno=ER_WRONG_MRG_TABLE;
     break;
   case HA_ERR_FOUND_DUPP_KEY:
   {
-    uint key_nr=get_dup_key(error);
-    if ((int) key_nr >= 0)
+    if (table)
     {
-      print_keydup_error(key_nr, ER(ER_DUP_ENTRY_WITH_KEY_NAME));
-      DBUG_VOID_RETURN;
+      uint key_nr=get_dup_key(error);
+      if ((int) key_nr >= 0)
+      {
+        print_keydup_error(key_nr, ER(ER_DUP_ENTRY_WITH_KEY_NAME));
+        DBUG_VOID_RETURN;
+      }
     }
     textno=ER_DUP_KEY;
     break;
@@ -2760,21 +2897,29 @@ void handler::print_error(int error, myf errflag)
     textno=ER_DUP_UNIQUE;
     break;
   case HA_ERR_RECORD_CHANGED:
+    /*
+      This is not fatal error when using HANDLER interface
+      SET_FATAL_ERROR;
+    */
     textno=ER_CHECKREAD;
     break;
   case HA_ERR_CRASHED:
+    SET_FATAL_ERROR;
     textno=ER_NOT_KEYFILE;
     break;
   case HA_ERR_WRONG_IN_RECORD:
+    SET_FATAL_ERROR;
     textno= ER_CRASHED_ON_USAGE;
     break;
   case HA_ERR_CRASHED_ON_USAGE:
+    SET_FATAL_ERROR;
     textno=ER_CRASHED_ON_USAGE;
     break;
   case HA_ERR_NOT_A_TABLE:
     textno= error;
     break;
   case HA_ERR_CRASHED_ON_REPAIR:
+    SET_FATAL_ERROR;
     textno=ER_CRASHED_ON_REPAIR;
     break;
   case HA_ERR_OUT_OF_MEM:
@@ -2883,15 +3028,28 @@ void handler::print_error(int error, myf errflag)
       {
 	const char* engine= table_type();
 	if (temporary)
-	  my_error(ER_GET_TEMPORARY_ERRMSG, MYF(0), error, str.ptr(), engine);
+	  my_error(ER_GET_TEMPORARY_ERRMSG, MYF(0), error, str.c_ptr(),
+                   engine);
 	else
-	  my_error(ER_GET_ERRMSG, MYF(0), error, str.ptr(), engine);
+        {
+          SET_FATAL_ERROR;
+	  my_error(ER_GET_ERRMSG, MYF(0), error, str.c_ptr(), engine);
+        }
       }
       else
 	my_error(ER_GET_ERRNO,errflag,error);
       DBUG_VOID_RETURN;
     }
   }
+  if (fatal_error && (debug_assert_if_crashed_table ||
+                      global_system_variables.log_warnings > 1))
+  {
+    /*
+      Log error to log before we crash or if extended warnings are requested
+    */
+    errflag|= ME_NOREFRESH;
+  }
+    
   my_error(textno, errflag, table_share->table_name.str, error);
   DBUG_VOID_RETURN;
 }
@@ -3147,7 +3305,7 @@ int handler::rename_table(const char * from, const char * to)
 
 void handler::drop_table(const char *name)
 {
-  close();
+  ha_close();
   delete_table(name);
 }
 
@@ -3195,11 +3353,14 @@ int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
   if it is started.
 */
 
-inline
 void
-handler::mark_trx_read_write()
+handler::mark_trx_read_write_part2()
 {
   Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
+
+  /* Don't call this function again for this statement */
+  mark_trx_done= TRUE;
+
   /*
     When a storage engine method is called, the transaction must
     have been started, unless it's a DDL call, for which the
@@ -3445,6 +3606,9 @@ handler::ha_delete_table(const char *name)
   Drop table in the engine: public interface.
 
   @sa handler::drop_table()
+
+  The difference between this and delete_table() is that the table is open in
+  drop_table().
 */
 
 void
@@ -3569,7 +3733,7 @@ int ha_enable_transaction(THD *thd, bool on)
 int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
 {
   int error;
-  DBUG_ENTER("index_next_same");
+  DBUG_ENTER("handler::index_next_same");
   if (!(error=index_next(buf)))
   {
     my_ptrdiff_t ptrdiff= buf - table->record[0];
@@ -3614,6 +3778,7 @@ int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
         key_part->field->move_field_offset(-ptrdiff);
     }
   }
+  DBUG_PRINT("return",("%i", error));
   DBUG_RETURN(error);
 }
 
@@ -3633,12 +3798,129 @@ void handler::get_dynamic_partition_info(PARTITION_STATS *stat_info,
   stat_info->update_time=          stats.update_time;
   stat_info->check_time=           stats.check_time;
   stat_info->check_sum=            0;
-  if (table_flags() & (ulong) HA_HAS_CHECKSUM)
+  if (table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM))
     stat_info->check_sum= checksum();
   return;
 }
 
 
+/*
+  Updates the global table stats with the TABLE this handler represents
+*/
+
+void handler::update_global_table_stats()
+{
+  TABLE_STATS * table_stats;
+
+  status_var_add(table->in_use->status_var.rows_read, rows_read);
+  DBUG_ASSERT(rows_tmp_read == 0);
+
+  if (!table->in_use->userstat_running)
+  {
+    rows_read= rows_changed= 0;
+    return;
+  }
+
+  if (rows_read + rows_changed == 0)
+    return;                                     // Nothing to update.
+
+  DBUG_ASSERT(table->s && table->s->table_cache_key.str);
+
+  mysql_mutex_lock(&LOCK_global_table_stats);
+  /* Gets the global table stats, creating one if necessary. */
+  if (!(table_stats= (TABLE_STATS*)
+        my_hash_search(&global_table_stats,
+                    (uchar*) table->s->table_cache_key.str,
+                    table->s->table_cache_key.length)))
+  {
+    if (!(table_stats = ((TABLE_STATS*)
+                         my_malloc(sizeof(TABLE_STATS),
+                                   MYF(MY_WME | MY_ZEROFILL)))))
+    {
+      /* Out of memory error already given */
+      goto end;
+    }
+    memcpy(table_stats->table, table->s->table_cache_key.str,
+           table->s->table_cache_key.length);
+    table_stats->table_name_length= table->s->table_cache_key.length;
+    table_stats->engine_type= ht->db_type;
+    /* No need to set variables to 0, as we use MY_ZEROFILL above */
+
+    if (my_hash_insert(&global_table_stats, (uchar*) table_stats))
+    {
+      /* Out of memory error is already given */
+      my_free(table_stats);
+      goto end;
+    }
+  }
+  // Updates the global table stats.
+  table_stats->rows_read+=    rows_read;
+  table_stats->rows_changed+= rows_changed;
+  table_stats->rows_changed_x_indexes+= (rows_changed *
+                                         (table->s->keys ? table->s->keys :
+                                          1));
+  rows_read= rows_changed= 0;
+end:
+  mysql_mutex_unlock(&LOCK_global_table_stats);
+}
+
+
+/*
+  Updates the global index stats with this handler's accumulated index reads.
+*/
+
+void handler::update_global_index_stats()
+{
+  DBUG_ASSERT(table->s);
+
+  if (!table->in_use->userstat_running)
+  {
+    /* Reset all index read values */
+    bzero(index_rows_read, sizeof(index_rows_read[0]) * table->s->keys);
+    return;
+  }
+
+  for (uint index = 0; index < table->s->keys; index++)
+  {
+    if (index_rows_read[index])
+    {
+      INDEX_STATS* index_stats;
+      uint key_length;
+      KEY *key_info = &table->key_info[index];  // Rows were read using this
+
+      DBUG_ASSERT(key_info->cache_name);
+      if (!key_info->cache_name)
+        continue;
+      key_length= table->s->table_cache_key.length + key_info->name_length + 1;
+      mysql_mutex_lock(&LOCK_global_index_stats);
+      // Gets the global index stats, creating one if necessary.
+      if (!(index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats,
+                                                    key_info->cache_name,
+                                                    key_length)))
+      {
+        if (!(index_stats = ((INDEX_STATS*)
+                             my_malloc(sizeof(INDEX_STATS),
+                                       MYF(MY_WME | MY_ZEROFILL)))))
+          goto end;                             // Error is already given
+
+        memcpy(index_stats->index, key_info->cache_name, key_length);
+        index_stats->index_name_length= key_length;
+        if (my_hash_insert(&global_index_stats, (uchar*) index_stats))
+        {
+          my_free(index_stats);
+          goto end;
+        }
+      }
+      /* Updates the global index stats. */
+      index_stats->rows_read+= index_rows_read[index];
+      index_rows_read[index]= 0;
+end:
+      mysql_mutex_unlock(&LOCK_global_index_stats);
+    }
+  }
+}
+
+
 /****************************************************************************
 ** Some general functions that isn't in the handler class
 ****************************************************************************/
@@ -3782,6 +4064,7 @@ ha_check_if_table_exists(THD* thd, const char *db, const char *name,
 void st_ha_check_opt::init()
 {
   flags= sql_flags= 0;
+  start_time= my_time(0);
 }
 
 
@@ -3798,7 +4081,8 @@ void st_ha_check_opt::init()
 /**
   Init a key cache if it has not been initied before.
 */
-int ha_init_key_cache(const char *name, KEY_CACHE *key_cache)
+int ha_init_key_cache(const char *name, KEY_CACHE *key_cache, void *unused
+                      __attribute__((unused)))
 {
   DBUG_ENTER("ha_init_key_cache");
 
@@ -3809,11 +4093,13 @@ int ha_init_key_cache(const char *name, KEY_CACHE *key_cache)
     uint tmp_block_size= (uint) key_cache->param_block_size;
     uint division_limit= key_cache->param_division_limit;
     uint age_threshold=  key_cache->param_age_threshold;
+    uint partitions= key_cache->param_partitions;
     mysql_mutex_unlock(&LOCK_global_system_variables);
     DBUG_RETURN(!init_key_cache(key_cache,
 				tmp_block_size,
 				tmp_buff_size,
-				division_limit, age_threshold));
+				division_limit, age_threshold,
+                                partitions));
   }
   DBUG_RETURN(0);
 }
@@ -3843,10 +4129,12 @@ int ha_resize_key_cache(KEY_CACHE *key_cache)
 
 
 /**
-  Change parameters for key cache (like size)
+  Change parameters for key cache (like division_limit)
 */
 int ha_change_key_cache_param(KEY_CACHE *key_cache)
 {
+  DBUG_ENTER("ha_change_key_cache_param");
+
   if (key_cache->key_cache_inited)
   {
     mysql_mutex_lock(&LOCK_global_system_variables);
@@ -3855,9 +4143,35 @@ int ha_change_key_cache_param(KEY_CACHE *key_cache)
     mysql_mutex_unlock(&LOCK_global_system_variables);
     change_key_cache_param(key_cache, division_limit, age_threshold);
   }
-  return 0;
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Repartition key cache 
+*/
+int ha_repartition_key_cache(KEY_CACHE *key_cache)
+{
+  DBUG_ENTER("ha_repartition_key_cache");
+
+  if (key_cache->key_cache_inited)
+  {
+    mysql_mutex_lock(&LOCK_global_system_variables);
+    size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
+    long tmp_block_size= (long) key_cache->param_block_size;
+    uint division_limit= key_cache->param_division_limit;
+    uint age_threshold=  key_cache->param_age_threshold;
+    uint partitions= key_cache->param_partitions;
+    mysql_mutex_unlock(&LOCK_global_system_variables);
+    DBUG_RETURN(!repartition_key_cache(key_cache, tmp_block_size,
+				       tmp_buff_size,
+				       division_limit, age_threshold,
+                                       partitions));
+  }
+  DBUG_RETURN(0);
 }
 
+
 /**
   Move all tables from one key cache to another one.
 */
@@ -3958,7 +4272,7 @@ ha_find_files(THD *thd,const char *db,const char *path,
   int error= 0;
   DBUG_ENTER("ha_find_files");
   DBUG_PRINT("enter", ("db: '%s'  path: '%s'  wild: '%s'  dir: %d", 
-		       db, path, wild ? wild : "NULL", dir));
+		       db, path, wild, dir));
   st_find_files_args args= {db, path, wild, dir, files};
 
   plugin_foreach(thd, find_files_handlerton,
@@ -4150,133 +4464,6 @@ void ha_binlog_log_query(THD *thd, handlerton *hton,
 }
 #endif
 
-/**
-  Read the first row of a multi-range set.
-
-  @param found_range_p       Returns a pointer to the element in 'ranges' that
-                             corresponds to the returned row.
-  @param ranges              An array of KEY_MULTI_RANGE range descriptions.
-  @param range_count         Number of ranges in 'ranges'.
-  @param sorted	      If result should be sorted per key.
-  @param buffer              A HANDLER_BUFFER for internal handler usage.
-
-  @note
-    - Record is read into table->record[0].
-    - *found_range_p returns a valid value only if read_multi_range_first()
-    returns 0.
-    - Sorting is done within each range. If you want an overall sort, enter
-    'ranges' with sorted ranges.
-
-  @retval
-    0			OK, found a row
-  @retval
-    HA_ERR_END_OF_FILE	No rows in range
-  @retval
-    \#			Error code
-*/
-int handler::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
-                                    KEY_MULTI_RANGE *ranges, uint range_count,
-                                    bool sorted, HANDLER_BUFFER *buffer)
-{
-  int result= HA_ERR_END_OF_FILE;
-  DBUG_ENTER("handler::read_multi_range_first");
-  multi_range_sorted= sorted;
-  multi_range_buffer= buffer;
-
-  table->mark_columns_used_by_index_no_reset(active_index, table->read_set);
-  table->column_bitmaps_set(table->read_set, table->write_set);
-
-  for (multi_range_curr= ranges, multi_range_end= ranges + range_count;
-       multi_range_curr < multi_range_end;
-       multi_range_curr++)
-  {
-    result= read_range_first(multi_range_curr->start_key.keypart_map ?
-                             &multi_range_curr->start_key : 0,
-                             multi_range_curr->end_key.keypart_map ?
-                             &multi_range_curr->end_key : 0,
-                             test(multi_range_curr->range_flag & EQ_RANGE),
-                             multi_range_sorted);
-    if (result != HA_ERR_END_OF_FILE)
-      break;
-  }
-
-  *found_range_p= multi_range_curr;
-  DBUG_PRINT("exit",("result %d", result));
-  DBUG_RETURN(result);
-}
-
-
-/**
-  Read the next row of a multi-range set.
-
-  @param found_range_p       Returns a pointer to the element in 'ranges' that
-                             corresponds to the returned row.
-
-  @note
-    - Record is read into table->record[0].
-    - *found_range_p returns a valid value only if read_multi_range_next()
-    returns 0.
-
-  @retval
-    0			OK, found a row
-  @retval
-    HA_ERR_END_OF_FILE	No (more) rows in range
-  @retval
-    \#			Error code
-*/
-int handler::read_multi_range_next(KEY_MULTI_RANGE **found_range_p)
-{
-  int UNINIT_VAR(result);
-  DBUG_ENTER("handler::read_multi_range_next");
-
-  /* We should not be called after the last call returned EOF. */
-  DBUG_ASSERT(multi_range_curr < multi_range_end);
-
-  do
-  {
-    /* Save a call if there can be only one row in range. */
-    if (multi_range_curr->range_flag != (UNIQUE_RANGE | EQ_RANGE))
-    {
-      result= read_range_next();
-
-      /* On success or non-EOF errors jump to the end. */
-      if (result != HA_ERR_END_OF_FILE)
-        break;
-    }
-    else
-    {
-      if (was_semi_consistent_read())
-        goto scan_it_again;
-      /*
-        We need to set this for the last range only, but checking this
-        condition is more expensive than just setting the result code.
-      */
-      result= HA_ERR_END_OF_FILE;
-    }
-
-    multi_range_curr++;
-scan_it_again:
-    /* Try the next range(s) until one matches a record. */
-    for (; multi_range_curr < multi_range_end; multi_range_curr++)
-    {
-      result= read_range_first(multi_range_curr->start_key.keypart_map ?
-                               &multi_range_curr->start_key : 0,
-                               multi_range_curr->end_key.keypart_map ?
-                               &multi_range_curr->end_key : 0,
-                               test(multi_range_curr->range_flag & EQ_RANGE),
-                               multi_range_sorted);
-      if (result != HA_ERR_END_OF_FILE)
-        break;
-    }
-  }
-  while ((result == HA_ERR_END_OF_FILE) &&
-         (multi_range_curr < multi_range_end));
-
-  *found_range_p= multi_range_curr;
-  DBUG_PRINT("exit",("handler::read_multi_range_next: result %d", result));
-  DBUG_RETURN(result);
-}
-
 
 /**
   Read first row between two ranges.
@@ -4316,17 +4503,16 @@ int handler::read_range_first(const key_range *start_key,
   range_key_part= table->key_info[active_index].key_part;
 
   if (!start_key)			// Read first record
-    result= index_first(table->record[0]);
+    result= ha_index_first(table->record[0]);
   else
-    result= index_read_map(table->record[0],
-                           start_key->key,
-                           start_key->keypart_map,
-                           start_key->flag);
+    result= ha_index_read_map(table->record[0],
+                              start_key->key,
+                              start_key->keypart_map,
+                              start_key->flag);
   if (result)
     DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND) 
 		? HA_ERR_END_OF_FILE
 		: result);
-
   DBUG_RETURN (compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
 }
 
@@ -4352,11 +4538,11 @@ int handler::read_range_next()
   if (eq_range)
   {
     /* We trust that index_next_same always gives a row in range */
-    DBUG_RETURN(index_next_same(table->record[0],
-                                end_range->key,
-                                end_range->length));
+    DBUG_RETURN(ha_index_next_same(table->record[0],
+                                   end_range->key,
+                                   end_range->length));
   }
-  result= index_next(table->record[0]);
+  result= ha_index_next(table->record[0]);
   if (result)
     DBUG_RETURN(result);
   DBUG_RETURN(compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
@@ -4381,7 +4567,7 @@ int handler::read_range_next()
 int handler::compare_key(key_range *range)
 {
   int cmp;
-  if (!range)
+  if (!range || in_range_check_pushed_down)
     return 0;					// No max range
   cmp= key_cmp(range_key_part, range->key, range->length);
   if (!cmp)
@@ -4390,16 +4576,35 @@ int handler::compare_key(key_range *range)
 }
 
 
+/*
+  Same as compare_key() but doesn't check in_range_check_pushed_down.
+  This is used by index condition pushdown implementation.
+*/
+
+int handler::compare_key2(key_range *range)
+{
+  int cmp;
+  if (!range)
+    return 0;					// no max range
+  cmp= key_cmp(range_key_part, range->key, range->length);
+  if (!cmp)
+    cmp= key_compare_result_on_equal;
+  return cmp;
+}
+
+
 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
                                 key_part_map keypart_map,
                                 enum ha_rkey_function find_flag)
 {
   int error, error1;
-  error= index_init(index, 0);
+  LINT_INIT(error1);
+
+  error= ha_index_init(index, 0);
   if (!error)
   {
     error= index_read_map(buf, key, keypart_map, find_flag);
-    error1= index_end();
+    error1= ha_index_end();
   }
   return error ?  error : error1;
 }
@@ -4524,7 +4729,7 @@ bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
   {
     if (db_type->state != SHOW_OPTION_YES)
     {
-      const LEX_STRING *name=&hton2plugin[db_type->slot]->name;
+      const LEX_STRING *name= hton_name(db_type);
       result= stat_print(thd, name->str, name->length,
                          "", 0, "DISABLED", 8) ? 1 : 0;
     }
@@ -4535,6 +4740,8 @@ bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
 
   if (!result)
     my_eof(thd);
+  else if (!thd->is_error())
+    my_error(ER_GET_ERRNO, MYF(0), 0);
   return result;
 }
 
@@ -4572,7 +4779,8 @@ static bool check_table_binlog_row_based(THD *thd, TABLE *table)
 
 /** @brief
    Write table maps for all (manually or automatically) locked tables
-   to the binary log.
+   to the binary log. Also, if binlog_annotate_rows_events is ON,
+   write Annotate_rows event before the first table map.
 
    SYNOPSIS
      write_locked_table_maps()
@@ -4604,6 +4812,9 @@ static int write_locked_table_maps(THD *thd)
     MYSQL_LOCK *locks[2];
     locks[0]= thd->extra_lock;
     locks[1]= thd->lock;
+    my_bool with_annotate= thd->variables.binlog_annotate_rows_events &&
+                           thd->query() && thd->query_length();
+
     for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
     {
       MYSQL_LOCK const *const lock= locks[i];
@@ -4635,7 +4846,8 @@ static int write_locked_table_maps(THD *thd)
           */
           bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
                                 table->file->has_transactions();
-          int const error= thd->binlog_write_table_map(table, has_trans);
+          int const error= thd->binlog_write_table_map(table, has_trans,
+                                                       &with_annotate);
           /*
             If an error occurs, it is the responsibility of the caller to
             roll back the transaction.
@@ -4796,12 +5008,13 @@ int handler::ha_write_row(uchar *buf)
 
   MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
   mark_trx_read_write();
+  increment_statistics(&SSV::ha_write_count);
 
   error= write_row(buf);
   MYSQL_INSERT_ROW_DONE(error);
   if (unlikely(error))
     DBUG_RETURN(error);
-
+  rows_changed++;
   if (unlikely(error= binlog_log_row(table, 0, buf, log_func)))
     DBUG_RETURN(error); /* purecov: inspected */
   DBUG_RETURN(0);
@@ -4821,11 +5034,13 @@ int handler::ha_update_row(const uchar *old_data, uchar *new_data)
 
   MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
   mark_trx_read_write();
+  increment_statistics(&SSV::ha_update_count);
 
   error= update_row(old_data, new_data);
   MYSQL_UPDATE_ROW_DONE(error);
   if (unlikely(error))
     return error;
+  rows_changed++;
   if (unlikely(error= binlog_log_row(table, old_data, new_data, log_func)))
     return error;
   return 0;
@@ -4838,11 +5053,13 @@ int handler::ha_delete_row(const uchar *buf)
 
   MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
   mark_trx_read_write();
+  increment_statistics(&SSV::ha_delete_count);
 
   error= delete_row(buf);
   MYSQL_DELETE_ROW_DONE(error);
   if (unlikely(error))
     return error;
+  rows_changed++;
   if (unlikely(error= binlog_log_row(table, buf, 0, log_func)))
     return error;
   return 0;
diff --git a/sql/handler.h b/sql/handler.h
index d1222cb56a6..7a75253f040 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1,8 +1,8 @@
 #ifndef HANDLER_INCLUDED
 #define HANDLER_INCLUDED
-
 /*
-   Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
@@ -36,6 +36,10 @@
 #include <ft_global.h>
 #include <keycache.h>
 
+#if MAX_KEY > 128
+#error MAX_KEY is too large.  Values up to 128 are supported.
+#endif
+
 // the following is for checking tables
 
 #define HA_ADMIN_ALREADY_DONE	  1
@@ -116,7 +120,8 @@
 #define HA_CAN_FULLTEXT        (1 << 21)
 #define HA_CAN_SQL_HANDLER     (1 << 22)
 #define HA_NO_AUTO_INCREMENT   (1 << 23)
-#define HA_HAS_CHECKSUM        (1 << 24)
+/* Has automatic checksums and uses the old checksum format */
+#define HA_HAS_OLD_CHECKSUM    (1 << 24)
 /* Table data are stored in separate files (for lower_case_table_names) */
 #define HA_FILE_BASED	       (1 << 26)
 #define HA_NO_VARCHAR	       (1 << 27)
@@ -156,6 +161,7 @@
     ordered.
 */
 #define HA_DUPLICATE_KEY_NOT_IN_ORDER    (LL(1) << 36)
+
 /*
   Engine supports REPAIR TABLE. Used by CHECK TABLE FOR UPGRADE if an
   incompatible table is detected. If this flag is set, CHECK TABLE FOR UPGRADE
@@ -163,6 +169,12 @@
 */
 #define HA_CAN_REPAIR                    (LL(1) << 37)
 
+/* Has automatic checksums and uses the new checksum format */
+#define HA_HAS_NEW_CHECKSUM    (LL(1) << 38)
+#define HA_CAN_VIRTUAL_COLUMNS (LL(1) << 39)
+#define HA_MRR_CANT_SORT       (LL(1) << 40)
+#define HA_RECORD_MUST_BE_CLEAN_ON_WRITE (LL(1) << 41)
+
 /*
   Set of all binlog flags. Currently only contain the capabilities
   flags.
@@ -178,6 +190,18 @@
 #define HA_KEYREAD_ONLY         64	/* Support HA_EXTRA_KEYREAD */
 
 /*
+  Index scan will not return records in rowid order. Not guaranteed to be
+  set for unordered (e.g. HASH) indexes.
+*/
+#define HA_KEY_SCAN_NOT_ROR     128 
+#define HA_DO_INDEX_COND_PUSHDOWN  256 /* Supports Index Condition Pushdown */
+/*
+  Data is clustered on this key. This means that when you read the key
+  you also get the row data without any additional disk reads.
+*/
+#define HA_CLUSTERED_INDEX      512
+
+/*
   bits in alter_table_flags:
 */
 /*
@@ -235,12 +259,6 @@
 #define HA_FAST_CHANGE_PARTITION                (1L << 13)
 #define HA_PARTITION_ONE_PHASE                  (1L << 14)
 
-/*
-  Index scan will not return records in rowid order. Not guaranteed to be
-  set for unordered (e.g. HASH) indexes.
-*/
-#define HA_KEY_SCAN_NOT_ROR     128 
-
 /* operations for disable/enable indexes */
 #define HA_KEY_SWITCH_NONUNIQ      0
 #define HA_KEY_SWITCH_ALL          1
@@ -287,8 +305,6 @@
 #define HA_LEX_CREATE_TMP_TABLE	1
 #define HA_LEX_CREATE_IF_NOT_EXISTS 2
 #define HA_LEX_CREATE_TABLE_LIKE 4
-#define HA_OPTION_NO_CHECKSUM	(1L << 17)
-#define HA_OPTION_NO_DELAY_KEY_WRITE (1L << 18)
 #define HA_MAX_REC_LENGTH	65535
 
 /* Table caching type */
@@ -329,6 +345,11 @@ enum legacy_db_type
   DB_TYPE_FIRST_DYNAMIC=42,
   DB_TYPE_DEFAULT=127 // Must be last
 };
+/*
+  Better name for DB_TYPE_UNKNOWN. Should be used for engines that do not have
+  a hard-coded type value here.
+ */
+#define DB_TYPE_AUTOASSIGN DB_TYPE_UNKNOWN
 
 enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED,
 		ROW_TYPE_DYNAMIC, ROW_TYPE_COMPRESSED,
@@ -377,9 +398,8 @@ enum enum_binlog_command {
 #define HA_CREATE_USED_PASSWORD         (1L << 17)
 #define HA_CREATE_USED_CONNECTION       (1L << 18)
 #define HA_CREATE_USED_KEY_BLOCK_SIZE   (1L << 19)
-/** Unused. Reserved for future versions. */
+/* The following two are used by Maria engine: */
 #define HA_CREATE_USED_TRANSACTIONAL    (1L << 20)
-/** Unused. Reserved for future versions. */
 #define HA_CREATE_USED_PAGE_CHECKSUM    (1L << 21)
 
 typedef ulonglong my_xid; // this line is the same as in log_event.h
@@ -560,6 +580,7 @@ struct TABLE;
 enum enum_schema_tables
 {
   SCH_CHARSETS= 0,
+  SCH_CLIENT_STATS,
   SCH_COLLATIONS,
   SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
   SCH_COLUMNS,
@@ -569,6 +590,8 @@ enum enum_schema_tables
   SCH_FILES,
   SCH_GLOBAL_STATUS,
   SCH_GLOBAL_VARIABLES,
+  SCH_INDEX_STATS,
+  SCH_KEY_CACHES,
   SCH_KEY_COLUMN_USAGE,
   SCH_OPEN_TABLES,
   SCH_PARAMETERS,
@@ -589,8 +612,10 @@ enum enum_schema_tables
   SCH_TABLE_CONSTRAINTS,
   SCH_TABLE_NAMES,
   SCH_TABLE_PRIVILEGES,
+  SCH_TABLE_STATS,
   SCH_TRIGGERS,
   SCH_USER_PRIVILEGES,
+  SCH_USER_STATS,
   SCH_VARIABLES,
   SCH_VIEWS
 };
@@ -629,6 +654,108 @@ struct handler_log_file_data {
   enum log_status status;
 };
 
+/*
+  Definitions for engine-specific table/field/index options in the CREATE TABLE.
+
+  Options are declared with HA_*OPTION_* macros (HA_TOPTION_NUMBER,
+  HA_FOPTION_ENUM, HA_IOPTION_STRING, etc).
+
+  Every macro takes the option name, and the name of the underlying field of
+  the appropriate C structure. The "appropriate C structure" is
+  ha_table_option_struct for table level options,
+  ha_field_option_struct for field level options,
+  ha_index_option_struct for key level options. The engine either
+  defines a structure of this name, or uses #define's to map
+  these "appropriate" names to the actual structure type name.
+
+  ULL options use a ulonglong as the backing store.
+  HA_*OPTION_NUMBER() takes the option name, the structure field name,
+  the default value for the option, min, max, and blk_siz values.
+
+  STRING options use a char* as a backing store.
+  HA_*OPTION_STRING takes the option name and the structure field name.
+  The default value will be 0.
+
+  ENUM options use a uint as a backing store (not enum!!!).
+  HA_*OPTION_ENUM takes the option name, the structure field name,
+  the default value for the option as a number, and a string with the
+  permitted values for this enum - one string with comma separated values,
+  for example: "gzip,bzip2,lzma"
+
+  BOOL options use a bool as a backing store.
+  HA_*OPTION_BOOL takes the option name, the structure field name,
+  and the default value for the option.
+  From the SQL, BOOL options accept YES/NO, ON/OFF, and 1/0.
+
+  The name of the option is limited to 255 bytes,
+  the value (for string options) is limited to 32767 bytes.
+
+  See ha_example.cc for an example.
+*/
+
+struct ha_table_option_struct;
+struct ha_field_option_struct;
+struct ha_index_option_struct;
+
+enum ha_option_type { HA_OPTION_TYPE_ULL,    /* unsigned long long */
+                      HA_OPTION_TYPE_STRING, /* char * */
+                      HA_OPTION_TYPE_ENUM,   /* uint */
+                      HA_OPTION_TYPE_BOOL};  /* bool */
+
+#define HA_xOPTION_NUMBER(name, struc, field, def, min, max, blk_siz)   \
+  { HA_OPTION_TYPE_ULL, name, sizeof(name)-1,                        \
+    offsetof(struc, field), def, min, max, blk_siz, 0 }
+#define HA_xOPTION_STRING(name, struc, field)                        \
+  { HA_OPTION_TYPE_STRING, name, sizeof(name)-1,                     \
+    offsetof(struc, field), 0, 0, 0, 0, 0 }
+#define HA_xOPTION_ENUM(name, struc, field, values, def)             \
+  { HA_OPTION_TYPE_ENUM, name, sizeof(name)-1,                       \
+    offsetof(struc, field), def, 0,                                  \
+    sizeof(values)-1, 0, values }
+#define HA_xOPTION_BOOL(name, struc, field, def)                     \
+  { HA_OPTION_TYPE_BOOL, name, sizeof(name)-1,                       \
+    offsetof(struc, field), def, 0, 1, 0, 0 }
+#define HA_xOPTION_END { HA_OPTION_TYPE_ULL, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+#define HA_TOPTION_NUMBER(name, field, def, min, max, blk_siz)          \
+  HA_xOPTION_NUMBER(name, ha_table_option_struct, field, def, min, max, blk_siz)
+#define HA_TOPTION_STRING(name, field)                               \
+  HA_xOPTION_STRING(name, ha_table_option_struct, field)
+#define HA_TOPTION_ENUM(name, field, values, def)                    \
+  HA_xOPTION_ENUM(name, ha_table_option_struct, field, values, def)
+#define HA_TOPTION_BOOL(name, field, def)                            \
+  HA_xOPTION_BOOL(name, ha_table_option_struct, field, def)
+#define HA_TOPTION_END HA_xOPTION_END
+
+#define HA_FOPTION_NUMBER(name, field, def, min, max, blk_siz)          \
+  HA_xOPTION_NUMBER(name, ha_field_option_struct, field, def, min, max, blk_siz)
+#define HA_FOPTION_STRING(name, field)                               \
+  HA_xOPTION_STRING(name, ha_field_option_struct, field)
+#define HA_FOPTION_ENUM(name, field, values, def)                    \
+  HA_xOPTION_ENUM(name, ha_field_option_struct, field, values, def)
+#define HA_FOPTION_BOOL(name, field, def)                            \
+  HA_xOPTION_BOOL(name, ha_field_option_struct, field, def)
+#define HA_FOPTION_END HA_xOPTION_END
+
+#define HA_IOPTION_NUMBER(name, field, def, min, max, blk_siz)          \
+  HA_xOPTION_NUMBER(name, ha_index_option_struct, field, def, min, max, blk_siz)
+#define HA_IOPTION_STRING(name, field)                               \
+  HA_xOPTION_STRING(name, ha_index_option_struct, field)
+#define HA_IOPTION_ENUM(name, field, values, def)                    \
+  HA_xOPTION_ENUM(name, ha_index_option_struct, field, values, def)
+#define HA_IOPTION_BOOL(name, field, def)                            \
+  HA_xOPTION_BOOL(name, ha_index_option_struct, field, def)
+#define HA_IOPTION_END HA_xOPTION_END
+
+typedef struct st_ha_create_table_option {
+  enum ha_option_type type;
+  const char *name;
+  size_t name_length;
+  ptrdiff_t offset;
+  ulonglong def_value;
+  ulonglong min_value, max_value, block_size;
+  const char *values;
+} ha_create_table_option;
 
 enum handler_iterator_type
 {
@@ -690,8 +817,9 @@ struct handlerton
   SHOW_COMP_OPTION state;
 
   /*
-    Historical number used for frm file to determine the correct storage engine.
-    This is going away and new engines will just use "name" for this.
+    Historical number used for frm file to determine the correct
+    storage engine.  This is going away and new engines will just use
+    "name" for this.
   */
   enum legacy_db_type db_type;
   /*
@@ -741,12 +869,113 @@ struct handlerton
      NOTE 'all' is also false in auto-commit mode where 'end of statement'
      and 'real commit' mean the same event.
    */
-   int  (*commit)(handlerton *hton, THD *thd, bool all);
+   int (*commit)(handlerton *hton, THD *thd, bool all);
+   /*
+     The commit_ordered() method is called prior to the commit() method, after
+     the transaction manager has decided to commit (not rollback) the
+     transaction. Unlike commit(), commit_ordered() is called only when the
+     full transaction is committed, not for each commit of statement
+     transaction in a multi-statement transaction.
+
+     Note that like prepare(), commit_ordered() is only called when 2-phase
+     commit takes place. Ie. when no binary log and only a single engine
+     participates in a transaction, one commit() is called, no
+     commit_ordered(). So engines must be prepared for this.
+
+     The calls to commit_ordered() in multiple parallel transactions are
+     guaranteed to happen in the same order in every participating
+     handler. This can be used to ensure the same commit order among multiple
+     handlers (eg. in table handler and binlog). So if transaction T1 calls
+     into commit_ordered() of handler A before T2, then T1 will also call
+     commit_ordered() of handler B before T2.
+
+     Engines that implement this method should during this call make the
+     transaction visible to other transactions, thereby making the order of
+     transaction commits be defined by the order of commit_ordered() calls.
+
+     The intention is that commit_ordered() should do the minimal amount of
+     work that needs to happen in consistent commit order among handlers. To
+     preserve ordering, calls need to be serialised on a global mutex, so
+     doing any time-consuming or blocking operations in commit_ordered() will
+     limit scalability.
+
+     Handlers can rely on commit_ordered() calls to be serialised (no two
+     calls can run in parallel, so no extra locking on the handler part is
+     required to ensure this).
+
+     Note that commit_ordered() can be called from a different thread than the
+     one handling the transaction! So it can not do anything that depends on
+     thread local storage, in particular it can not call my_error() and
+     friends (instead it can store the error code and delay the call of
+     my_error() to the commit() method).
+
+     Similarly, since commit_ordered() returns void, any return error code
+     must be saved and returned from the commit() method instead.
+
+     The commit_ordered method is optional, and can be left unset if not
+     needed in a particular handler (then there will be no ordering guarantees
+     wrt. other engines and binary log).
+   */
+   void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
    int  (*rollback)(handlerton *hton, THD *thd, bool all);
    int  (*prepare)(handlerton *hton, THD *thd, bool all);
+   /*
+     The prepare_ordered method is optional. If set, it will be called after
+     successful prepare() in all handlers participating in 2-phase
+     commit. Like commit_ordered(), it is called only when the full
+     transaction is committed, not for each commit of statement transaction.
+
+     The calls to prepare_ordered() among multiple parallel transactions are
+     ordered consistently with calls to commit_ordered(). This means that
+     calls to prepare_ordered() effectively define the commit order, and that
+     each handler will see the same sequence of transactions calling into
+     prepare_ordered() and commit_ordered().
+
+     Thus, prepare_ordered() can be used to define commit order for handlers
+     that need to do this in the prepare step (like binlog). It can also be
+     used to release transaction's locks early in an order consistent with the
+     order transactions will be eventually committed.
+
+     Like commit_ordered(), prepare_ordered() calls are serialised to maintain
+     ordering, so the intention is that they should execute fast, with only
+     the minimal amount of work needed to define commit order. Handlers can
+     rely on this serialisation, and do not need to do any extra locking to
+     avoid two prepare_ordered() calls running in parallel.
+
+     Like commit_ordered(), prepare_ordered() is not guaranteed to be called
+     in the context of the thread handling the rest of the transaction. So it
+     cannot invoke code that relies on thread local storage, in particular it
+     cannot call my_error().
+
+     prepare_ordered() cannot cause a rollback by returning an error, all
+     possible errors must be handled in prepare() (the prepare_ordered()
+     method returns void). In case of some fatal error, a record of the error
+     must be made internally by the engine and returned from commit() later.
+
+     Note that for user-level XA SQL commands, no consistent ordering among
+     prepare_ordered() and commit_ordered() is guaranteed (as that would
+     require blocking all other commits for an indefinite time).
+
+     When 2-phase commit is not used (eg. only one engine (and no binlog) in
+     transaction), neither prepare() nor prepare_ordered() is called.
+   */
+   void (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
    int  (*recover)(handlerton *hton, XID *xid_list, uint len);
    int  (*commit_by_xid)(handlerton *hton, XID *xid);
    int  (*rollback_by_xid)(handlerton *hton, XID *xid);
+  /*
+    Disable or enable checkpointing internal to the storage engine. This is
+    used for FLUSH TABLES WITH READ LOCK AND DISABLE CHECKPOINT to ensure that
+    the engine will never start any recovery from a time between
+    FLUSH TABLES ... ; UNLOCK TABLES.
+
+    While checkpointing is disabled, the engine should pause any background
+    write activity (such as tablespace checkpointing) that require consistency
+    between different files (such as transaction log and tablespace files) for
+    crash recovery to succeed. The idea is to use this to make safe
+    multi-volume LVM snapshot backups.
+  */
+   int  (*checkpoint_state)(handlerton *hton, bool disabled);
    void *(*create_cursor_read_view)(handlerton *hton, THD *thd);
    void (*set_cursor_read_view)(handlerton *hton, THD *thd, void *read_view);
    void (*close_cursor_read_view)(handlerton *hton, THD *thd, void *read_view);
@@ -801,17 +1030,28 @@ struct handlerton
    int (*table_exists_in_engine)(handlerton *hton, THD* thd, const char *db,
                                  const char *name);
    uint32 license; /* Flag for Engine License */
-   void *data; /* Location for engines to keep personal structures */
+   /*
+     Optional clauses in the CREATE/ALTER TABLE
+   */
+   ha_create_table_option *table_options; // table level options
+   ha_create_table_option *field_options; // these are specified per field
+   ha_create_table_option *index_options; // these are specified per index
+
 };
 
 
+/* Return the address of the 'name' member of hton2plugin[hton->slot]. */
+inline LEX_STRING *hton_name(const handlerton *hton)
+{
+  return &(hton2plugin[hton->slot]->name);
+}
+
 /* Possible flags of a handlerton (there can be 32 of them) */
 #define HTON_NO_FLAGS                 0
 #define HTON_CLOSE_CURSORS_AT_COMMIT (1 << 0)
 #define HTON_ALTER_NOT_SUPPORTED     (1 << 1) //Engine does not support alter
 #define HTON_CAN_RECREATE            (1 << 2) //Delete all is used fro truncate
 #define HTON_HIDDEN                  (1 << 3) //Engine does not appear in lists
-#define HTON_FLUSH_AFTER_RENAME      (1 << 4)
 #define HTON_NOT_USER_SELECTABLE     (1 << 5)
 #define HTON_TEMPORARY_NOT_SUPPORTED (1 << 6) //Having temporary tables not supported
 #define HTON_SUPPORT_LOG_TABLES      (1 << 7) //Engine supports log tables
@@ -857,6 +1097,7 @@ struct THD_TRANS
 
   void reset() { no_2pc= FALSE; modified_non_trans_table= FALSE; }
   bool is_empty() const { return ha_list == NULL; }
+  THD_TRANS() {}                        /* Remove gcc warning */
 };
 
 
@@ -967,9 +1208,9 @@ typedef struct {
   ulonglong delete_length;
   ha_rows records;
   ulong mean_rec_length;
-  ulong create_time;
-  ulong check_time;
-  ulong update_time;
+  time_t create_time;
+  time_t check_time;
+  time_t update_time;
   ulonglong check_sum;
 } PARTITION_STATS;
 
@@ -982,7 +1223,7 @@ class partition_info;
 struct st_partition_iter;
 #define NOT_A_PARTITION_ID ((uint32)-1)
 
-enum enum_ha_unused { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES };
+enum ha_choice { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES };
 
 typedef struct st_ha_create_information
 {
@@ -1014,11 +1255,16 @@ typedef struct st_ha_create_information
   uint options;				/* OR of HA_CREATE_ options */
   uint merge_insert_method;
   uint extra_size;                      /* length of extra data segment */
-  enum enum_ha_unused unused1;
-  bool frm_only;                        /* 1 if no ha_create_table() */
-  bool varchar;                         /* 1 if table has a VARCHAR */
-  enum ha_storage_media storage_media;  /* DEFAULT, DISK or MEMORY */
-  enum enum_ha_unused unused2;
+  enum ha_choice transactional;
+  bool frm_only;                        ///< 1 if no ha_create_table()
+  bool varchar;                         ///< 1 if table has a VARCHAR
+  enum ha_storage_media storage_media;  ///< DEFAULT, DISK or MEMORY
+  enum ha_choice page_checksum;         ///< If we have page_checksums
+  engine_option_value *option_list;     ///< list of table create options
+  /* the following three are only for ALTER TABLE, check_if_incompatible_data() */
+  ha_table_option_struct *option_struct;           ///< structure with parsed table options
+  ha_field_option_struct **fields_option_struct;   ///< array of field option structures
+  ha_index_option_struct **indexes_option_struct;  ///< array of index option structures
 } HA_CREATE_INFO;
 
 
@@ -1093,11 +1339,250 @@ typedef struct st_ha_check_opt
   st_ha_check_opt() {}                        /* Remove gcc warning */
   uint flags;       /* isam layer flags (e.g. for myisamchk) */
   uint sql_flags;   /* sql layer flags - for something myisamchk cannot do */
-  KEY_CACHE *key_cache;	/* new key cache when changing key cache */
+  time_t start_time;   /* When check/repair starts */
+  KEY_CACHE *key_cache; /* new key cache when changing key cache */
   void init();
 } HA_CHECK_OPT;
 
 
+/********************************************************************************
+ * MRR
+ ********************************************************************************/
+
+typedef void *range_seq_t;
+
+typedef struct st_range_seq_if
+{
+  /*
+    Get key information
+ 
+    SYNOPSIS
+      get_key_info()
+        init_params  The seq_init_param parameter 
+        length       OUT length of the keys in this range sequence
+        map          OUT key_part_map of the keys in this range sequence
+
+    DESCRIPTION
+      This function is set only when using HA_MRR_FIXED_KEY mode. In that mode, 
+      all ranges are single-point equality ranges that use the same set of key
+      parts. This function allows the MRR implementation to get the length of
+      a key, and which keyparts it uses.
+  */
+  void (*get_key_info)(void *init_params, uint *length, key_part_map *map);
+
+  /*
+    Initialize the traversal of range sequence
+    
+    SYNOPSIS
+      init()
+        init_params  The seq_init_param parameter 
+        n_ranges     The number of ranges obtained 
+        flags        A combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+    RETURN
+      An opaque value to be used as RANGE_SEQ_IF::next() parameter
+  */
+  range_seq_t (*init)(void *init_params, uint n_ranges, uint flags);
+
+
+  /*
+    Get the next range in the range sequence
+
+    SYNOPSIS
+      next()
+        seq    The value returned by RANGE_SEQ_IF::init()
+        range  OUT Information about the next range
+    
+    RETURN
+      FALSE - Ok, the range structure filled with info about the next range
+      TRUE  - No more ranges
+  */
+  bool (*next) (range_seq_t seq, KEY_MULTI_RANGE *range);
+
+  /*
+    Check whether range_info orders to skip the next record
+
+    SYNOPSIS
+      skip_record()
+        seq         The value returned by RANGE_SEQ_IF::init()
+        range_info  Information about the next range
+                    (Ignored if HA_MRR_NO_ASSOCIATION is set)
+        rowid       Rowid of the record to be checked (ignored if set to 0)
+    
+    RETURN
+      1 - Record with this range_info and/or this rowid shall be filtered
+          out from the stream of records returned by multi_range_read_next()
+      0 - The record shall be left in the stream
+  */ 
+  bool (*skip_record) (range_seq_t seq, range_id_t range_info, uchar *rowid);
+
+  /*
+    Check if the record combination matches the index condition
+    SYNOPSIS
+      skip_index_tuple()
+        seq         The value returned by RANGE_SEQ_IF::init()
+        range_info  Information about the next range 
+    
+    RETURN
+      0 - The record combination satisfies the index condition
+      1 - Otherwise
+  */ 
+  bool (*skip_index_tuple) (range_seq_t seq, range_id_t range_info);
+} RANGE_SEQ_IF;
+
+typedef bool (*SKIP_INDEX_TUPLE_FUNC) (range_seq_t seq, range_id_t range_info);
+
+/* Cost split into I/O, CPU, memory and remote parts; members start unset. */
+class COST_VECT
+{
+public:
+  double io_count;     /* number of I/O                 */
+  double avg_io_cost;  /* cost of an average I/O oper.  */
+  double cpu_cost;     /* cost of operations in CPU     */
+  double mem_cost;     /* cost of used memory           */
+  double import_cost;  /* cost of remote operations     */
+
+  /* Weights applied to each component by total_cost() */
+  enum { IO_COEFF=1, CPU_COEFF=1, MEM_COEFF=1, IMPORT_COEFF=1 };
+
+  COST_VECT() {}                              // keep gcc happy
+
+  /* Weighted sum of all components; const so const objects can use it */
+  double total_cost() const
+  {
+    return IO_COEFF*io_count*avg_io_cost + CPU_COEFF * cpu_cost +
+           MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost;
+  }
+
+  /* Reset to "no cost": every counter 0.0, average I/O cost 1.0 */
+  void zero()
+  {
+    avg_io_cost= 1.0;
+    io_count= cpu_cost= mem_cost= import_cost= 0.0;
+  }
+
+  /* Scale io_count, cpu_cost and import_cost by m; mem_cost is not scaled */
+  void multiply(double m)
+  {
+    io_count *= m;
+    cpu_cost *= m;
+    import_cost *= m;
+  }
+
+  /* Accumulate the I/O and CPU parts of *cost. NOTE(review): mem_cost and
+     import_cost of *cost are not added -- confirm that this is intended. */
+  void add(const COST_VECT* cost)
+  {
+    double io_count_sum= io_count + cost->io_count;
+    add_io(cost->io_count, cost->avg_io_cost);
+    io_count= io_count_sum;
+    cpu_cost += cost->cpu_cost;
+  }
+  /* Add add_io_cnt I/Os; avg_io_cost becomes the count-weighted average */
+  void add_io(double add_io_cnt, double add_avg_cost)
+  {
+    if (add_io_cnt > 0)          /* in edge cases add_io_cnt may be zero */
+    {
+      double io_count_sum= io_count + add_io_cnt;
+      avg_io_cost= (io_count * avg_io_cost +
+                    add_io_cnt * add_avg_cost) / io_count_sum;
+      io_count= io_count_sum;
+    }
+  }
+
+  /* To be used when we go from old single value-based cost calculations to
+     the new COST_VECT-based. */
+  void convert_from_cost(double cost)
+  {
+    zero();                      /* also sets avg_io_cost to 1.0 */
+    io_count= cost;
+  }
+};
+
+void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, 
+                         COST_VECT *cost);
+
+/*
+  Indicates that all scanned ranges will be singlepoint (aka equality) ranges.
+  The ranges may not use the full key but all of them will use the same number
+  of key parts.
+*/
+#define HA_MRR_SINGLE_POINT 1
+#define HA_MRR_FIXED_KEY  2
+
+/* 
+  Indicates that RANGE_SEQ_IF::next(&range) doesn't need to fill in the
+  'range' parameter.
+*/
+#define HA_MRR_NO_ASSOCIATION 4
+
+/* 
+  The MRR user will provide ranges in key order, and MRR implementation
+  must return rows in key order.
+*/
+#define HA_MRR_SORTED 8
+
+/* MRR implementation doesn't have to retrieve full records */
+#define HA_MRR_INDEX_ONLY 16
+
+/* 
+  The passed memory buffer is of maximum possible size, the caller can't
+  assume larger buffer.
+*/
+#define HA_MRR_LIMITS 32
+
+
+/*
+  Flag set <=> default MRR implementation is used
+  (The choice is made by the multi_range_read_info[_const]() function, which
+   may set this flag. SQL layer remembers the flag value and then passes it
+   to multi_range_read_init()).
+*/
+#define HA_MRR_USE_DEFAULT_IMPL 64
+
+/*
+  Used only as parameter to multi_range_read_info():
+  Flag set <=> the caller guarantees that the bounds of the scanned ranges
+  will not have NULL values.
+*/
+#define HA_MRR_NO_NULL_ENDPOINTS 128
+
+/*
+  The MRR user has materialized range keys somewhere in the user's buffer.
+  This can be used for optimization of the procedure that sorts these keys
+  since in this case key values don't have to be copied into the MRR buffer.
+
+  In other words, it is guaranteed that after RANGE_SEQ_IF::next() call the 
+  pointer in range->start_key.key will point to a key value that will remain 
+  there until the end of the MRR scan.
+*/
+#define HA_MRR_MATERIALIZED_KEYS 256
+
+/*
+  The following bits are reserved for use by MRR implementation. The intended
+  use scenario:
+
+  * sql layer calls handler->multi_range_read_info[_const]()
+    - MRR implementation figures out what kind of scan it will perform, saves
+      the result in *mrr_mode parameter.
+  * sql layer remembers what was returned in *mrr_mode
+  * the optimizer picks the query plan (which may or may not include the MRR
+    scan that was estimated by the multi_range_read_info[_const] call)
+  * if the query is an EXPLAIN statement, sql layer will call
+    handler->multi_range_read_explain_info(mrr_mode) to get a text description
+    of the picked MRR scan; the description will be a part of EXPLAIN output.
+*/
+#define HA_MRR_IMPLEMENTATION_FLAG1 512
+#define HA_MRR_IMPLEMENTATION_FLAG2 1024
+#define HA_MRR_IMPLEMENTATION_FLAG3 2048
+#define HA_MRR_IMPLEMENTATION_FLAG4 4096
+#define HA_MRR_IMPLEMENTATION_FLAG5 8192
+#define HA_MRR_IMPLEMENTATION_FLAG6 16384
+/* Mask of all implementation-reserved bits, built from the flags above */
+#define HA_MRR_IMPLEMENTATION_FLAGS \
+  (HA_MRR_IMPLEMENTATION_FLAG1 | HA_MRR_IMPLEMENTATION_FLAG2 | \
+   HA_MRR_IMPLEMENTATION_FLAG3 | HA_MRR_IMPLEMENTATION_FLAG4 | \
+   HA_MRR_IMPLEMENTATION_FLAG5 | HA_MRR_IMPLEMENTATION_FLAG6)
 
 /*
   This is a buffer area that the handler can use to store rows.
@@ -1108,8 +1593,8 @@ typedef struct st_ha_check_opt
 
 typedef struct st_handler_buffer
 {
-  const uchar *buffer;         /* Buffer one can start using */
-  const uchar *buffer_end;     /* End of buffer */
+  /* const? */uchar *buffer;         /* Buffer one can start using */
+  /* const? */uchar *buffer_end;     /* End of buffer */
   uchar *end_of_used_area;     /* End of area that was used by handler */
 } HANDLER_BUFFER;
 
@@ -1135,16 +1620,21 @@ public:
   ha_rows records;
   ha_rows deleted;			/* Deleted records */
   ulong mean_rec_length;		/* physical reclength */
-  ulong create_time;			/* When table was created */
-  ulong check_time;
-  ulong update_time;
+  time_t create_time;			/* When table was created */
+  time_t check_time;
+  time_t update_time;
   uint block_size;			/* index block size */
 
+  /*
+    number of buffer bytes that native mrr implementation needs,
+  */
+  uint mrr_length_per_rec; 
+
   ha_statistics():
     data_file_length(0), max_data_file_length(0),
     index_file_length(0), delete_length(0), auto_increment_value(0),
     records(0), deleted(0), mean_rec_length(0), create_time(0),
-    check_time(0), update_time(0), block_size(0)
+    check_time(0), update_time(0), block_size(0), mrr_length_per_rec(0)
   {}
 };
 
@@ -1204,27 +1694,42 @@ public:
 
   ha_statistics stats;
 
-  /** The following are for read_multi_range */
-  bool multi_range_sorted;
-  KEY_MULTI_RANGE *multi_range_curr;
-  KEY_MULTI_RANGE *multi_range_end;
-  HANDLER_BUFFER *multi_range_buffer;
+  /** MultiRangeRead-related members: */
+  range_seq_t mrr_iter;    /* Iterator to traverse the range sequence */
+  RANGE_SEQ_IF mrr_funcs;  /* Range sequence traversal functions */
+  HANDLER_BUFFER *multi_range_buffer; /* MRR buffer info */
+  uint ranges_in_seq; /* Total number of ranges in the traversed sequence */
+  /* TRUE <=> source MRR ranges and the output are ordered */
+  bool mrr_is_output_sorted;
+
+  /** TRUE <=> we're currently traversing a range in mrr_cur_range. */
+  bool mrr_have_range;
+  /** Current range (the one we're now returning rows from) */
+  KEY_MULTI_RANGE mrr_cur_range;
 
   /** The following are for read_range() */
   key_range save_end_range, *end_range;
   KEY_PART_INFO *range_key_part;
   int key_compare_result_on_equal;
   bool eq_range;
+  bool internal_tmp_table;                      /* If internal tmp table */
 
   uint errkey;				/* Last dup key */
   uint key_used_on_scan;
   uint active_index;
+  /* 
+    TRUE <=> the engine guarantees that returned records are within the range
+    being scanned.
+  */
+  bool in_range_check_pushed_down;
+
   /** Length of ref (1-8 or the clustered key length) */
   uint ref_length;
   FT_INFO *ft_handler;
   enum {NONE=0, INDEX, RND} inited;
   bool locked;
   bool implicit_emptied;                /* Can be !=0 only if HEAP */
+  bool mark_trx_done;
   const COND *pushed_cond;
   /**
     next_insert_id is the next value which should be inserted into the
@@ -1247,6 +1752,16 @@ public:
     Interval returned by get_auto_increment() and being consumed by the
     inserter.
   */
+  /* Statistics  variables */
+  ulonglong rows_read;
+  ulonglong rows_tmp_read;
+  ulonglong rows_changed;
+  /* One bigger than needed to avoid to test if key == MAX_KEY */
+  ulonglong index_rows_read[MAX_KEY+1];
+
+  Item *pushed_idx_cond;
+  uint pushed_idx_cond_keyno;  /* The index which the above condition is for */
+
   Discrete_interval auto_inc_interval_for_cur_row;
   /**
      Number of reserved auto-increment intervals. Serves as a heuristic
@@ -1274,13 +1789,18 @@ public:
     :table_share(share_arg), table(0),
     estimation_rows_to_insert(0), ht(ht_arg),
     ref(0), key_used_on_scan(MAX_KEY), active_index(MAX_KEY),
+    in_range_check_pushed_down(FALSE),
     ref_length(sizeof(my_off_t)),
     ft_handler(0), inited(NONE),
-    locked(FALSE), implicit_emptied(0),
+    locked(FALSE), implicit_emptied(0), mark_trx_done(FALSE),
     pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0),
+    pushed_idx_cond(NULL),
+    pushed_idx_cond_keyno(MAX_KEY),
     auto_inc_intervals_count(0),
     m_psi(NULL)
-    {}
+  {
+    reset_statistics();
+  }
   virtual ~handler(void)
   {
     DBUG_ASSERT(locked == FALSE);
@@ -1294,24 +1814,32 @@ public:
   }
   /* ha_ methods: pubilc wrappers for private virtual API */
 
-  int ha_open(TABLE *table, const char *name, int mode, int test_if_locked);
+  int ha_open(TABLE *table, const char *name, int mode, uint test_if_locked);
   int ha_index_init(uint idx, bool sorted)
   {
     int result;
     DBUG_ENTER("ha_index_init");
     DBUG_ASSERT(inited==NONE);
     if (!(result= index_init(idx, sorted)))
-      inited=INDEX;
+    {
+      inited=       INDEX;
+      active_index= idx;
+      end_range= NULL;
+    }
     DBUG_RETURN(result);
   }
   int ha_index_end()
   {
     DBUG_ENTER("ha_index_end");
     DBUG_ASSERT(inited==INDEX);
-    inited=NONE;
+    inited=       NONE;
+    active_index= MAX_KEY;
+    end_range=    NULL;
     DBUG_RETURN(index_end());
   }
-  int ha_rnd_init(bool scan)
+  /* This is called after index_init() if we need to do a index scan */
+  virtual int prepare_index_scan() { return 0; }
+  int ha_rnd_init(bool scan) __attribute__ ((warn_unused_result))
   {
     int result;
     DBUG_ENTER("ha_rnd_init");
@@ -1326,7 +1854,15 @@ public:
     inited=NONE;
     DBUG_RETURN(rnd_end());
   }
+  int ha_rnd_init_with_error(bool scan) __attribute__ ((warn_unused_result));
   int ha_reset();
+  /* Reset handler-level (not storage engine) state for a new statement */
+  void ha_start_of_new_statement()
+  {
+    mark_trx_done= FALSE;
+    ft_handler= NULL;
+  }
+
   /* this is necessary in many places, e.g. in HANDLER command */
   int ha_index_or_rnd_end()
   {
@@ -1355,13 +1891,17 @@ public:
   int ha_repair(THD* thd, HA_CHECK_OPT* check_opt);
   void ha_start_bulk_insert(ha_rows rows)
   {
+    DBUG_ENTER("handler::ha_start_bulk_insert");
     estimation_rows_to_insert= rows;
     start_bulk_insert(rows);
+    DBUG_VOID_RETURN;
   }
   int ha_end_bulk_insert()
   {
+    DBUG_ENTER("handler::ha_end_bulk_insert");
     estimation_rows_to_insert= 0;
-    return end_bulk_insert();
+    int ret= end_bulk_insert();
+    DBUG_RETURN(ret);
   }
   int ha_bulk_update_row(const uchar *old_data, uchar *new_data,
                          uint *dup_key_found);
@@ -1399,15 +1939,31 @@ public:
   virtual void print_error(int error, myf errflag);
   virtual bool get_error_message(int error, String *buf);
   uint get_dup_key(int error);
+  void reset_statistics()          /* zero all row counters of this handler */
+  {
+    bzero(index_rows_read, sizeof(index_rows_read));
+    rows_tmp_read= rows_changed= rows_read= 0;
+  }
   virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
   {
     table= table_arg;
     table_share= share;
+    reset_statistics();
   }
   virtual double scan_time()
   { return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
   virtual double read_time(uint index, uint ranges, ha_rows rows)
   { return rows2double(ranges+rows); }
+
+  /**
+    Calculate cost of 'keyread' scan for given index and number of records.
+
+     @param index    index to read
+     @param ranges   #of ranges to read
+     @param rows     #of records to read
+  */
+  virtual double keyread_time(uint index, uint ranges, ha_rows rows);
+
   virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
   bool has_transactions()
   { return (ha_table_flags() & HA_NO_TRANSACTIONS) == 0; }
@@ -1464,8 +2020,13 @@ public:
     as there may be several calls to this routine.
   */
   virtual void column_bitmaps_signal();
-  uint get_index(void) const { return active_index; }
-  virtual int close(void)=0;
+  /*
+    Report MAX_KEY unless an index scan is initialised: some engines, like
+    InnoDB, set active_index during a table scan as well.
+  */
+  uint get_index(void) const
+  { return inited != INDEX ? MAX_KEY : active_index; }
+  int ha_close(void);
 
   /**
     @retval  0   Bulk update used by handler
@@ -1511,22 +2072,23 @@ public:
   }
   /**
      @brief
-     Positions an index cursor to the index specified in the handle. Fetches the
-     row if available. If the key value is null, begin at the first key of the
-     index.
+     Positions an index cursor to the index specified in the
+     handle. Fetches the row if available. If the key value is null,
+     begin at the first key of the index.
   */
+protected:
   virtual int index_read_map(uchar * buf, const uchar * key,
                              key_part_map keypart_map,
                              enum ha_rkey_function find_flag)
   {
     uint key_len= calculate_key_len(table, active_index, key, keypart_map);
-    return  index_read(buf, key, key_len, find_flag);
+    return index_read(buf, key, key_len, find_flag);
   }
   /**
      @brief
-     Positions an index cursor to the index specified in the handle. Fetches the
-     row if available. If the key value is null, begin at the first key of the
-     index.
+     Positions an index cursor to the index specified in the
+     handle. Fetches the row if available. If the key value is null,
+     begin at the first key of the index.
   */
   virtual int index_read_idx_map(uchar * buf, uint index, const uchar * key,
                                  key_part_map keypart_map,
@@ -1540,30 +2102,85 @@ public:
   virtual int index_last(uchar * buf)
    { return  HA_ERR_WRONG_COMMAND; }
   virtual int index_next_same(uchar *buf, const uchar *key, uint keylen);
-  /**
-     @brief
-     The following functions works like index_read, but it find the last
-     row with the current key value or prefix.
-  */
-  virtual int index_read_last_map(uchar * buf, const uchar * key,
-                                  key_part_map keypart_map)
+  virtual int close(void)=0;
+  inline void update_rows_read()
   {
-    uint key_len= calculate_key_len(table, active_index, key, keypart_map);
-    return index_read_last(buf, key, key_len);
+    if (likely(!internal_tmp_table))
+      rows_read++;
+    else
+      rows_tmp_read++;
+  }
+  inline void update_index_statistics()
+  {
+    index_rows_read[active_index]++;
+    update_rows_read();
   }
-  virtual int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
-                                     KEY_MULTI_RANGE *ranges, uint range_count,
-                                     bool sorted, HANDLER_BUFFER *buffer);
-  virtual int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+public:
+
+  /* Similar functions like the above, but does statistics counting */
+  inline int ha_index_read_map(uchar * buf, const uchar * key,
+                               key_part_map keypart_map,
+                               enum ha_rkey_function find_flag);
+  inline int ha_index_read_idx_map(uchar * buf, uint index, const uchar * key,
+                                   key_part_map keypart_map,
+                                   enum ha_rkey_function find_flag);
+  inline int ha_index_next(uchar * buf);
+  inline int ha_index_prev(uchar * buf);
+  inline int ha_index_first(uchar * buf);
+  inline int ha_index_last(uchar * buf);
+  inline int ha_index_next_same(uchar *buf, const uchar *key, uint keylen);
+  /*
+    TODO: should we also make non-virtual ha_func_name wrappers for those
+    functions?
+  */
+  virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                              void *seq_init_param, 
+                                              uint n_ranges, uint *bufsz,
+                                              uint *mrr_mode, COST_VECT *cost);
+  virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+                                        uint key_parts, uint *bufsz, 
+                                        uint *mrr_mode, COST_VECT *cost);
+  virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+                                    uint n_ranges, uint mrr_mode, 
+                                    HANDLER_BUFFER *buf);
+  virtual int multi_range_read_next(range_id_t *range_info);
+  /*
+    Return string representation of the MRR plan.
+
+    This is intended to be used for EXPLAIN, via the following scenario:
+    1. SQL layer calls handler->multi_range_read_info().
+    1.1. Storage engine figures out whether it will use some non-default
+         MRR strategy, sets appropriate bits in *mrr_mode, and returns
+         control to SQL layer
+    2. SQL layer remembers the returned mrr_mode
+    3. SQL layer compares various options and chooses the final query plan.
+       As a part of that, it makes a choice of whether to use the MRR
+       strategy picked in 1.1
+    4. EXPLAIN code converts the query plan to its text representation. If MRR
+       strategy is part of the plan, it calls
+       multi_range_read_explain_info(mrr_mode) to get a text representation of
+       the picked MRR strategy.
+
+    @param mrr_mode   Mode which was returned by multi_range_read_info[_const]
+    @param str        INOUT string to be printed for EXPLAIN
+    @param size       Size of the str buffer (presumably in bytes -- confirm)
+    @return           This default implementation writes nothing and returns 0
+  */
+  virtual int multi_range_read_explain_info(uint mrr_mode, char *str, 
+                                            size_t size)
+  { return 0; }
+
   virtual int read_range_first(const key_range *start_key,
                                const key_range *end_key,
                                bool eq_range, bool sorted);
   virtual int read_range_next();
   int compare_key(key_range *range);
+  int compare_key2(key_range *range);
   virtual int ft_init() { return HA_ERR_WRONG_COMMAND; }
   void ft_end() { ft_handler=NULL; }
   virtual FT_INFO *ft_init_ext(uint flags, uint inx,String *key)
     { return NULL; }
+private:
   virtual int ft_read(uchar *buf) { return HA_ERR_WRONG_COMMAND; }
   virtual int rnd_next(uchar *buf)=0;
   virtual int rnd_pos(uchar * buf, uchar *pos)=0;
@@ -1573,20 +2190,33 @@ public:
     It will return the row with the PK given in the record argument.
   */
   virtual int rnd_pos_by_record(uchar *record)
-    {
-      position(record);
-      return rnd_pos(record, ref);
-    }
+  {
+    position(record);
+    return rnd_pos(record, ref);
+  }
   virtual int read_first_row(uchar *buf, uint primary_key);
+public:
+
+  /* Same as above, but with statistics */
+  inline int ha_ft_read(uchar *buf);
+  inline int ha_rnd_next(uchar *buf);
+  inline int ha_rnd_pos(uchar *buf, uchar *pos);
+  inline int ha_rnd_pos_by_record(uchar *buf);
+  inline int ha_read_first_row(uchar *buf, uint primary_key);
+
   /**
-    The following function is only needed for tables that may be temporary
-    tables during joins.
+    The following 3 functions are only needed for tables that may be
+    internal temporary tables during joins.
   */
-  virtual int restart_rnd_next(uchar *buf, uchar *pos)
+  virtual int remember_rnd_pos()
+    { return HA_ERR_WRONG_COMMAND; }
+  virtual int restart_rnd_next(uchar *buf)
     { return HA_ERR_WRONG_COMMAND; }
   virtual int rnd_same(uchar *buf, uint inx)
     { return HA_ERR_WRONG_COMMAND; }
-  virtual ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key)
+
+  virtual ha_rows records_in_range(uint inx, key_range *min_key,
+                                   key_range *max_key)
     { return (ha_rows) 10; }
   /*
     If HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is set, then it sets ref
@@ -1680,6 +2310,7 @@ public:
   { return(NULL);}  /* gets tablespace name from handler */
   /** used in ALTER TABLE; 1 if changing storage engine is allowed */
   virtual bool can_switch_engines() { return 1; }
+  virtual int can_continue_handler_scan() { return 0; }
   /**
     Get the list of foreign keys in this table.
 
@@ -1727,7 +2358,8 @@ public:
   */
   virtual const char **bas_ext() const =0;
 
-  virtual int get_default_no_partitions(HA_CREATE_INFO *info) { return 1;}
+  virtual int get_default_no_partitions(HA_CREATE_INFO *create_info)
+  { return 1;}
   virtual void set_auto_partitions(partition_info *part_info) { return; }
   virtual bool get_no_parts(const char *name,
                             uint *no_parts)
@@ -1793,18 +2425,18 @@ public:
   virtual uint max_supported_key_part_length() const { return 255; }
   virtual uint min_record_length(uint options) const { return 1; }
 
-  virtual bool low_byte_first() const { return 1; }
   virtual uint checksum() const { return 0; }
   virtual bool is_crashed() const  { return 0; }
   virtual bool auto_repair() const { return 0; }
 
+  void update_global_table_stats();
+  void update_global_index_stats();
 
 #define CHF_CREATE_FLAG 0
 #define CHF_DELETE_FLAG 1
 #define CHF_RENAME_FLAG 2
 #define CHF_INDEX_FLAG  3
 
-
   /**
     @note lock_count() can return > 1 if the table is MERGE or partitioned.
   */
@@ -1872,9 +2504,28 @@ public:
 
 
  /*
-   @retval TRUE   Primary key (if there is one) is clustered
-                  key covering all fields
-   @retval FALSE  otherwise
+   Check if the primary key (if there is one) is a clustered key and a
+   reference key. This means:
+
+   - Data is stored together with the primary key (no secondary lookup
+     needed to find the row data). The optimizer uses this to find out
+     the cost of fetching data.
+   - The primary key is part of each secondary key and is used
+     to find the row data in the primary index when reading through
+     secondary indexes.
+   - When doing a HA_KEYREAD_ONLY we also get all the primary key parts
+     into the row. This is a critical property used by index_merge.
+
+   All the above is usually true for engines that store the row
+   data in the primary key index (e.g. in a b-tree), and use the primary
+   key value as a position().  InnoDB is an example of such an engine.
+
+   For such a clustered primary key, the following should also hold:
+   index_flags() should contain HA_CLUSTERED_INDEX
+   table_flags() should contain HA_TABLE_SCAN_ON_INDEX
+
+   @retval TRUE   yes
+   @retval FALSE  No.
  */
  virtual bool primary_key_is_clustered() { return FALSE; }
  virtual int cmp_ref(const uchar *ref1, const uchar *ref2)
@@ -1915,6 +2566,7 @@ public:
    Pops the top if condition stack, if stack is not empty.
  */
  virtual void cond_pop() { return; };
+ virtual Item *idx_cond_push(uint keyno, Item* idx_cond) { return idx_cond; }
  virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
 					 uint table_changes)
  { return COMPATIBLE_DATA_NO; }
@@ -1932,9 +2584,26 @@ public:
     return 0;
   }
 
+  /* Engine name: hton_name() of this handler's handlerton (ht). */
+  LEX_STRING *engine_name() { return hton_name(ht); }
+
+  /*
+    @brief
+    Check whether the engine supports virtual columns. This base
+    implementation always answers FALSE.
+
+    @retval FALSE   if the engine does not support virtual columns
+    @retval TRUE    if the engine supports virtual columns
+  */
+  virtual bool check_if_supported_virtual_columns(void) { return FALSE;}
+
+  /* Accessor for the 'table' member. */
+  TABLE* get_table() { return table; }
 protected:
+  /* deprecated, don't use in new engines */
+  inline void ha_statistic_increment(ulong SSV::*offset) const { }
+
   /* Service methods for use by storage engines. */
-  void ha_statistic_increment(ulong SSV::*offset) const;
   void **ha_data(THD *) const;
   THD *ha_thd(void) const;
 
@@ -1984,10 +2653,18 @@ protected:
     tables.
   */
   virtual int delete_table(const char *name);
+
 private:
   /* Private helpers */
-  inline void mark_trx_read_write();
-private:
+  void mark_trx_read_write_part2();
+  inline void mark_trx_read_write()
+  {
+    if (!mark_trx_done)
+      mark_trx_read_write_part2();
+  }
+  inline void increment_statistics(ulong SSV::*offset) const;
+  inline void decrement_statistics(ulong SSV::*offset) const;
+
   /*
     Low-level primitives for storage engines.  These should be
     overridden by the storage engine class. To call these methods, use
@@ -1995,8 +2672,9 @@ private:
   */
 
   virtual int open(const char *name, int mode, uint test_if_locked)=0;
-  virtual int index_init(uint idx, bool sorted) { active_index= idx; return 0; }
-  virtual int index_end() { active_index= MAX_KEY; return 0; }
+  /* Note: ha_index_read_idx_map() may bypass index_init() */
+  virtual int index_init(uint idx, bool sorted) { return 0; }
+  virtual int index_end() { return 0; }
   /**
     rnd_init() can be called two times without rnd_end() in between
     (it only makes sense if scan=1).
@@ -2077,8 +2755,6 @@ private:
   virtual int index_read(uchar * buf, const uchar * key, uint key_len,
                          enum ha_rkey_function find_flag)
    { return  HA_ERR_WRONG_COMMAND; }
-  virtual int index_read_last(uchar * buf, const uchar * key, uint key_len)
-   { return (my_errno= HA_ERR_WRONG_COMMAND); }
   /**
     This method is similar to update_row, however the handler doesn't need
     to execute the updates at this point in time. The handler can be certain
@@ -2125,14 +2801,16 @@ private:
     @remark The table is locked in exclusive mode.
   */
   virtual int truncate()
-  { return HA_ERR_WRONG_COMMAND; }
+  {
+    int error= delete_all_rows();
+    return error ? error : reset_auto_increment(0);
+  }
   /**
     Reset the auto-increment counter to the given value, i.e. the next row
-    inserted will get the given value. HA_ERR_WRONG_COMMAND is returned by
-    storage engines that don't support this operation.
+    inserted will get the given value.
   */
   virtual int reset_auto_increment(ulonglong value)
-  { return HA_ERR_WRONG_COMMAND; }
+  { return 0; }
   virtual int optimize(THD* thd, HA_CHECK_OPT* check_opt)
   { return HA_ADMIN_NOT_IMPLEMENTED; }
   virtual int analyze(THD* thd, HA_CHECK_OPT* check_opt)
@@ -2161,8 +2839,18 @@ private:
   { return HA_ERR_WRONG_COMMAND; }
   virtual int rename_partitions(const char *path)
   { return HA_ERR_WRONG_COMMAND; }
+  friend class ha_partition;
+public:
+  /* XXX to be removed, see ha_partition::partition_ht() */
+  virtual handlerton *partition_ht() const
+  { return ht; }
+  inline int ha_write_tmp_row(uchar *buf);
+  inline int ha_update_tmp_row(const uchar * old_data, uchar * new_data);
 };
 
+#include "multi_range_read.h"
+
+bool key_uses_partial_cols(TABLE *table, uint keyno);
 
 	/* Some extern variables used with handlers */
 
@@ -2191,7 +2879,7 @@ static inline enum legacy_db_type ha_legacy_type(const handlerton *db_type)
 
 static inline const char *ha_resolve_storage_engine_name(const handlerton *db_type)
 {
-  return db_type == NULL ? "UNKNOWN" : hton2plugin[db_type->slot]->name.str;
+  return db_type == NULL ? "UNKNOWN" : hton_name(db_type)->str;
 }
 
 static inline bool ha_check_storage_engine_flag(const handlerton *db_type, uint32 flag)
@@ -2217,6 +2905,7 @@ int ha_panic(enum ha_panic_function flag);
 void ha_close_connection(THD* thd);
 bool ha_flush_logs(handlerton *db_type);
 void ha_drop_database(char* path);
+void ha_checkpoint_state(bool disable);
 int ha_create_table(THD *thd, const char *path,
                     const char *db, const char *table_name,
                     HA_CREATE_INFO *create_info,
@@ -2238,9 +2927,10 @@ int ha_find_files(THD *thd,const char *db,const char *path,
 int ha_table_exists_in_engine(THD* thd, const char* db, const char* name);
 
 /* key cache */
-extern "C" int ha_init_key_cache(const char *name, KEY_CACHE *key_cache);
+extern "C" int ha_init_key_cache(const char *name, KEY_CACHE *key_cache, void *);
 int ha_resize_key_cache(KEY_CACHE *key_cache);
 int ha_change_key_cache_param(KEY_CACHE *key_cache);
+int ha_repartition_key_cache(KEY_CACHE *key_cache);
 int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache);
 
 /* report to InnoDB that control passes to the client */
@@ -2297,10 +2987,8 @@ const char *get_canonical_filename(handler *file, const char *path,
                                    char *tmp_path);
 bool mysql_xa_recover(THD *thd);
 
-
 inline const char *table_case_name(HA_CREATE_INFO *info, const char *name)
 {
   return ((lower_case_table_names == 2 && info->alias) ? info->alias : name);
 }
-
-#endif /* HANDLER_INCLUDED */
+#endif
diff --git a/sql/hash_filo.cc b/sql/hash_filo.cc
index 20b4f98238a..7c275ffc617 100644
--- a/sql/hash_filo.cc
+++ b/sql/hash_filo.cc
@@ -25,3 +25,8 @@
 
 #include "sql_priv.h"
 #include "hash_filo.h"
+
+#ifdef __WIN__
+// Remove linker warning 4221 about empty file
+namespace { char dummy; };
+#endif // __WIN__
diff --git a/sql/hash_filo.h b/sql/hash_filo.h
index da5a424e170..dab54928a55 100644
--- a/sql/hash_filo.h
+++ b/sql/hash_filo.h
@@ -111,7 +111,7 @@ public:
     return entry;
   }
 
-  my_bool add(hash_filo_element *entry)
+  bool add(hash_filo_element *entry)
   {
     if (cache.records == size)
     {
diff --git a/sql/hostname.cc b/sql/hostname.cc
index d34df68587c..763c4647532 100644
--- a/sql/hostname.cc
+++ b/sql/hostname.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
diff --git a/sql/item.cc b/sql/item.cc
index ca1ae1c4f71..e4b7b1f5092 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -41,9 +41,23 @@
                                        // find_item_in_list,
                                        // RESOLVED_AGAINST_ALIAS, ...
 #include "log_event.h"                 // append_query_string
+#include "sql_expression_cache.h"
 
 const String my_null_string("NULL", 4, default_charset_info);
 
+static int save_field_in_field(Field *from, bool *null_value,
+                               Field *to, bool no_conversions);
+
+
+/**
+  Compare two Items for List<Item>::add_unique()
+*/
+
+bool cmp_items(Item *a, Item *b)
+{
+  return a->eq(b, FALSE);
+}
+
 /****************************************************************************/
 
 /* Hybrid_type_traits {_real} */
@@ -210,10 +224,12 @@ bool Item::val_bool()
   case STRING_RESULT:
     return val_real() != 0.0;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
     return 0;                                   // Wrong (but safe)
   }
+  return 0;                                   // Wrong (but safe)
 }
 
 
@@ -251,7 +267,7 @@ String *Item::val_string_from_real(String *str)
   double nr= val_real();
   if (null_value)
     return 0;					/* purecov: inspected */
-  str->set_real(nr,decimals, &my_charset_bin);
+  str->set_real(nr,decimals, &my_charset_numeric);
   return str;
 }
 
@@ -261,7 +277,7 @@ String *Item::val_string_from_int(String *str)
   longlong nr= val_int();
   if (null_value)
     return 0;
-  str->set_int(nr, unsigned_flag, &my_charset_bin);
+  str->set_int(nr, unsigned_flag, &my_charset_numeric);
   return str;
 }
 
@@ -277,6 +293,21 @@ String *Item::val_string_from_decimal(String *str)
 }
 
 
+String *Item::val_string_from_date(String *str)
+{
+  MYSQL_TIME ltime;
+  if (get_date(&ltime, TIME_FUZZY_DATE) ||
+      str->alloc(MAX_DATE_STRING_REP_LENGTH))
+  {
+    null_value= 1;
+    return (String *) 0;
+  }
+  str->length(my_TIME_to_str(&ltime, const_cast<char*>(str->ptr()), decimals));
+  str->set_charset(&my_charset_numeric);
+  return str;
+}
+
+
 my_decimal *Item::val_decimal_from_real(my_decimal *decimal_value)
 {
   double nr= val_real();
@@ -302,7 +333,7 @@ my_decimal *Item::val_decimal_from_string(my_decimal *decimal_value)
   String *res;
 
   if (!(res= val_str(&str_value)))
-    return NULL;
+    return 0;
 
   if (str2my_decimal(E_DEC_FATAL_ERROR & ~E_DEC_BAD_NUM,
                      res->ptr(), res->length(), res->charset(),
@@ -374,17 +405,20 @@ int Item::save_time_in_field(Field *field)
   if (get_time(&ltime))
     return set_field_to_null_with_conversions(field, 0);
   field->set_notnull();
-  return field->store_time(&ltime, MYSQL_TIMESTAMP_TIME);
+  return field->store_time_dec(&ltime, decimals);
 }
 
 
 int Item::save_date_in_field(Field *field)
 {
   MYSQL_TIME ltime;
-  if (get_date(&ltime, TIME_FUZZY_DATE))
+  if (get_date(&ltime, TIME_FUZZY_DATE |
+                       (current_thd->variables.sql_mode &
+                          (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
+                             MODE_INVALID_DATES))))
     return set_field_to_null_with_conversions(field, 0);
   field->set_notnull();
-  return field->store_time(&ltime, MYSQL_TIMESTAMP_DATETIME);
+  return field->store_time_dec(&ltime, decimals);
 }
 
 
@@ -420,15 +454,18 @@ int Item::save_str_value_in_field(Field *field, String *result)
 
 
 Item::Item():
-  rsize(0), name(0), orig_name(0), name_length(0), fixed(0),
-  is_autogenerated_name(TRUE),
+  is_expensive_cache(-1), rsize(0), name(0), orig_name(0), name_length(0),
+  fixed(0), is_autogenerated_name(TRUE),
   collation(&my_charset_bin, DERIVATION_COERCIBLE)
 {
   marker= 0;
-  maybe_null=null_value=with_sum_func=unsigned_flag=0;
+  maybe_null=null_value=with_sum_func=with_field=unsigned_flag=0;
+  in_rollup= 0;
   decimals= 0; max_length= 0;
   with_subselect= 0;
-  cmp_context= (Item_result)-1;
+  cmp_context= IMPOSSIBLE_RESULT;
+   /* Initially this item is not attached to any JOIN_TAB. */
+  join_tab_idx= MAX_TABLES;
 
   /* Put item in free list so that we can free all items at end */
   THD *thd= current_thd;
@@ -457,6 +494,8 @@ Item::Item():
   tables.
 */
 Item::Item(THD *thd, Item *item):
+  join_tab_idx(item->join_tab_idx),
+  is_expensive_cache(-1),
   rsize(0),
   str_value(item->str_value),
   name(item->name),
@@ -466,13 +505,15 @@ Item::Item(THD *thd, Item *item):
   marker(item->marker),
   decimals(item->decimals),
   maybe_null(item->maybe_null),
+  in_rollup(item->in_rollup),
   null_value(item->null_value),
   unsigned_flag(item->unsigned_flag),
   with_sum_func(item->with_sum_func),
+  with_field(item->with_field),
   fixed(item->fixed),
   is_autogenerated_name(item->is_autogenerated_name),
-  collation(item->collation),
   with_subselect(item->with_subselect),
+  collation(item->collation),
   cmp_context(item->cmp_context)
 {
   next= thd->free_list;				// Put in free list
@@ -508,11 +549,40 @@ void Item::print_item_w_name(String *str, enum_query_type query_type)
 }
 
 
+void Item::print_value(String *str)
+{
+  char buff[MAX_FIELD_WIDTH];
+  String *ptr, tmp(buff,sizeof(buff),str->charset());
+  ptr= val_str(&tmp);
+  if (!ptr)
+    str->append("NULL");
+  else
+  {
+    switch (result_type()) {
+    case STRING_RESULT:
+      append_unescaped(str, ptr->ptr(), ptr->length());
+      break;
+    case DECIMAL_RESULT:
+    case REAL_RESULT:
+    case INT_RESULT:
+      str->append(*ptr);
+      break;
+    case ROW_RESULT:
+    case TIME_RESULT:
+    case IMPOSSIBLE_RESULT:
+      DBUG_ASSERT(0);
+    }
+  }
+}
+
+
 void Item::cleanup()
 {
   DBUG_ENTER("Item::cleanup");
+  DBUG_PRINT("enter", ("this: %p", this));
   fixed=0;
   marker= 0;
+  join_tab_idx= MAX_TABLES;
   if (orig_name)
     name= orig_name;
   DBUG_VOID_RETURN;
@@ -550,6 +620,45 @@ void Item::rename(char *new_name)
   name= new_name;
 }
 
+Item_result Item::cmp_type() const
+{
+  switch (field_type()) {
+  case MYSQL_TYPE_DECIMAL:
+  case MYSQL_TYPE_NEWDECIMAL:
+                           return DECIMAL_RESULT;
+  case MYSQL_TYPE_TINY:
+  case MYSQL_TYPE_SHORT:
+  case MYSQL_TYPE_LONG:
+  case MYSQL_TYPE_LONGLONG:
+  case MYSQL_TYPE_INT24:
+  case MYSQL_TYPE_YEAR:
+  case MYSQL_TYPE_BIT:
+                           return INT_RESULT;
+  case MYSQL_TYPE_FLOAT:
+  case MYSQL_TYPE_DOUBLE:
+                           return REAL_RESULT;
+  case MYSQL_TYPE_NULL:
+  case MYSQL_TYPE_VARCHAR:
+  case MYSQL_TYPE_TINY_BLOB:
+  case MYSQL_TYPE_MEDIUM_BLOB:
+  case MYSQL_TYPE_LONG_BLOB:
+  case MYSQL_TYPE_BLOB:
+  case MYSQL_TYPE_VAR_STRING:
+  case MYSQL_TYPE_STRING:
+  case MYSQL_TYPE_ENUM:
+  case MYSQL_TYPE_SET:
+  case MYSQL_TYPE_GEOMETRY:
+                           return STRING_RESULT;
+  case MYSQL_TYPE_TIMESTAMP:
+  case MYSQL_TYPE_DATE:
+  case MYSQL_TYPE_TIME:
+  case MYSQL_TYPE_DATETIME:
+  case MYSQL_TYPE_NEWDATE:
+                           return TIME_RESULT;
+  };
+  DBUG_ASSERT(0);
+  return IMPOSSIBLE_RESULT;
+}
 
 /**
   Traverse item tree possibly transforming it (replacing items).
@@ -588,6 +697,36 @@ Item* Item::transform(Item_transformer transformer, uchar *arg)
 }
 
 
+/**
+  Create and set up an expression cache for this item
+
+  @param thd             Thread handle
+  @param depends_on      List of the expression parameters
+
+  @details
+  The function creates an expression cache for an item and its parameters
+  specified by the 'depends_on' list. Then the expression cache is placed
+  into a cache wrapper that is returned as the result of the function.
+
+  @returns
+  A pointer to created wrapper item if successful, NULL - otherwise
+*/
+
+Item* Item::set_expr_cache(THD *thd)
+{
+  DBUG_ENTER("Item::set_expr_cache");
+  Item_cache_wrapper *wrapper;
+  if ((wrapper= new Item_cache_wrapper(this)) &&
+      !wrapper->fix_fields(thd, (Item**)&wrapper))
+  {
+    if (wrapper->set_cache(thd))
+      DBUG_RETURN(NULL);
+    DBUG_RETURN(wrapper);
+  }
+  DBUG_RETURN(NULL);
+}
+
+
 Item_ident::Item_ident(Name_resolution_context *context_arg,
                        const char *db_name_arg,const char *table_name_arg,
 		       const char *field_name_arg)
@@ -596,7 +735,7 @@ Item_ident::Item_ident(Name_resolution_context *context_arg,
    db_name(db_name_arg), table_name(table_name_arg),
    field_name(field_name_arg),
    alias_name_used(FALSE), cached_field_index(NO_CACHED_FIELD_INDEX),
-   cached_table(0), depended_from(0)
+   cached_table(0), depended_from(0), can_be_depended(TRUE)
 {
   name = (char*) field_name_arg;
 }
@@ -608,7 +747,7 @@ Item_ident::Item_ident(TABLE_LIST *view_arg, const char *field_name_arg)
    db_name(NullS), table_name(view_arg->alias),
    field_name(field_name_arg),
    alias_name_used(FALSE), cached_field_index(NO_CACHED_FIELD_INDEX),
-   cached_table(NULL), depended_from(NULL)
+   cached_table(NULL), depended_from(NULL), can_be_depended(TRUE)
 {
   name = (char*) field_name_arg;
 }
@@ -630,7 +769,8 @@ Item_ident::Item_ident(THD *thd, Item_ident *item)
    alias_name_used(item->alias_name_used),
    cached_field_index(item->cached_field_index),
    cached_table(item->cached_table),
-   depended_from(item->depended_from)
+   depended_from(item->depended_from),
+   can_be_depended(item->can_be_depended)
 {}
 
 void Item_ident::cleanup()
@@ -648,20 +788,30 @@ void Item_ident::cleanup()
   db_name= orig_db_name; 
   table_name= orig_table_name;
   field_name= orig_field_name;
-  depended_from= 0;
+  /* Remember whether this Item had depended_from set */
+  can_be_depended= test(depended_from);
   DBUG_VOID_RETURN;
 }
 
 bool Item_ident::remove_dependence_processor(uchar * arg)
 {
   DBUG_ENTER("Item_ident::remove_dependence_processor");
-  if (depended_from == (st_select_lex *) arg)
+  if (get_depended_from() == (st_select_lex *) arg)
     depended_from= 0;
   context= &((st_select_lex *) arg)->context;
   DBUG_RETURN(0);
 }
 
 
+bool Item_ident::collect_outer_ref_processor(uchar *param)
+{
+  Collect_deps_prm *prm= (Collect_deps_prm *)param;
+  if (depended_from && depended_from->nest_level < prm->nest_level)
+    prm->parameters->add_unique(this, &cmp_items);
+  return FALSE;
+}
+
+
 /**
   Store the pointer to this item field into a list if not already there.
 
@@ -697,6 +847,16 @@ bool Item_field::collect_item_field_processor(uchar *arg)
 }
 
 
+bool Item_field::add_field_to_set_processor(uchar *arg)
+{
+  DBUG_ENTER("Item_field::add_field_to_set_processor");
+  DBUG_PRINT("info", ("%s", field->field_name ? field->field_name : "noname"));
+  TABLE *table= (TABLE *) arg;
+  if (field->table == table)
+    bitmap_set_bit(&table->tmp_set, field->field_index);
+  DBUG_RETURN(FALSE);
+}
+
 /**
   Check if an Item_field references some field from a list of fields.
 
@@ -741,6 +901,38 @@ bool Item_field::register_field_in_read_map(uchar *arg)
   TABLE *table= (TABLE *) arg;
   if (field->table == table || !table)
     bitmap_set_bit(field->table->read_set, field->field_index);
+  if (field->vcol_info && field->vcol_info->expr_item)
+    return field->vcol_info->expr_item->walk(&Item::register_field_in_read_map, 
+                                             1, arg);
+  return 0;
+}
+
+/*
+  @brief
+  Mark field in bitmap supplied as *arg
+*/
+
+bool Item_field::register_field_in_bitmap(uchar *arg)
+{
+  MY_BITMAP *bitmap= (MY_BITMAP *) arg;
+  DBUG_ASSERT(bitmap);
+  bitmap_set_bit(bitmap, field->field_index);
+  return 0;
+}
+
+
+/*
+  Mark field in write_map
+
+  NOTES
+    This is used by UPDATE to register underlying fields of used view fields.
+*/
+
+bool Item_field::register_field_in_write_map(uchar *arg)
+{
+  TABLE *table= (TABLE *) arg;
+  if (field->table == table || !table)
+    bitmap_set_bit(field->table->write_set, field->field_index);
   return 0;
 }
 
@@ -767,7 +959,8 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
   }
   if (cs->ctype)
   {
-    uint orig_len= length;
+    const char *str_start= str;
+
     /*
       This will probably need a better implementation in the future:
       a function in CHARSET_INFO structure.
@@ -777,16 +970,20 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
       length--;
       str++;
     }
-    if (orig_len != length && !is_autogenerated_name)
+    if (str != str_start && !is_autogenerated_name)
     {
+      char buff[SAFE_NAME_LEN];
+      strmake(buff, str_start,
+              min(sizeof(buff)-1, length + (int) (str-str_start)));
+
       if (length == 0)
         push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                             ER_NAME_BECOMES_EMPTY, ER(ER_NAME_BECOMES_EMPTY),
-                            str + length - orig_len);
+                            buff);
       else
         push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                             ER_REMOVED_SPACES, ER(ER_REMOVED_SPACES),
-                            str + length - orig_len);
+                            buff);
     }
   }
   if (!my_charset_same(cs, system_charset_info))
@@ -982,12 +1179,52 @@ bool Item_string::eq(const Item *item, bool binary_cmp) const
 
 /**
   Get the value of the function as a MYSQL_TIME structure.
-  As a extra convenience the time structure is reset on error!
+  As an extra convenience the time structure is reset on error or NULL values!
 */
 
 bool Item::get_date(MYSQL_TIME *ltime,uint fuzzydate)
 {
-  if (result_type() == STRING_RESULT)
+  if (field_type() == MYSQL_TYPE_TIME)
+    fuzzydate|= TIME_TIME_ONLY;
+
+  switch (result_type()) {
+  case INT_RESULT:
+  {
+    longlong value= val_int();
+    if (field_type() == MYSQL_TYPE_YEAR)
+    {
+      if (max_length == 2)
+      {
+        if (value < 70)
+          value+= 2000;
+        else if (value <= 1900)
+          value+= 1900;
+      }
+      value*= 10000; /* make it YYYYMMDD */
+    }
+    if (null_value || int_to_datetime_with_warn(value, ltime, fuzzydate,
+                                                field_name_or_null()))
+      goto err;
+    break;
+  }
+  case REAL_RESULT:
+  {
+    double value= val_real();
+    if (null_value || double_to_datetime_with_warn(value, ltime, fuzzydate,
+                                                   field_name_or_null()))
+      goto err;
+    break;
+  }
+  case DECIMAL_RESULT:
+  {
+    my_decimal value, *res;
+    if (!(res= val_decimal(&value)) ||
+        decimal_to_datetime_with_warn(res, ltime, fuzzydate,
+                                      field_name_or_null()))
+      goto err;
+    break;
+  }
+  case STRING_RESULT:
   {
     char buff[40];
     String tmp(buff,sizeof(buff), &my_charset_bin),*res;
@@ -995,25 +1232,12 @@ bool Item::get_date(MYSQL_TIME *ltime,uint fuzzydate)
         str_to_datetime_with_warn(res->charset(), res->ptr(), res->length(),
                                   ltime, fuzzydate) <= MYSQL_TIMESTAMP_ERROR)
       goto err;
+    break;
   }
-  else
-  {
-    int was_cut;
-    longlong value= val_int();
-
-    if (null_value)
-      goto err;
-
-    if (number_to_datetime(value, ltime, fuzzydate, &was_cut) == LL(-1))
-    {
-      char buff[22], *end;
-      end= longlong10_to_str(value, buff, -10);
-      make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                                   buff, (int) (end-buff), MYSQL_TIMESTAMP_NONE,
-                                   NullS);
-      goto err;
-    }
+  default:
+    DBUG_ASSERT(0);
   }
+
   return 0;
 
 err:
@@ -1021,23 +1245,20 @@ err:
   return 1;
 }
 
-/**
-  Get time of first argument.\
-
-  As a extra convenience the time structure is reset on error!
-*/
-
-bool Item::get_time(MYSQL_TIME *ltime)
+bool Item::get_seconds(ulonglong *sec, ulong *sec_part)
 {
-  char buff[40];
-  String tmp(buff,sizeof(buff),&my_charset_bin),*res;
-  if (!(res=val_str_ascii(&tmp)) ||
-      str_to_time_with_warn(res->charset(), res->ptr(), res->length(), ltime))
-  {
-    bzero((char*) ltime,sizeof(*ltime));
-    return 1;
+  if (result_type() == INT_RESULT)
+  { // optimize for an important special case
+    longlong val= val_int();
+    bool neg= val < 0 && !unsigned_flag;
+    *sec= neg ? -val : val;
+    *sec_part= 0;
+    return neg;
   }
-  return 0;
+  my_decimal tmp, *dec= val_decimal(&tmp);
+  if (!dec)
+    return 0;
+  return my_decimal2seconds(dec, sec, sec_part);
 }
 
 CHARSET_INFO *Item::default_charset()
@@ -1063,6 +1284,7 @@ int Item::save_in_field_no_warnings(Field *field, bool no_conversions)
   my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set);
   ulonglong sql_mode= thd->variables.sql_mode;
   thd->variables.sql_mode&= ~(MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE);
+  thd->variables.sql_mode|= MODE_INVALID_DATES;
   thd->count_cuted_fields= CHECK_FIELD_IGNORE;
 
   res= save_in_field(field, no_conversions);
@@ -1479,7 +1701,7 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
   /* An item of type Item_sum  is registered <=> ref_by != 0 */ 
   if (type() == SUM_FUNC_ITEM && skip_registered && 
       ((Item_sum *) this)->ref_by)
-    return;                                                 
+    return;
   if ((type() != SUM_FUNC_ITEM && with_sum_func) ||
       (type() == FUNC_ITEM &&
        (((Item_func *) this)->functype() == Item_func::ISNOTNULLTEST_FUNC ||
@@ -1505,6 +1727,11 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
     */
     Item_aggregate_ref *item_ref;
     uint el= fields.elements;
+    /*
+      If this is an item_ref, get the original item.
+      This is a safety measure in case this is called for something that
+      is already a reference.
+    */
     Item *real_itm= real_item();
 
     ref_pointer_array[el]= real_itm;
@@ -1592,7 +1819,9 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
     if (collation == &my_charset_bin)
     {
       if (derivation <= dt.derivation)
-	; // Do nothing
+      {
+	/* Do nothing */
+      }
       else
       {
 	set(dt); 
@@ -1608,7 +1837,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
     else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
              left_is_superset(this, &dt))
     {
-      // Do nothing
+      /* Do nothing */
     }
     else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
              left_is_superset(&dt, this))
@@ -1619,7 +1848,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
              derivation < dt.derivation &&
              dt.derivation >= DERIVATION_SYSCONST)
     {
-      // Do nothing;
+      /* Do nothing */
     }
     else if ((flags & MY_COLL_ALLOW_COERCIBLE_CONV) &&
              dt.derivation < derivation &&
@@ -1637,7 +1866,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
   }
   else if (derivation < dt.derivation)
   {
-    // Do nothing
+    /* Do nothing */
   }
   else if (dt.derivation < derivation)
   {
@@ -1647,7 +1876,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
   { 
     if (collation == dt.collation)
     {
-      // Do nothing
+      /* Do nothing */
     }
     else 
     {
@@ -1932,6 +2161,7 @@ Item_field::Item_field(Field *f)
     if this item is to be reused
   */
   orig_table_name= orig_field_name= "";
+  with_field= 1;
 }
 
 
@@ -1980,6 +2210,7 @@ Item_field::Item_field(THD *thd, Name_resolution_context *context_arg,
     name= (char*) orig_field_name;
   }
   set_field(f);
+  with_field= 1;
 }
 
 
@@ -1994,6 +2225,7 @@ Item_field::Item_field(Name_resolution_context *context_arg,
   collation.set(DERIVATION_IMPLICIT);
   if (select && select->parsing_place != IN_HAVING)
       select->select_n_where_fields++;
+  with_field= 1;
 }
 
 /**
@@ -2010,6 +2242,7 @@ Item_field::Item_field(THD *thd, Item_field *item)
    any_privileges(item->any_privileges)
 {
   collation.set(DERIVATION_IMPLICIT);
+  with_field= 1;
 }
 
 
@@ -2102,6 +2335,15 @@ void Item_field::reset_field(Field *f)
   name= (char*) f->field_name;
 }
 
+
+bool Item_field::enumerate_field_refs_processor(uchar *arg)
+{
+  Field_enumerator *fe= (Field_enumerator*)arg;
+  fe->visit_field(this);
+  return FALSE;
+}
+
+
 const char *Item_ident::full_name() const
 {
   char *tmp;
@@ -2238,25 +2480,21 @@ bool Item_field::get_date(MYSQL_TIME *ltime,uint fuzzydate)
 
 bool Item_field::get_date_result(MYSQL_TIME *ltime,uint fuzzydate)
 {
-  if ((null_value=result_field->is_null()) ||
-      result_field->get_date(ltime,fuzzydate))
+  if (result_field->is_null() || result_field->get_date(ltime,fuzzydate))
   {
     bzero((char*) ltime,sizeof(*ltime));
-    return 1;
+    return (null_value= 1);
   }
-  return 0;
+  return (null_value= 0);
 }
 
-bool Item_field::get_time(MYSQL_TIME *ltime)
+
+void Item_field::save_result(Field *to)
 {
-  if ((null_value=field->is_null()) || field->get_time(ltime))
-  {
-    bzero((char*) ltime,sizeof(*ltime));
-    return 1;
-  }
-  return 0;
+  save_field_in_field(result_field, &null_value, to, TRUE);
 }
 
+
 double Item_field::val_result()
 {
   if ((null_value=result_field->is_null()))
@@ -2299,10 +2537,12 @@ bool Item_field::val_bool_result()
   case STRING_RESULT:
     return result_field->val_real() != 0.0;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
     return 0;                                   // Shut up compiler
   }
+  return 0;
 }
 
 
@@ -2346,7 +2586,29 @@ table_map Item_field::used_tables() const
 {
   if (field->table->const_table)
     return 0;					// const item
-  return (depended_from ? OUTER_REF_TABLE_BIT : field->table->map);
+  return (get_depended_from() ? OUTER_REF_TABLE_BIT : field->table->map);
+}
+
+table_map Item_field::all_used_tables() const
+{
+  return (get_depended_from() ? OUTER_REF_TABLE_BIT : field->table->map);
+}
+
+void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  if (new_parent == get_depended_from())
+    depended_from= NULL;
+  Name_resolution_context *ctx= new Name_resolution_context();
+  ctx->outer_context= NULL; // We don't build a complete name resolver
+  ctx->table_list= NULL;    // We rely on first_name_resolution_table instead
+  ctx->select_lex= new_parent;
+  ctx->first_name_resolution_table= context->first_name_resolution_table;
+  ctx->last_name_resolution_table=  context->last_name_resolution_table;
+  ctx->error_processor=             context->error_processor;
+  ctx->error_processor_data=        context->error_processor_data;
+  ctx->resolve_in_select_list=      context->resolve_in_select_list;
+  ctx->security_ctx=                context->security_ctx;
+  this->context=ctx;
 }
 
 
@@ -2646,19 +2908,20 @@ void Item_string::print(String *str, enum_query_type query_type)
 
 
 double 
-double_from_string_with_check (CHARSET_INFO *cs, const char *cptr, char *end)
+double_from_string_with_check(CHARSET_INFO *cs, const char *cptr,
+                              const char *end)
 {
   int error;
-  char *org_end;
+  char *end_of_num= (char*) end;
   double tmp;
 
-  org_end= end;
-  tmp= my_strntod(cs, (char*) cptr, end - cptr, &end, &error);
-  if (error || (end != org_end && !check_if_only_end_space(cs, end, org_end)))
+  tmp= my_strntod(cs, (char*) cptr, end - cptr, &end_of_num, &error);
+  if (error || (end != end_of_num &&
+                !check_if_only_end_space(cs, end_of_num, end)))
   {
-    ErrConvString err(cptr, cs);
+    ErrConvString err(cptr, end - cptr, cs);
     /*
-      We can use str_value.ptr() here as Item_string is gurantee to put an
+      We can use err.ptr() here as ErrConvString is guaranteed to put an
       end \0 here.
     */
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
@@ -2673,28 +2936,31 @@ double_from_string_with_check (CHARSET_INFO *cs, const char *cptr, char *end)
 double Item_string::val_real()
 {
   DBUG_ASSERT(fixed == 1);
-  return double_from_string_with_check (str_value.charset(), str_value.ptr(), 
-                                        (char *) str_value.ptr() + str_value.length());
+  return double_from_string_with_check(str_value.charset(),
+                                       str_value.ptr(), 
+                                       str_value.ptr() +
+                                       str_value.length());
 }
 
 
 longlong 
-longlong_from_string_with_check (CHARSET_INFO *cs, const char *cptr, char *end)
+longlong_from_string_with_check(CHARSET_INFO *cs, const char *cptr,
+                                const char *end)
 {
   int err;
   longlong tmp;
-  char *org_end= end;
+  char *end_of_num= (char*) end;
 
-  tmp= (*(cs->cset->strtoll10))(cs, cptr, &end, &err);
+  tmp= (*(cs->cset->strtoll10))(cs, cptr, &end_of_num, &err);
   /*
     TODO: Give error if we wanted a signed integer and we got an unsigned
     one
   */
   if (!current_thd->no_errors &&
       (err > 0 ||
-       (end != org_end && !check_if_only_end_space(cs, end, org_end))))
+       (end != end_of_num && !check_if_only_end_space(cs, end_of_num, end))))
   {
-    ErrConvString err(cptr, cs);
+    ErrConvString err(cptr, end - cptr, cs);
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                         ER_TRUNCATED_WRONG_VALUE,
                         ER(ER_TRUNCATED_WRONG_VALUE), "INTEGER",
@@ -2712,7 +2978,7 @@ longlong Item_string::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   return longlong_from_string_with_check(str_value.charset(), str_value.ptr(),
-                             (char *) str_value.ptr()+ str_value.length());
+                                         str_value.ptr()+ str_value.length());
 }
 
 
@@ -2904,19 +3170,19 @@ void Item_param::set_time(MYSQL_TIME *tm, timestamp_type time_type,
   if (value.time.year > 9999 || value.time.month > 12 ||
       value.time.day > 31 ||
       (time_type != MYSQL_TIMESTAMP_TIME && value.time.hour > 23) ||
-      value.time.minute > 59 || value.time.second > 59)
+      value.time.minute > 59 || value.time.second > 59 ||
+      value.time.second_part > TIME_MAX_SECOND_PART)
   {
-    char buff[MAX_DATE_STRING_REP_LENGTH];
-    uint length= my_TIME_to_str(&value.time, buff);
+    ErrConvTime str(&value.time);
     make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                                 buff, length, time_type, 0);
+                                 &str, time_type, 0);
     set_zero_time(&value.time, MYSQL_TIMESTAMP_ERROR);
   }
 
   state= TIME_VALUE;
   maybe_null= 0;
   max_length= max_length_arg;
-  decimals= 0;
+  decimals= tm->second_part > 0 ? TIME_SECOND_PART_DIGITS : 0;
   DBUG_VOID_RETURN;
 }
 
@@ -2994,7 +3260,7 @@ bool Item_param::set_from_user_var(THD *thd, const user_var_entry *entry)
     unsigned_flag= entry->unsigned_flag;
     if (limit_clause_param)
     {
-      my_bool unused;
+      bool unused;
       set_int(entry->val_int(&unused), MY_INT64_NUM_DECIMAL_DIGITS);
       item_type= Item::INT_ITEM;
       DBUG_RETURN(!unsigned_flag && value.integer < 0 ? 1 : 0);
@@ -3003,10 +3269,12 @@ bool Item_param::set_from_user_var(THD *thd, const user_var_entry *entry)
     case REAL_RESULT:
       set_double(*(double*)entry->value);
       item_type= Item::REAL_ITEM;
+      param_type= MYSQL_TYPE_DOUBLE;
       break;
     case INT_RESULT:
       set_int(*(longlong*)entry->value, MY_INT64_NUM_DECIMAL_DIGITS);
       item_type= Item::INT_ITEM;
+      param_type= MYSQL_TYPE_LONGLONG;
       break;
     case STRING_RESULT:
     {
@@ -3029,6 +3297,7 @@ bool Item_param::set_from_user_var(THD *thd, const user_var_entry *entry)
         charset of connection, so we have to set it later.
       */
       item_type= Item::STRING_ITEM;
+      param_type= MYSQL_TYPE_VARCHAR;
 
       if (set_str((const char *)entry->value, entry->length))
         DBUG_RETURN(1);
@@ -3044,9 +3313,12 @@ bool Item_param::set_from_user_var(THD *thd, const user_var_entry *entry)
         my_decimal_precision_to_length_no_truncation(ent_value->precision(),
                                                      decimals, unsigned_flag);
       item_type= Item::DECIMAL_ITEM;
+      param_type= MYSQL_TYPE_NEWDECIMAL;
       break;
     }
-    default:
+    case ROW_RESULT:
+    case TIME_RESULT:
+    case IMPOSSIBLE_RESULT:
       DBUG_ASSERT(0);
       set_null();
     }
@@ -3108,7 +3380,7 @@ int Item_param::save_in_field(Field *field, bool no_conversions)
   case DECIMAL_VALUE:
     return field->store_decimal(&decimal_value);
   case TIME_VALUE:
-    field->store_time(&value.time, value.time.time_type);
+    field->store_time_dec(&value.time, decimals);
     return 0;
   case STRING_VALUE:
   case LONG_DATA_VALUE:
@@ -3124,21 +3396,6 @@ int Item_param::save_in_field(Field *field, bool no_conversions)
 }
 
 
-bool Item_param::get_time(MYSQL_TIME *res)
-{
-  if (state == TIME_VALUE)
-  {
-    *res= value.time;
-    return 0;
-  }
-  /*
-    If parameter value isn't supplied assertion will fire in val_str()
-    which is called from Item::get_time().
-  */
-  return Item::get_time(res);
-}
-
-
 bool Item_param::get_date(MYSQL_TIME *res, uint fuzzydate)
 {
   if (state == TIME_VALUE)
@@ -3268,7 +3525,8 @@ String *Item_param::val_str(String* str)
   {
     if (str->reserve(MAX_DATE_STRING_REP_LENGTH))
       break;
-    str->length((uint) my_TIME_to_str(&value.time, (char*) str->ptr()));
+    str->length((uint) my_TIME_to_str(&value.time, (char*) str->ptr(),
+                decimals));
     str->set_charset(&my_charset_bin);
     return str;
   }
@@ -3320,7 +3578,7 @@ const String *Item_param::query_val_str(String* str) const
       buf= str->c_ptr_quick();
       ptr= buf;
       *ptr++= '\'';
-      ptr+= (uint) my_TIME_to_str(&value.time, ptr);
+      ptr+= (uint) my_TIME_to_str(&value.time, ptr, decimals);
       *ptr++= '\'';
       str->length((uint32) (ptr - buf));
       break;
@@ -3663,6 +3921,7 @@ void Item_param::make_field(Send_field *field)
 /****************************************************************************
   Item_copy
 ****************************************************************************/
+
 Item_copy *Item_copy::create (Item *item)
 {
   switch (item->result_type())
@@ -3676,7 +3935,9 @@ Item_copy *Item_copy::create (Item *item)
         new Item_copy_uint (item) : new Item_copy_int (item);
     case DECIMAL_RESULT:
       return new Item_copy_decimal (item);
-    default:
+    case TIME_RESULT:
+    case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
       DBUG_ASSERT (0);
   }
   /* should not happen */
@@ -3749,8 +4010,7 @@ void Item_copy_int::copy()
   null_value=item->null_value;
 }
 
-static int save_int_value_in_field (Field *field, longlong nr, 
-                                    bool null_value, bool unsigned_flag);
+static int save_int_value_in_field (Field *, longlong, bool, bool);
 
 int Item_copy_int::save_in_field(Field *field, bool no_conversions)
 {
@@ -3904,6 +4164,15 @@ bool Item::fix_fields(THD *thd, Item **ref)
   return FALSE;
 }
 
+
+void Item_ref_null_helper::save_val(Field *to)
+{
+  DBUG_ASSERT(fixed == 1);
+  (*ref)->save_val(to);
+  owner->was_null|= null_value= (*ref)->null_value;
+}
+
+
 double Item_ref_null_helper::val_real()
 {
   DBUG_ASSERT(fixed == 1);
@@ -3967,7 +4236,7 @@ bool Item_ref_null_helper::get_date(MYSQL_TIME *ltime, uint fuzzydate)
                          substitution)
 */
 
-static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
+static bool mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
                               Item_ident *resolved_item,
                               Item_ident *mark_item)
 {
@@ -3976,9 +4245,11 @@ static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
   const char *table_name= (resolved_item->table_name ?
                            resolved_item->table_name : "");
   /* store pointer on SELECT_LEX from which item is dependent */
-  if (mark_item)
+  if (mark_item && mark_item->can_be_depended)
     mark_item->depended_from= last;
-  current->mark_as_dependent(last);
+  if (current->mark_as_dependent(thd, last, /** resolved_item psergey-thu
+    **/mark_item))
+    return TRUE;
   if (thd->lex->describe & DESCRIBE_EXTENDED)
   {
     push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
@@ -3988,6 +4259,7 @@ static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
                  resolved_item->field_name,
                  current->select_number, last->select_number);
   }
+  return FALSE;
 }
 
 
@@ -4079,7 +4351,7 @@ static Item** find_field_in_group_list(Item *find_item, ORDER *group_list)
   int         found_match_degree= 0;
   Item_ident *cur_field;
   int         cur_match_degree= 0;
-  char        name_buff[NAME_LEN+1];
+  char        name_buff[SAFE_NAME_LEN+1];
 
   if (find_item->type() == Item::FIELD_ITEM ||
       find_item->type() == Item::REF_ITEM)
@@ -4271,6 +4543,34 @@ resolve_ref_in_select_and_group(THD *thd, Item_ident *ref, SELECT_LEX *select)
 }
 
 
+/*
+  @brief
+  Whether a table belongs to an outer select.
+
+  @param table table to check
+  @param select current select
+
+  @details
+  Try to find the select the table belongs to by ascending the derived tables chain.
+*/
+
+static
+bool is_outer_table(TABLE_LIST *table, SELECT_LEX *select)
+{
+  DBUG_ASSERT(table->select_lex != select);
+  TABLE_LIST *tl;
+
+  for (tl= select->master_unit()->derived;
+       tl && tl->is_merged_derived();
+       select= tl->select_lex, tl= select->master_unit()->derived)
+  {
+    if (tl->select_lex == table->select_lex)
+      return FALSE;
+  }
+  return TRUE;
+}
+
+
 /**
   Resolve the name of an outer select column reference.
 
@@ -4537,8 +4837,9 @@ Item_field::fix_outer_field(THD *thd, Field **from_field, Item **reference)
       return -1;
 
     mark_as_dependent(thd, last_checked_context->select_lex,
-                      context->select_lex, this,
+                      context->select_lex, rf,
                       rf);
+
     return 0;
   }
   else
@@ -4681,31 +4982,18 @@ bool Item_field::fix_fields(THD *thd, Item **reference)
               It's not an Item_field in the select list so we must make a new
               Item_ref to point to the Item in the select list and replace the
               Item_field created by the parser with the new Item_ref.
-
-              NOTE: If we are fixing an alias reference inside ORDER/GROUP BY
-              item tree, then we use new Item_ref as an intermediate value
-              to resolve referenced item only.
-              In this case the new Item_ref item is unused.
             */
             Item_ref *rf= new Item_ref(context, db_name,table_name,field_name);
             if (!rf)
               return 1;
-
-            bool save_group_fix_field= thd->lex->current_select->group_fix_field;
-            /*
-              No need for recursive resolving of aliases.
-            */
-            thd->lex->current_select->group_fix_field= 0;
-
             bool ret= rf->fix_fields(thd, (Item **) &rf) || rf->check_cols(1);
-            thd->lex->current_select->group_fix_field= save_group_fix_field;
             if (ret)
               return TRUE;
-
-            if (save_group_fix_field && alias_name_used)
-              thd->change_item_tree(reference, *rf->ref);
-            else
-              thd->change_item_tree(reference, rf);
+           
+            SELECT_LEX *select= thd->lex->current_select;
+            thd->change_item_tree(reference,
+                                  select->parsing_place == IN_GROUP_BY && 
+				  alias_name_used  ?  *rf->ref : rf);
 
             return FALSE;
           }
@@ -4722,7 +5010,8 @@ bool Item_field::fix_fields(THD *thd, Item **reference)
 
     if (!outer_fixed && cached_table && cached_table->select_lex &&
         context->select_lex &&
-        cached_table->select_lex != context->select_lex)
+        cached_table->select_lex != context->select_lex &&
+        is_outer_table(cached_table, context->select_lex))
     {
       int ret;
       if ((ret= fix_outer_field(thd, &from_field, reference)) < 0)
@@ -4838,6 +5127,21 @@ error:
   return TRUE;
 }
 
+/*
+  @brief
+  Mark virtual columns as used in a partitioning expression 
+*/
+
+bool Item_field::vcol_in_partition_func_processor(uchar *int_arg)
+{
+  DBUG_ASSERT(fixed);
+  if (field->vcol_info)
+  {
+    field->vcol_info->mark_as_in_partitioning_expr();
+  }
+  return FALSE;
+}
+
 
 Item *Item_field::safe_charset_converter(CHARSET_INFO *tocs)
 {
@@ -4850,6 +5154,7 @@ void Item_field::cleanup()
 {
   DBUG_ENTER("Item_field::cleanup");
   Item_ident::cleanup();
+  depended_from= NULL;
   /*
     Even if this object was created by direct link to field in setup_wild()
     it will be linked correctly next time by name of field and table alias.
@@ -4901,13 +5206,14 @@ Item_equal *Item_field::find_item_equal(COND_EQUAL *cond_equal)
 
 
 /**
-  Check whether a field can be substituted by an equal item.
+  Check whether a field item can be substituted for an equal item
 
-  The function checks whether a substitution of the field
-  occurrence for an equal item is valid.
+  @details
+  The function checks whether a substitution of a field item for
+  an equal item is valid.
 
-  @param arg   *arg != NULL <-> the field is in the context where
-               substitution for an equal item is valid
+  @param arg   *arg != NULL <-> the field is in the context
+               where substitution for an equal item is valid
 
   @note
     The following statement is not always true:
@@ -4932,7 +5238,10 @@ Item_equal *Item_field::find_item_equal(COND_EQUAL *cond_equal)
 
 bool Item_field::subst_argument_checker(uchar **arg)
 {
-  return (result_type() != STRING_RESULT) || (*arg);
+  return *arg &&
+         (*arg == (uchar *) Item::ANY_SUBST ||
+          result_type() != STRING_RESULT || 
+          (field->flags & BINARY_FLAG));
 }
 
 
@@ -5010,12 +5319,7 @@ Item *Item_field::equal_fields_propagator(uchar *arg)
     item= this;
   else if (field && (field->flags & ZEROFILL_FLAG) && IS_NUM(field->type()))
   {
-    /*
-      We don't need to zero-fill timestamp columns here because they will be 
-      first converted to a string (in date/time format) and compared as such if
-      compared with another string.
-    */
-    if (item && field->type() != FIELD_TYPE_TIMESTAMP && cmp_context != INT_RESULT)
+    if (item && (cmp_context == STRING_RESULT || cmp_context == IMPOSSIBLE_RESULT))
       convert_zerofill_number_to_string(&item, (Field_num *)field);
     else
       item= this;
@@ -5042,7 +5346,8 @@ bool Item_field::set_no_const_sub(uchar *arg)
   Replace an Item_field for an equal Item_field that evaluated earlier
   (if any).
 
-  The function returns a pointer to an item that is taken from
+  If this->item_equal points to some item and coincides with arg then
+  the function returns a pointer to an item that is taken from
   the very beginning of the item_equal list which the Item_field
   object refers to (belongs to) unless item_equal contains  a constant
   item. In this case the function returns this constant item, 
@@ -5050,12 +5355,12 @@ bool Item_field::set_no_const_sub(uchar *arg)
   If the Item_field object does not refer any Item_equal object
   'this' is returned .
 
-  @param arg   a dummy parameter, is not used here
+  @param arg   NULL or points to some item of the Item_equal type
 
 
   @note
     This function is supposed to be called as a callback parameter in calls
-    of the thransformer method.
+    of the transformer method.
 
   @return
     - pointer to a replacement Item_field if there is a better equal item or
@@ -5065,7 +5370,7 @@ bool Item_field::set_no_const_sub(uchar *arg)
 
 Item *Item_field::replace_equal_field(uchar *arg)
 {
-  if (item_equal)
+  if (item_equal && item_equal == (Item_equal *) arg)
   {
     Item *const_item= item_equal->get_const();
     if (const_item)
@@ -5074,8 +5379,10 @@ Item *Item_field::replace_equal_field(uchar *arg)
         return this;
       return const_item;
     }
-    Item_field *subst= item_equal->get_first();
-    if (subst && field->table != subst->field->table && !field->eq(subst->field))
+    Item_field *subst= (Item_field *)(item_equal->get_first(this));
+    if (subst)
+      subst= (Item_field *) (subst->real_item());
+    if (subst && !field->eq(subst->field))
       return subst;
   }
   return this;
@@ -5133,10 +5440,12 @@ enum_field_types Item::field_type() const
   case DECIMAL_RESULT: return MYSQL_TYPE_NEWDECIMAL;
   case REAL_RESULT:    return MYSQL_TYPE_DOUBLE;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
     return MYSQL_TYPE_VARCHAR;
   }
+  return MYSQL_TYPE_VARCHAR;
 }
 
 
@@ -5313,16 +5622,19 @@ Field *Item::tmp_table_field_from_field_type(TABLE *table, bool fixed_length)
     break;
   case MYSQL_TYPE_NEWDATE:
   case MYSQL_TYPE_DATE:
-    field= new Field_newdate(maybe_null, name, &my_charset_bin);
+    field= new Field_newdate(0, null_ptr, 0, Field::NONE, name, &my_charset_bin);
     break;
   case MYSQL_TYPE_TIME:
-    field= new Field_time(maybe_null, name, &my_charset_bin);
+    field= new_Field_time(0, null_ptr, 0, Field::NONE, name,
+                              decimals, &my_charset_bin);
     break;
   case MYSQL_TYPE_TIMESTAMP:
-    field= new Field_timestamp(maybe_null, name, &my_charset_bin);
+    field= new_Field_timestamp(0, null_ptr, 0,
+                               Field::NONE, name, 0, decimals, &my_charset_bin);
     break;
   case MYSQL_TYPE_DATETIME:
-    field= new Field_datetime(maybe_null, name, &my_charset_bin);
+    field= new_Field_datetime(0, null_ptr, 0, Field::NONE, name,
+                              decimals, &my_charset_bin);
     break;
   case MYSQL_TYPE_YEAR:
     field= new Field_year((uchar*) 0, max_length, null_ptr, 0, Field::NONE,
@@ -5386,47 +5698,69 @@ void Item_field::make_field(Send_field *tmp_field)
 
 
 /**
-  Set a field's value from a item.
-*/
+  Save a field value in another field
 
-void Item_field::save_org_in_field(Field *to)
-{
-  if (field->is_null())
-  {
-    null_value=1;
-    set_field_to_null_with_conversions(to, 1);
-  }
-  else
-  {
-    to->set_notnull();
-    field_conv(to,field);
-    null_value=0;
-  }
-}
+  @param from             Field to take the value from
+  @param [out] null_value Pointer to the null_value flag to set
+  @param to               Field to save the value in
+  @param no_conversions   How to deal with NULL value
 
-int Item_field::save_in_field(Field *to, bool no_conversions)
+  @details
+  The function takes the value of the field 'from' and, if this value
+  is not null, saves it in the field 'to' and clears the flag referenced
+  by 'null_value'. Otherwise this flag is set and the field 'to' is
+  also set to null, possibly with conversion.
+
+  @note
+  This function is used by the functions Item_field::save_in_field,
+  Item_field::save_org_in_field and Item_ref::save_in_field
+
+  @retval FALSE OK
+  @retval TRUE  Error
+
+*/
+
+static int save_field_in_field(Field *from, bool *null_value,
+                               Field *to, bool no_conversions)
 {
   int res;
-  if (result_field->is_null())
+  DBUG_ENTER("save_field_in_field");
+  if (from->is_null())
   {
-    null_value=1;
-    return set_field_to_null_with_conversions(to, no_conversions);
+    (*null_value)= 1;
+    DBUG_RETURN(set_field_to_null_with_conversions(to, no_conversions));
   }
   to->set_notnull();
 
   /*
     If we're setting the same field as the one we're reading from there's 
     nothing to do. This can happen in 'SET x = x' type of scenarios.
-  */  
-  if (to == result_field)
+  */
+  if (to == from)
   {
-    null_value=0;
-    return 0;
+    (*null_value)= 0;
+    DBUG_RETURN(0);
   }
 
-  res= field_conv(to,result_field);
-  null_value=0;
-  return res;
+  res= field_conv(to, from);
+  (*null_value)= 0;
+  DBUG_RETURN(res);
+}
+
+
+/**
+  Set a field's value from an item.
+*/
+
+void Item_field::save_org_in_field(Field *to)
+{
+  save_field_in_field(field, &null_value, to, TRUE);
+}
+
+
+int Item_field::save_in_field(Field *to, bool no_conversions)
+{
+  return save_field_in_field(result_field, &null_value, to, no_conversions);
 }
 
 
@@ -5472,7 +5806,7 @@ int Item_null::save_safe_in_field(Field *field)
   Item uses str_value to store something, it should
   reimplement it's ::save_in_field() as Item_string, for example, does.
 
-  Note: all Item_XXX::val_str(str) methods must NOT rely on the fact that
+  Note: all Item_XXX::val_str(str) methods must NOT assume that
   str != str_value. For example, see fix for bug #44743.
 */
 
@@ -5498,15 +5832,6 @@ int Item::save_in_field(Field *field, bool no_conversions)
     error=field->store(result->ptr(),result->length(),cs);
     str_value.set_quick(0, 0, cs);
   }
-  else if (result_type() == REAL_RESULT &&
-           field->result_type() == STRING_RESULT)
-  {
-    double nr= val_real();
-    if (null_value)
-      return set_field_to_null_with_conversions(field, no_conversions);
-    field->set_notnull();
-    error= field->store(nr);
-  }
   else if (result_type() == REAL_RESULT)
   {
     double nr= val_real();
@@ -5544,12 +5869,6 @@ int Item_string::save_in_field(Field *field, bool no_conversions)
 }
 
 
-int Item_uint::save_in_field(Field *field, bool no_conversions)
-{
-  /* Item_int::save_in_field handles both signed and unsigned. */
-  return Item_int::save_in_field(field, no_conversions);
-}
-
 static int save_int_value_in_field (Field *field, longlong nr, 
                                     bool null_value, bool unsigned_flag)
 {
@@ -5566,6 +5885,22 @@ int Item_int::save_in_field(Field *field, bool no_conversions)
 }
 
 
+void Item_datetime::set(longlong packed)
+{
+  unpack_time(packed, &ltime);
+}
+
+int Item_datetime::save_in_field(Field *field, bool no_conversions)
+{
+  field->set_notnull();
+  return field->store_time_dec(&ltime, decimals);
+}
+
+longlong Item_datetime::val_int()
+{
+  return TIME_to_ulonglong(&ltime);
+}
+
 int Item_decimal::save_in_field(Field *field, bool no_conversions)
 {
   field->set_notnull();
@@ -5583,7 +5918,9 @@ bool Item_int::eq(const Item *arg, bool binary_cmp) const
       a basic constant.
     */
     Item *item= (Item*) arg;
-    return item->val_int() == value && item->unsigned_flag == unsigned_flag;
+    return (item->val_int() == value &&
+            ((longlong) value >= 0 ||
+             (item->unsigned_flag == unsigned_flag)));
   }
   return FALSE;
 }
@@ -5944,7 +6281,10 @@ bool Item::send(Protocol *protocol, String *buffer)
   {
     String *res;
     if ((res=val_str(buffer)))
+    {
+      DBUG_ASSERT(!null_value);
       result= protocol->store(res->ptr(),res->length(),res->charset());
+    }
     else
     {
       DBUG_ASSERT(null_value);
@@ -6011,7 +6351,7 @@ bool Item::send(Protocol *protocol, String *buffer)
       if (f_type == MYSQL_TYPE_DATE)
 	return protocol->store_date(&tm);
       else
-	result= protocol->store(&tm);
+	result= protocol->store(&tm, decimals);
     }
     break;
   }
@@ -6020,7 +6360,7 @@ bool Item::send(Protocol *protocol, String *buffer)
     MYSQL_TIME tm;
     get_time(&tm);
     if (!null_value)
-      result= protocol->store_time(&tm);
+      result= protocol->store_time(&tm, decimals);
     break;
   }
   }
@@ -6163,17 +6503,7 @@ void Item_field::print(String *str, enum_query_type query_type)
 {
   if (field && field->table->const_table)
   {
-    char buff[MAX_FIELD_WIDTH];
-    String tmp(buff,sizeof(buff),str->charset());
-    field->val_str(&tmp);
-    if (field->is_null())
-      str->append("NULL");
-    else
-    {
-      str->append('\'');
-      str->append(tmp);
-      str->append('\'');
-    }
+    print_value(str);
     return;
   }
   Item_ident::print(str, query_type);
@@ -6185,7 +6515,7 @@ Item_ref::Item_ref(Name_resolution_context *context_arg,
                    const char *field_name_arg,
                    bool alias_name_used_arg)
   :Item_ident(context_arg, NullS, table_name_arg, field_name_arg),
-   result_field(0), ref(item)
+   result_field(0), ref(item), reference_trough_name(0)
 {
   alias_name_used= alias_name_used_arg;
   /*
@@ -6195,11 +6525,41 @@ Item_ref::Item_ref(Name_resolution_context *context_arg,
     set_properties();
 }
 
+/*
+  A Field_enumerator-compatible class that invokes mark_as_dependent() for
+  each field that is a reference to some ancestor of current_select.
+*/
+class Dependency_marker: public Field_enumerator
+{
+public:
+  THD *thd;
+  st_select_lex *current_select;
+  virtual void visit_field(Item_field *item)
+  {
+    // Find which select the field is in. This is achieved by walking up 
+    // the select tree and looking for the table of interest.
+    st_select_lex *sel;
+    for (sel= current_select; sel; sel= sel->outer_select())
+    {
+      List_iterator<TABLE_LIST> li(sel->leaf_tables);
+      TABLE_LIST *tbl;
+      while ((tbl= li++))
+      {
+        if (tbl->table == item->field->table)
+        {
+          if (sel != current_select)
+            mark_as_dependent(thd, sel, current_select, item, item);
+          return;
+        }
+      }
+    }
+  }
+};
 
 Item_ref::Item_ref(TABLE_LIST *view_arg, Item **item,
                    const char *field_name_arg, bool alias_name_used_arg)
   :Item_ident(view_arg, field_name_arg),
-   result_field(NULL), ref(item)
+   result_field(NULL), ref(item), reference_trough_name(0)
 {
   alias_name_used= alias_name_used_arg;
   /*
@@ -6282,6 +6642,7 @@ bool Item_ref::fix_fields(THD *thd, Item **reference)
 
   if (!ref || ref == not_found_item)
   {
+    DBUG_ASSERT(reference_trough_name != 0);
     if (!(ref= resolve_ref_in_select_and_group(thd, this,
                                                context->select_lex)))
       goto error;             /* Some error occurred (e.g. ambiguous names). */
@@ -6432,7 +6793,7 @@ bool Item_ref::fix_fields(THD *thd, Item **reference)
           goto error;
         thd->change_item_tree(reference, fld);
         mark_as_dependent(thd, last_checked_context->select_lex,
-                          thd->lex->current_select, this, fld);
+                          thd->lex->current_select, fld, fld);
         /*
           A reference is resolved to a nest level that's outer or the same as
           the nest level of the enclosing set function : adjust the value of
@@ -6468,6 +6829,19 @@ bool Item_ref::fix_fields(THD *thd, Item **reference)
                       last_checked_context->select_lex->nest_level);
     }
   }
+  else if (ref_type() != VIEW_REF)
+  {
+    /*
+      It could be that we're referring to something that's in ancestor selects.
+      We must make an appropriate mark_as_dependent() call for each such
+      outside reference.
+    */
+    Dependency_marker dep_marker;
+    dep_marker.current_select= current_sel;
+    dep_marker.thd= thd;
+    (*ref)->walk(&Item::enumerate_field_refs_processor, FALSE,
+                 (uchar*)&dep_marker);
+  }
 
   DBUG_ASSERT(*ref);
   /*
@@ -6513,6 +6887,7 @@ void Item_ref::set_properties()
     split_sum_func() doesn't try to change the reference.
   */
   with_sum_func= (*ref)->with_sum_func;
+  with_field= (*ref)->with_field;
   unsigned_flag= (*ref)->unsigned_flag;
   fixed= 1;
   if (alias_name_used)
@@ -6529,10 +6904,100 @@ void Item_ref::cleanup()
   DBUG_ENTER("Item_ref::cleanup");
   Item_ident::cleanup();
   result_field= 0;
+  if (reference_trough_name)
+  {
+    /* We have to reset the reference as it may have been freed */
+    ref= 0;
+  }
   DBUG_VOID_RETURN;
 }
 
 
+/**
+  Transform an Item_ref object with a transformer callback function.
+
+  The function first applies the transform method to the item
+  referenced by this Item_ref object. If this returns a new item the
+  old item is replaced by the new one. After this the transformer
+  is applied to the Item_ref object.
+
+  @param transformer   the transformer callback function to be applied to
+                       the nodes of the tree of the object
+  @param argument      parameter to be passed to the transformer
+
+  @return Item returned as the result of transformation of the Item_ref object
+    @retval !NULL The transformation was successful
+    @retval NULL  Out of memory error
+*/
+
+Item* Item_ref::transform(Item_transformer transformer, uchar *arg)
+{
+  DBUG_ASSERT(!current_thd->stmt_arena->is_stmt_prepare());
+  DBUG_ASSERT((*ref) != NULL);
+
+  /* Transform the object we are referencing. */
+  Item *new_item= (*ref)->transform(transformer, arg);
+  if (!new_item)
+    return NULL;
+
+  /*
+    THD::change_item_tree() should be called only if the tree was
+    really transformed, i.e. when a new item has been created.
+    Otherwise we'll be allocating a lot of unnecessary memory for
+    change records at each execution.
+  */
+  if (*ref != new_item)
+    current_thd->change_item_tree(ref, new_item);
+
+  /* Transform the item ref object. */
+  return (this->*transformer)(arg);
+}
+
+
+/**
+  Compile an Item_ref object with an analyzer and a transformer
+  callback function.
+
+  First the function applies the analyzer to the Item_ref object. Then,
+  if the analyzer succeeds, it applies the compile method to the
+  object the Item_ref object is referencing. If this returns a new
+  item, the old item is replaced by the new one. After this the
+  transformer is applied to the Item_ref object itself.
+  The compile method is not called on the referenced item if the
+  analyzer returns NULL in the parameter arg_p.
+
+  @param analyzer      the analyzer callback function to be applied to the
+                       nodes of the tree of the object
+  @param[in,out] arg_p parameter to be passed to the analyzer
+  @param transformer   the transformer callback function to be applied to the
+                       nodes of the tree of the object
+  @param arg_t         parameter to be passed to the transformer
+
+  @return Item returned as the result of transformation of the Item_ref object
+*/
+
+Item* Item_ref::compile(Item_analyzer analyzer, uchar **arg_p,
+                        Item_transformer transformer, uchar *arg_t)
+{
+  /* Analyze this Item object. */
+  if (!(this->*analyzer)(arg_p))
+    return NULL;
+
+  /* Compile the Item we are referencing. */
+  DBUG_ASSERT((*ref) != NULL);
+  if (*arg_p)
+  {
+    uchar *arg_v= *arg_p;
+    Item *new_item= (*ref)->compile(analyzer, &arg_v, transformer, arg_t);
+    if (new_item && *ref != new_item)
+      current_thd->change_item_tree(ref, new_item);
+  }
+
+  /* Transform this Item object. */
+  return (this->*transformer)(arg_t);
+}
+
+
 void Item_ref::print(String *str, enum_query_type query_type)
 {
   if (ref)
@@ -6639,7 +7104,8 @@ bool Item_ref::val_bool_result()
     case STRING_RESULT:
       return result_field->val_real() != 0.0;
     case ROW_RESULT:
-    default:
+    case TIME_RESULT:
+    case IMPOSSIBLE_RESULT:
       DBUG_ASSERT(0);
     }
   }
@@ -6647,6 +7113,25 @@ bool Item_ref::val_bool_result()
 }
 
 
+void Item_ref::save_result(Field *to)
+{
+  if (result_field)
+  {
+    save_field_in_field(result_field, &null_value, to, TRUE);
+    return;
+  }
+  (*ref)->save_result(to);
+  null_value= (*ref)->null_value;
+}
+
+
+void Item_ref::save_val(Field *to)
+{
+  (*ref)->save_result(to);
+  null_value= (*ref)->null_value;
+}
+
+
 double Item_ref::val_real()
 {
   DBUG_ASSERT(fixed);
@@ -6708,7 +7193,19 @@ my_decimal *Item_ref::val_decimal(my_decimal *decimal_value)
 int Item_ref::save_in_field(Field *to, bool no_conversions)
 {
   int res;
-  DBUG_ASSERT(!result_field);
+  if (result_field)
+  {
+    if (result_field->is_null())
+    {
+      null_value= 1;
+      res= set_field_to_null_with_conversions(to, no_conversions);
+      return res;
+    }
+    to->set_notnull();
+    res= field_conv(to, result_field);
+    null_value= 0;
+    return res;
+  }
   res= (*ref)->save_in_field(to, no_conversions);
   null_value= (*ref)->null_value;
   return res;
@@ -6764,6 +7261,13 @@ void Item_ref_null_helper::print(String *str, enum_query_type query_type)
 }
 
 
+void Item_direct_ref::save_val(Field *to)
+{
+  (*ref)->save_val(to);
+  null_value=(*ref)->null_value;
+}
+
+
 double Item_direct_ref::val_real()
 {
   double tmp=(*ref)->val_real();
@@ -6816,6 +7320,395 @@ bool Item_direct_ref::get_date(MYSQL_TIME *ltime,uint fuzzydate)
 }
 
 
+Item_cache_wrapper::~Item_cache_wrapper()
+{
+  DBUG_ASSERT(expr_cache == 0);
+}
+
+Item_cache_wrapper::Item_cache_wrapper(Item *item_arg)
+:orig_item(item_arg), expr_cache(NULL), expr_value(NULL)
+{
+  DBUG_ASSERT(orig_item->fixed);
+  max_length= orig_item->max_length;
+  maybe_null= orig_item->maybe_null;
+  decimals=   orig_item->decimals;
+  collation.set(orig_item->collation);
+  with_sum_func= orig_item->with_sum_func;
+  with_field= orig_item->with_field;
+  unsigned_flag= orig_item->unsigned_flag;
+  name= item_arg->name;
+  name_length= item_arg->name_length;
+  with_subselect=  orig_item->with_subselect;
+
+  if ((expr_value= Item_cache::get_cache(orig_item)))
+    expr_value->setup(orig_item);
+
+  fixed= 1;
+}
+
+
+/**
+  Initialize the cache if it is needed
+*/
+
+void Item_cache_wrapper::init_on_demand()
+{
+  /* Nothing to do once the expression cache has been initialized. */
+  if (expr_cache->is_inited())
+    return;
+  orig_item->get_cache_parameters(parameters);
+  expr_cache->init();
+}
+
+
+void Item_cache_wrapper::print(String *str, enum_query_type query_type)
+{
+  str->append(func_name());
+  if (expr_cache)
+  {
+    init_on_demand();
+    expr_cache->print(str, query_type);
+  }
+  else
+    str->append(STRING_WITH_LEN("<<DISABLED>>"));
+  str->append('(');
+  orig_item->print(str, query_type);
+  str->append(')');
+}
+
+
+/**
+  Prepare the expression cache wrapper (do nothing)
+
+  @retval FALSE OK
+*/
+
+bool Item_cache_wrapper::fix_fields(THD *thd  __attribute__((unused)),
+                                    Item **it __attribute__((unused)))
+{
+  DBUG_ASSERT(orig_item->fixed);
+  DBUG_ASSERT(fixed);
+  return FALSE;
+}
+
+bool Item_cache_wrapper::send(Protocol *protocol, String *buffer)
+{
+  if (result_field)
+    return protocol->store(result_field);
+  return Item::send(protocol, buffer);
+}
+
+/**
+  Clean the expression cache wrapper up before reusing it.
+*/
+
+void Item_cache_wrapper::cleanup()
+{
+  DBUG_ENTER("Item_cache_wrapper::cleanup");
+  Item_result_field::cleanup();
+  delete expr_cache;
+  expr_cache= 0;
+  /* expr_value is Item so it will be destroyed from list of Items */
+  expr_value= 0;
+  parameters.empty();
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Create an expression cache that uses a temporary table
+
+  @param thd           Thread handle
+
+  @details
+  The function takes the 'parameters' member as the list of all
+  parameters for the expression wrapped into this object and creates
+  an expression cache in a temporary table containing the field for
+  the parameters and the result of the expression (kept in the
+  'expr_value' member).
+
+  @retval FALSE OK
+  @retval TRUE  Error
+*/
+
+bool Item_cache_wrapper::set_cache(THD *thd)
+{
+  DBUG_ENTER("Item_cache_wrapper::set_cache");
+  DBUG_ASSERT(expr_cache == 0);
+  expr_cache= new Expression_cache_tmptable(thd, parameters, expr_value);
+  DBUG_RETURN(expr_cache == NULL);
+}
+
+
+/**
+  Check if the current values of the parameters are in the expression cache
+
+  @details
+  The function checks whether the current set of the parameters of the
+  referenced item can be found in the expression cache. If so the function
+  returns the item by which the result of the expression can be easily
+  extracted from the cache with the corresponding val_* method.
+
+  @retval NULL    - parameters are not in the cache
+  @retval <item*> - item providing the result of the expression found in cache
+*/
+
+Item *Item_cache_wrapper::check_cache()
+{
+  DBUG_ENTER("Item_cache_wrapper::check_cache");
+  if (expr_cache)
+  {
+    Expression_cache_tmptable::result res;
+    Item *cached_value;
+    init_on_demand();
+    res= expr_cache->check_value(&cached_value);
+    if (res == Expression_cache_tmptable::HIT)
+      DBUG_RETURN(cached_value);
+  }
+  DBUG_RETURN(NULL);
+}
+
+
+/**
+  Get the value of the cached expression and put it in the cache
+*/
+
+inline void Item_cache_wrapper::cache()
+{
+  expr_value->store(orig_item);
+  expr_value->cache_value();
+  expr_cache->put_value(expr_value); // put in expr_cache
+}
+
+
+/**
+  Get the value of the possibly cached item into the field.
+*/
+
+void Item_cache_wrapper::save_val(Field *to)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::save_val");
+  if (!expr_cache)                      /* no cache: use the original item */
+  {
+    orig_item->save_val(to);
+    null_value= orig_item->null_value;
+    DBUG_VOID_RETURN;
+  }
+  /* Cache hit: save the value found in the cache. */
+  if ((cached_value= check_cache()))
+  {
+    cached_value->save_val(to);
+    null_value= cached_value->null_value;
+    DBUG_VOID_RETURN;
+  }
+  cache();                              /* miss: evaluate and store in cache */
+  null_value= expr_value->null_value;
+  expr_value->save_val(to);
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Get the integer value of the possibly cached item.
+*/
+
+longlong Item_cache_wrapper::val_int()
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_int");
+  if (!expr_cache)
+  {
+    longlong tmp= orig_item->val_int();
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    longlong tmp= cached_value->val_int();
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  null_value= expr_value->null_value;
+  DBUG_RETURN(expr_value->val_int());
+}
+
+
+/**
+  Get the real value of the possibly cached item
+*/
+
+double Item_cache_wrapper::val_real()
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_real");
+  if (!expr_cache)
+  {
+    double tmp= orig_item->val_real();
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    double tmp= cached_value->val_real();
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  null_value= expr_value->null_value;
+  DBUG_RETURN(expr_value->val_real());
+}
+
+
+/**
+  Get the string value of the possibly cached item
+*/
+
+String *Item_cache_wrapper::val_str(String* str)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_str");
+  if (!expr_cache)
+  {
+    String *tmp= orig_item->val_str(str);
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    String *tmp= cached_value->val_str(str);
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  if ((null_value= expr_value->null_value))
+    DBUG_RETURN(NULL);
+  DBUG_RETURN(expr_value->val_str(str));
+}
+
+
+/**
+  Get the decimal value of the possibly cached item
+*/
+
+my_decimal *Item_cache_wrapper::val_decimal(my_decimal* decimal_value)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_decimal");
+  if (!expr_cache)
+  {
+    my_decimal *tmp= orig_item->val_decimal(decimal_value);
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    my_decimal *tmp= cached_value->val_decimal(decimal_value);
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  if ((null_value= expr_value->null_value))
+    DBUG_RETURN(NULL);
+  DBUG_RETURN(expr_value->val_decimal(decimal_value));
+}
+
+
+/**
+  Get the boolean value of the possibly cached item
+*/
+
+bool Item_cache_wrapper::val_bool()
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_bool");
+  if (!expr_cache)
+  {
+    bool tmp= orig_item->val_bool();
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    bool tmp= cached_value->val_bool();
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  null_value= expr_value->null_value;
+  DBUG_RETURN(expr_value->val_bool());
+}
+
+
+/**
+  Check for NULL the value of the possibly cached item
+*/
+
+bool Item_cache_wrapper::is_null()
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::is_null");
+  if (!expr_cache)
+  {
+    bool tmp= orig_item->is_null();
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    bool tmp= cached_value->is_null();
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  DBUG_RETURN((null_value= expr_value->null_value));
+}
+
+
+/**
+  Get the date value of the possibly cached item
+*/
+
+bool Item_cache_wrapper::get_date(MYSQL_TIME *ltime, uint fuzzydate)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::get_date");
+  if (!expr_cache)
+    DBUG_RETURN((null_value= orig_item->get_date(ltime, fuzzydate)));
+
+  if ((cached_value= check_cache()))
+    DBUG_RETURN((null_value= cached_value->get_date(ltime, fuzzydate)));
+
+  cache();
+  DBUG_RETURN((null_value= expr_value->get_date(ltime, fuzzydate)));
+}
+
+
+int Item_cache_wrapper::save_in_field(Field *to, bool no_conversions)
+{
+  int res;
+  DBUG_ASSERT(!result_field);
+  res= orig_item->save_in_field(to, no_conversions);
+  null_value= orig_item->null_value;
+  return res;
+}
+
+
+Item* Item_cache_wrapper::get_tmp_table_item(THD *thd_arg)
+{
+  if (!orig_item->with_sum_func && !orig_item->const_item())
+    return new Item_field(result_field);
+  return copy_or_same(thd_arg);
+}
+
+
 /**
   Prepare referenced field then call usual Item_direct_ref::fix_fields .
 
@@ -6884,6 +7777,59 @@ bool Item_outer_ref::fix_fields(THD *thd, Item **reference)
 }
 
 
+void Item_outer_ref::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  if (get_depended_from() == new_parent)
+  {
+    *ref= outer_ref;
+    (*ref)->fix_after_pullout(new_parent, ref);
+  }
+}
+
+void Item_ref::fix_after_pullout(st_select_lex *new_parent, Item **refptr)
+{
+  (*ref)->fix_after_pullout(new_parent, ref);
+  if (get_depended_from() == new_parent)
+    depended_from= NULL;
+}
+
+
+/**
+  Mark references from inner selects used in group by clause
+
+  The method is used by the walk method when called for the expressions
+  from the group by clause. The calls occur in the function
+  fix_inner_refs invoked by JOIN::prepare.
+  The parameter passed to Item_outer_ref::check_inner_refs_processor
+  is the iterator over the list of inner references from the subselects
+  of the select to be prepared. The function marks those references
+  from this list whose occurrences are encountered in the group by 
+  expressions passed to the walk method.  
+ 
+  @param arg  pointer to the iterator over a list of inner references
+
+  @return
+    FALSE always
+*/
+
+bool Item_outer_ref::check_inner_refs_processor(uchar *arg)
+{
+  List_iterator_fast<Item_outer_ref> *it=
+    ((List_iterator_fast<Item_outer_ref> *) arg);
+  Item_outer_ref *ref;
+  while ((ref= (*it)++))
+  {
+    if (ref == this)
+    {
+      ref->found_in_group_by= 1;
+      break;
+    }
+  }
+  (*it).rewind();
+  return FALSE;
+}
+
+
 /**
   Compare two view column references for equality.
 
@@ -6915,6 +7861,138 @@ bool Item_direct_view_ref::eq(const Item *item, bool binary_cmp) const
   return FALSE;
 }
 
+
+Item_equal *Item_direct_view_ref::find_item_equal(COND_EQUAL *cond_equal)
+{
+  Item* field_item= real_item();
+  if (field_item->type() != FIELD_ITEM)
+    return NULL;
+  return ((Item_field *) field_item)->find_item_equal(cond_equal);  
+}
+
+
+/**
+  Check whether a reference to field item can be substituted for an equal item
+
+  @details
+  The function checks whether a substitution of a reference to field item for
+  an equal item is valid.
+
+  @param arg   *arg != NULL <-> the reference is in the context
+               where substitution for an equal item is valid
+
+  @note
+    See also the note for Item_field::subst_argument_checker
+
+  @retval
+    TRUE   substitution is valid
+  @retval
+    FALSE  otherwise
+*/
+bool Item_direct_view_ref::subst_argument_checker(uchar **arg)
+{
+  /* A NULL *arg means substitution is not valid in this context. */
+  if (!*arg)
+    return FALSE;
+
+  /* Only references that resolve to a plain field can be substituted. */
+  Item *item= real_item();
+  bool valid= (item->type() == FIELD_ITEM &&
+               (*arg == (uchar *) Item::ANY_SUBST ||
+                result_type() != STRING_RESULT ||
+                (((Item_field *) item)->field->flags & BINARY_FLAG)));
+
+  /* Block any substitution into the wrapped object */
+  *arg= NULL;
+  return valid;
+}
+
+
+/**
+  Set a pointer to the multiple equality the view field reference belongs to
+  (if any).
+
+  @details
+  The function looks for a multiple equality containing this item of the type
+  Item_direct_view_ref among those referenced by arg.
+  In the case such equality exists the function does the following.
+  If the found multiple equality contains a constant, then the item
+  is substituted for this constant, otherwise the function sets a pointer
+  to the multiple equality in the item.
+
+  @param arg    reference to list of multiple equalities where
+                the item (this object) is to be looked for
+
+  @note
+    This function is supposed to be called as a callback parameter in calls
+    of the compile method.
+
+  @note 
+    The function calls Item_field::equal_fields_propagator for the field item
+    this->real_item() to do the job. Then it takes the pointer to equal_item
+    from this field item and assigns it to this->item_equal.
+
+  @return
+    - pointer to the replacing constant item, if the field item was substituted
+    - pointer to the field item, otherwise.
+*/
+
+Item *Item_direct_view_ref::equal_fields_propagator(uchar *arg)
+{
+  Item *field_item= real_item();
+  if (field_item->type() != FIELD_ITEM)
+    return this;
+  Item *item= field_item->equal_fields_propagator(arg);
+  set_item_equal(field_item->get_item_equal());
+  field_item->set_item_equal(NULL);
+  if (item != field_item)
+    return item;
+  return this;
+}
+
+
+/**
+  Replace an Item_direct_view_ref for an equal Item_field evaluated earlier
+  (if any).
+
+  @details
+  If this->item_equal points to some item and coincides with arg then
+  the function returns a pointer to a field item that is referred to by the 
+  first element of the item_equal list which the Item_direct_view_ref
+  object belongs to unless item_equal contains  a constant item. In this
+  case the function returns this constant item (if the substitution does
+   not require conversion).   
+  If the Item_direct_view_ref object does not refer to any Item_equal object
+  'this' is returned.
+
+  @param arg   NULL or points to some item of the Item_equal type
+
+  @note
+    This function is supposed to be called as a callback parameter in calls
+    of the transformer method.
+
+  @note 
+    The function calls Item_field::replace_equal_field for the field item
+    this->real_item() to do the job.
+
+  @return
+    - pointer to a replacement Item_field if there is a better equal item or
+      a pointer to a constant equal item;
+    - this - otherwise.
+*/
+
+Item *Item_direct_view_ref::replace_equal_field(uchar *arg)
+{
+  Item *field_item= real_item();
+  if (field_item->type() != FIELD_ITEM)
+    return this;
+  field_item->set_item_equal(item_equal);
+  Item *item= field_item->replace_equal_field(arg);
+  field_item->set_item_equal(0);
+  return item != field_item ? item : this;
+}
+
+
 bool Item_default_value::eq(const Item *item, bool binary_cmp) const
 {
   return item->type() == DEFAULT_VALUE_ITEM && 
@@ -7287,6 +8365,8 @@ Item_result item_cmp_type(Item_result a,Item_result b)
     return INT_RESULT;
   else if (a == ROW_RESULT || b == ROW_RESULT)
     return ROW_RESULT;
+  else if (a == TIME_RESULT || b == TIME_RESULT)
+    return TIME_RESULT;
   if ((a == INT_RESULT || a == DECIMAL_RESULT) &&
       (b == INT_RESULT || b == DECIMAL_RESULT))
     return DECIMAL_RESULT;
@@ -7300,11 +8380,20 @@ void resolve_const_item(THD *thd, Item **ref, Item *comp_item)
   Item *new_item= NULL;
   if (item->basic_const_item())
     return;                                     // Can't be better
-  Item_result res_type=item_cmp_type(comp_item->result_type(),
-				     item->result_type());
+  Item_result res_type=item_cmp_type(comp_item->cmp_type(), item->cmp_type());
   char *name=item->name;			// Alloced by sql_alloc
 
   switch (res_type) {
+  case TIME_RESULT:
+  {
+    bool is_null;
+    Item **ref_copy= ref;
+    /* the following call creates a constant and puts it in new_item */
+    get_datetime_value(thd, &ref_copy, &new_item, comp_item, &is_null);
+    if (is_null)
+      new_item= new Item_null(name);
+    break;
+  }
   case STRING_RESULT:
   {
     char buff[MAX_FIELD_WIDTH];
@@ -7379,8 +8468,9 @@ void resolve_const_item(THD *thd, Item **ref, Item *comp_item)
                (Item*) new Item_decimal(name, result, length, decimals));
     break;
   }
-  default:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
+    break;
   }
   if (new_item)
     thd->change_item_tree(ref, new_item);
@@ -7400,6 +8490,9 @@ void resolve_const_item(THD *thd, Item **ref, Item *comp_item)
   @note We only use this on the range optimizer/partition pruning,
         because in some cases we can't store the value in the field
         without some precision/character loss.
+
+  @todo rewrite it to use Arg_comparator (currently it's a simplified and
+        incomplete version of it)
 */
 
 int stored_field_cmp_to_item(THD *thd, Field *field, Item *item)
@@ -7431,9 +8524,7 @@ int stored_field_cmp_to_item(THD *thd, Field *field, Item *item)
 
       if (field_type == MYSQL_TYPE_DATE)
         type= MYSQL_TIMESTAMP_DATE;
-
-      if (field_type == MYSQL_TYPE_DATETIME ||
-          field_type == MYSQL_TYPE_TIMESTAMP)
+      else
         type= MYSQL_TIMESTAMP_DATETIME;
         
       const char *field_name= field->field_name;
@@ -7457,6 +8548,25 @@ int stored_field_cmp_to_item(THD *thd, Field *field, Item *item)
     field_val= field->val_decimal(&field_buf);
     return my_decimal_cmp(item_val, field_val);
   }
+  /*
+    We have to check field->cmp_type() instead of res_type,
+    as result_type() - and thus res_type - can never be TIME_RESULT (yet).
+  */
+  if (field->cmp_type() == TIME_RESULT)
+  {
+    MYSQL_TIME field_time, item_time;
+    if (field->type() == MYSQL_TYPE_TIME)
+    {
+      field->get_time(&field_time);
+      item->get_time(&item_time);
+    }
+    else
+    {
+      field->get_date(&field_time, TIME_FUZZY_DATE | TIME_INVALID_DATES);
+      item->get_date(&item_time, TIME_FUZZY_DATE | TIME_INVALID_DATES);
+    }
+    return my_time_compare(&field_time, &item_time);
+  }
   double result= item->val_real();
   if (item->null_value)
     return 0;
@@ -7493,19 +8603,16 @@ Item_cache* Item_cache::get_cache(const Item *item, const Item_result type)
   case DECIMAL_RESULT:
     return new Item_cache_decimal();
   case STRING_RESULT:
-    /* Not all functions that return DATE/TIME are actually DATE/TIME funcs. */
-    if ((item->is_datetime() ||
-         item->field_type() == MYSQL_TYPE_TIME) &&
-        (const_cast<Item*>(item))->result_as_longlong())
-      return new Item_cache_datetime(item->field_type());
     return new Item_cache_str(item);
   case ROW_RESULT:
     return new Item_cache_row();
-  default:
-    // should never be in real life
+  case TIME_RESULT:
+    return new Item_cache_temporal(item->field_type());
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
-    return 0;
+    break;
   }
+  return 0;                                     // Impossible
 }
 
 void Item_cache::store(Item *item)
@@ -7518,6 +8625,11 @@ void Item_cache::store(Item *item)
 
 void Item_cache::print(String *str, enum_query_type query_type)
 {
+  if (value_cached)
+  {
+    print_value(str);
+    return;
+  }
   str->append(STRING_WITH_LEN("<cache>("));
   if (example)
     example->print(str, query_type);
@@ -7538,16 +8650,6 @@ bool  Item_cache_int::cache_value()
 }
 
 
-void Item_cache_int::store(Item *item, longlong val_arg)
-{
-  /* An explicit values is given, save it. */
-  value_cached= TRUE;
-  value= val_arg;
-  null_value= item->null_value;
-  unsigned_flag= item->unsigned_flag;
-}
-
-
 String *Item_cache_int::val_str(String *str)
 {
   DBUG_ASSERT(fixed == 1);
@@ -7583,171 +8685,104 @@ longlong Item_cache_int::val_int()
   return value;
 }
 
-bool  Item_cache_datetime::cache_value_int()
+int Item_cache_int::save_in_field(Field *field, bool no_conversions)
 {
-  if (!example)
-    return false;
-
-  value_cached= true;
-  // Mark cached string value obsolete
-  str_value_cached= false;
-
-  MYSQL_TIME ltime;
-  const bool eval_error= 
-    (field_type() == MYSQL_TYPE_TIME) ?
-    example->get_time(&ltime) :
-    example->get_date(&ltime, TIME_FUZZY_DATE);
-
-  if (eval_error)
-    int_value= 0;
-  else
-  {
-    switch(field_type())
-    {
-    case MYSQL_TYPE_DATETIME:
-    case MYSQL_TYPE_TIMESTAMP:
-      int_value= TIME_to_ulonglong_datetime(&ltime);
-      break;
-    case MYSQL_TYPE_TIME:
-      int_value= TIME_to_ulonglong_time(&ltime);
-      break;
-    default:
-      int_value= TIME_to_ulonglong_date(&ltime);
-      break;
-    }
-    if (ltime.neg)
-      int_value= -int_value;
-  }
+  int error;
+  if (!has_value())
+    return set_field_to_null_with_conversions(field, no_conversions);
 
-  null_value= example->null_value;
-  unsigned_flag= example->unsigned_flag;
+  field->set_notnull();
+  error= field->store(value, unsigned_flag);
 
-  return true;
+  return error ? error : field->table->in_use->is_error() ? 1 : 0;
 }
 
 
-bool  Item_cache_datetime::cache_value()
+Item_cache_temporal::Item_cache_temporal(enum_field_types field_type_arg):
+  Item_cache_int(field_type_arg)
 {
-  if (!example)
-    return FALSE;
-
-  if (cmp_context == INT_RESULT)
-    return cache_value_int();
-
-  str_value_cached= TRUE;
-  // Mark cached int value obsolete
-  value_cached= FALSE;
-  /* Assume here that the underlying item will do correct conversion.*/
-  String *res= example->str_result(&str_value);
-  if (res && res != &str_value)
-    str_value.copy(*res);
-  null_value= example->null_value;
-  unsigned_flag= example->unsigned_flag;
-  return TRUE;
+  if (mysql_type_to_time_type(cached_field_type) == MYSQL_TIMESTAMP_ERROR)
+    cached_field_type= MYSQL_TYPE_DATETIME;
 }
 
 
-void Item_cache_datetime::store(Item *item, longlong val_arg)
+String *Item_cache_temporal::val_str(String *str)
 {
-  /* An explicit values is given, save it. */
-  value_cached= TRUE;
-  int_value= val_arg;
-  null_value= item->null_value;
-  unsigned_flag= item->unsigned_flag;
+  DBUG_ASSERT(fixed == 1);
+  if (!has_value())
+  {
+    null_value= true;
+    return NULL;
+  }
+  return val_string_from_date(str);
 }
 
 
-void Item_cache_datetime::store(Item *item)
+bool  Item_cache_temporal::cache_value()
 {
-  Item_cache::store(item);
-  str_value_cached= FALSE;
+  if (!example)
+    return false;
+
+  value_cached= true;
+ 
+  MYSQL_TIME ltime;
+  if (example->get_date(&ltime, TIME_FUZZY_DATE))
+    value=0;
+  else
+    value= pack_time(&ltime);
+  null_value= example->null_value;
+  return true;
 }
 
-String *Item_cache_datetime::val_str(String *str)
+
+bool Item_cache_temporal::get_date(MYSQL_TIME *ltime, uint fuzzydate)
 {
-  DBUG_ASSERT(fixed == 1);
+  ErrConvInteger str(value);
 
-  if ((value_cached || str_value_cached) && null_value)
-    return NULL;
+  if (!has_value())
+  {
+    bzero((char*) ltime,sizeof(*ltime));
+    return 1;
+  }
 
-  if (!str_value_cached)
+  unpack_time(value, ltime);
+  ltime->time_type= mysql_type_to_time_type(field_type());
+  if (ltime->time_type == MYSQL_TIMESTAMP_TIME)
   {
-    /*
-      When it's possible the Item_cache_datetime uses INT datetime
-      representation due to speed reasons. But still, it always has the STRING
-      result type and thus it can be asked to return a string value. 
-      It is possible that at this time cached item doesn't contain correct
-      string value, thus we have to convert cached int value to string and
-      return it.
-    */
-    if (value_cached)
-    {
-      MYSQL_TIME ltime;
-      /* Return NULL in case of OOM/conversion error. */
-      null_value= TRUE;
-      if (str_value.alloc(MAX_DATE_STRING_REP_LENGTH))
-        return NULL;
-      if (cached_field_type == MYSQL_TYPE_TIME)
-      {
-        longlong time= int_value;
-        set_zero_time(&ltime, MYSQL_TIMESTAMP_TIME);
-        if (time < 0)
-        {
-          time= -time;
-          ltime.neg= TRUE;
-        }
-        DBUG_ASSERT(time <= TIME_MAX_VALUE);
-        ltime.second= time % 100;
-        time/= 100;
-        ltime.minute= time % 100;
-        time/= 100;
-        ltime.hour= time;
-      }
-      else
-      {
-        int was_cut;
-        longlong res;
-        res= number_to_datetime(int_value, &ltime, TIME_FUZZY_DATE, &was_cut);
-        if (res == -1)
-          return NULL;
-      }
-      str_value.length(my_TIME_to_str(&ltime,
-                                      const_cast<char*>(str_value.ptr())));
-      str_value_cached= TRUE;
-      null_value= FALSE;
-    }
-    else if (!cache_value())
-      return NULL;
+    ltime->hour+= (ltime->month*32+ltime->day)*24;
+    ltime->month= ltime->day= 0;
   }
-  return &str_value;
+  return 0;
+ 
 }
 
 
-my_decimal *Item_cache_datetime::val_decimal(my_decimal *decimal_val)
+int Item_cache_temporal::save_in_field(Field *field, bool no_conversions)
 {
-  DBUG_ASSERT(fixed == 1);
+  int error;
   if (!has_value())
-    return NULL;
-  int2my_decimal(E_DEC_FATAL_ERROR, int_value, unsigned_flag, decimal_val);
-  return decimal_val;
-}
+    return set_field_to_null_with_conversions(field, no_conversions);
 
-double Item_cache_datetime::val_real()
-{
-  DBUG_ASSERT(fixed == 1);
-  if ((!value_cached && !cache_value_int()) || null_value)
-    return 0.0;
-  return (double) int_value;
+  field->set_notnull();
+ 
+  MYSQL_TIME ltime;
+  unpack_time(value, &ltime);
+  ltime.time_type= mysql_type_to_time_type(field_type());
+  error= field->store_time_dec(&ltime, decimals);
+ 
+  return error ? error : field->table->in_use->is_error() ? 1 : 0;
 }
 
-longlong Item_cache_datetime::val_int()
+
+void Item_cache_temporal::store_packed(longlong val_arg)
 {
-  DBUG_ASSERT(fixed == 1);
-  if ((!value_cached && !cache_value_int()) || null_value)
-    return 0;
-  return int_value;
+  /* An explicit value is given, save it. */
+  value_cached= true;
+  value= val_arg;
+  null_value= false;
 }
 
+
 bool Item_cache_real::cache_value()
 {
   if (!example)
@@ -7925,7 +8960,7 @@ my_decimal *Item_cache_str::val_decimal(my_decimal *decimal_val)
 int Item_cache_str::save_in_field(Field *field, bool no_conversions)
 {
   if (!has_value())
-    return 0;
+    return set_field_to_null_with_conversions(field, no_conversions);
   int res= Item_cache::save_in_field(field, no_conversions);
   return (is_varbinary && field->type() == MYSQL_TYPE_STRING &&
           value->length() < field->field_length) ? 1 : res;
@@ -8079,12 +9114,14 @@ Item_result Item_type_holder::result_type() const
 
 enum_field_types Item_type_holder::get_real_type(Item *item)
 {
+  if (item->type() == REF_ITEM)
+    item= item->real_item();
   switch(item->type())
   {
   case FIELD_ITEM:
   {
     /*
-      Item_fields::field_type ask Field_type() but sometimes field return
+      Item_field::field_type ask Field_type() but sometimes field return
       a different type, like for enum/set, so we need to ask real type.
     */
     Field *field= ((Item_field *) item)->field;
@@ -8126,7 +9163,8 @@ enum_field_types Item_type_holder::get_real_type(Item *item)
       case DECIMAL_RESULT:
         return MYSQL_TYPE_NEWDECIMAL;
       case ROW_RESULT:
-      default:
+      case TIME_RESULT:
+      case IMPOSSIBLE_RESULT:
         DBUG_ASSERT(0);
         return MYSQL_TYPE_VAR_STRING;
       }
@@ -8453,6 +9491,49 @@ void view_error_processor(THD *thd, void *data)
   ((TABLE_LIST *)data)->hide_view_error(thd);
 }
 
+
+st_select_lex *Item_ident::get_depended_from() const
+{
+  st_select_lex *dep;
+  if ((dep= depended_from))
+    for ( ; dep->merged_into; dep= dep->merged_into) ;
+  return dep;
+}
+
+
+table_map Item_ref::used_tables() const		
+{
+  return get_depended_from() ? OUTER_REF_TABLE_BIT : (*ref)->used_tables(); 
+}
+
+
+void Item_ref::update_used_tables() 
+{ 
+  if (!get_depended_from())
+    (*ref)->update_used_tables(); 
+}
+
+
+table_map Item_direct_view_ref::used_tables() const		
+{
+  return get_depended_from() ? 
+         OUTER_REF_TABLE_BIT :
+         (view->merged ? (*ref)->used_tables() : view->table->map); 
+}
+
+
+/*
+  we add RAND_TABLE_BIT to prevent moving this item from HAVING to WHERE
+*/
+table_map Item_ref_null_helper::used_tables() const
+{
+  return (get_depended_from() ?
+          OUTER_REF_TABLE_BIT :
+          (*ref)->used_tables() | RAND_TABLE_BIT);
+}
+
+
+
 /*****************************************************************************
 ** Instantiate templates
 *****************************************************************************/
diff --git a/sql/item.h b/sql/item.h
index 46916346ebe..cda2ab73c4a 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -1,7 +1,8 @@
-#ifndef ITEM_INCLUDED
-#define ITEM_INCLUDED
+#ifndef SQL_ITEM_INCLUDED
+#define SQL_ITEM_INCLUDED
 
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -28,6 +29,27 @@
 #include "thr_malloc.h"                         /* sql_calloc */
 #include "field.h"                              /* Derivation */
 
+C_MODE_START
+#include <ma_dyncol.h>
+C_MODE_END
+
+static inline
+bool trace_unsupported_func(const char *where, const char *processor_name)
+{
+  /* Trace "<where>::<processor_name>" as unsupported; always returns TRUE. */
+  char buff[64];
+  my_snprintf(buff, sizeof(buff), "%s::%s", where, processor_name);
+  DBUG_ENTER(buff);            /* keeps the pointer: don't reuse buff below */
+  DBUG_PRINT("info", ("%s returns TRUE: unsupported function", processor_name));
+  DBUG_RETURN(TRUE);
+}
+
+static inline
+bool trace_unsupported_by_check_vcol_func_processor(const char *where)
+{
+  return trace_unsupported_func(where, "check_vcol_func_processor");
+}
+
 class Protocol;
 struct TABLE_LIST;
 void item_init(void);			/* Init item functions */
@@ -489,6 +511,17 @@ public:
 };
 
 
+struct st_dyncall_create_def
+{
+  Item  *num, *value;
+  CHARSET_INFO *cs;
+  uint len, frac;
+  DYNAMIC_COLUMN_TYPE type;
+};
+
+typedef struct st_dyncall_create_def DYNCALL_CREATE_DEF;
+
+
 typedef bool (Item::*Item_processor) (uchar *arg);
 /*
   Analyzer function
@@ -505,10 +538,24 @@ typedef bool (Item::*Item_analyzer) (uchar **argp);
 typedef Item* (Item::*Item_transformer) (uchar *arg);
 typedef void (*Cond_traverser) (const Item *item, void *arg);
 
+class Item_equal;
+class COND_EQUAL;
+
 
 class Item {
   Item(const Item &);			/* Prevent use of these */
   void operator=(Item &);
+  /**
+    The index in the JOIN::join_tab array of the JOIN_TAB this Item is attached
+    to. Items are attached (or 'pushed') to JOIN_TABs during optimization by the
+    make_cond_for_table procedure. During query execution, this item is
+    evaluated when the join loop reaches the corresponding JOIN_TAB.
+
+    If the value of join_tab_idx >= MAX_TABLES, this means that there is no
+    corresponding JOIN_TAB.
+  */
+  uint join_tab_idx;
+
 public:
   static void *operator new(size_t size) throw ()
   { return sql_alloc(size); }
@@ -525,12 +572,15 @@ public:
              SUBSELECT_ITEM, ROW_ITEM, CACHE_ITEM, TYPE_HOLDER,
              PARAM_ITEM, TRIGGER_FIELD_ITEM, DECIMAL_ITEM,
              XPATH_NODESET, XPATH_NODESET_CMP,
-             VIEW_FIXER_ITEM};
+             VIEW_FIXER_ITEM, EXPR_CACHE_ITEM};
 
   enum cond_result { COND_UNDEF,COND_OK,COND_TRUE,COND_FALSE };
 
   enum traverse_order { POSTFIX, PREFIX };
   
+  /* Cache of the result of is_expensive(). */
+  int8 is_expensive_cache;
+  
   /* Reuse size, only used by SP local variable assignment, otherwize 0 */
   uint rsize;
 
@@ -552,23 +602,30 @@ public:
   Item *next;
   uint32 max_length;                    /* Maximum length, in bytes */
   /*
-    TODO: convert name and name_length fields into String to keep them in sync
-    (see bug #11829681/60295 etc).
+    TODO: convert name and name_length fields into LEX_STRING to keep them in
+    sync (see bug #11829681/60295 etc). Then also remove some strlen(name)
+    calls.
   */
   uint name_length;                     /* Length of name */
   int8 marker;
   uint8 decimals;
-  my_bool maybe_null;			/* If item may be null */
-  my_bool null_value;			/* if item is null */
-  my_bool unsigned_flag;
-  my_bool with_sum_func;
-  my_bool fixed;                        /* If item fixed with fix_fields */
-  my_bool is_autogenerated_name;        /* indicate was name of this Item
+  bool maybe_null;			/* If item may be null */
+  bool in_rollup;                       /* If used in GROUP BY list
+                                           of a query with ROLLUP */ 
+  bool null_value;			/* if item is null */
+  bool unsigned_flag;
+  bool with_sum_func;                   /* True if item contains a sum func */
+  /**
+    True if any item except Item_sum_func contains a field. Set during parsing.
+  */
+  bool with_field;
+  bool fixed;                           /* If item fixed with fix_fields */
+  bool is_autogenerated_name;           /* indicate was name of this Item
                                            autogenerated or set by user */
-  DTCollation collation;
-  my_bool with_subselect;               /* If this item is a subselect or some
+  bool with_subselect;                  /* If this item is a subselect or some
                                            of its arguments is or contains a
                                            subselect */
+  DTCollation collation;
   Item_result cmp_context;              /* Comparison context */
   // alloc & destruct is done as start of select using sql_alloc
   Item();
@@ -595,10 +652,23 @@ public:
   Field *make_string_field(TABLE *table);
   virtual bool fix_fields(THD *, Item **);
   /*
-    should be used in case where we are sure that we do not need
+    Fix after some tables have been pulled out. Basically re-calculate all
+    attributes that are dependent on the tables.
+  */
+  virtual void fix_after_pullout(st_select_lex *new_parent, Item **ref) {};
+
+  /*
+    This method should be used in case where we are sure that we do not need
     complete fix_fields() procedure.
+    Usually this method is used by the optimizer when it has to create a new
+    item out of other already fixed items. For example, if the optimizer has
+    to create a new Item_func for an inferred equality whose left and right
+    parts are already fixed items. In some cases the optimizer cannot use
+    directly fixed items as the arguments of the created functional item, 
+    but rather uses intermediate type conversion items. Then the method is
+    supposed to be applied recursively.  
   */
-  inline void quick_fix_field() { fixed= 1; }
+  virtual inline void quick_fix_field() { fixed= 1; }
   /* Function returns 1 on overflow and -1 on fatal errors */
   int save_in_field_no_warnings(Field *field, bool no_conversions);
   virtual int save_in_field(Field *field, bool no_conversions);
@@ -608,11 +678,20 @@ public:
   { return save_in_field(field, 1); }
   virtual bool send(Protocol *protocol, String *str);
   virtual bool eq(const Item *, bool binary_cmp) const;
+  /* result_type() of an item specifies how the value should be returned */
   virtual Item_result result_type() const { return REAL_RESULT; }
-  virtual Item_result cast_to_int_type() const { return result_type(); }
+  /* ... while cmp_type() specifies how it should be compared */
+  virtual Item_result cmp_type() const;
+  virtual Item_result cast_to_int_type() const { return cmp_type(); }
   virtual enum_field_types string_field_type() const;
   virtual enum_field_types field_type() const;
   virtual enum Type type() const =0;
+  /*
+    real_type() is the type of base item.  This is same as type() for
+    most items, except Item_ref() and Item_cache_wrapper() where it
+    shows the type for the underlying item.
+  */
+  virtual enum Type real_type() const { return type(); }
   
   /*
     Return information about function monotonicity. See comment for
@@ -821,10 +900,22 @@ public:
   */
   virtual bool val_bool();
   virtual String *val_nodeset(String*) { return 0; }
+
+  /*
+    save_val() is method of val_* family which stores value in the given
+    field.
+  */
+  virtual void save_val(Field *to) { save_org_in_field(to); }
+  /*
+    save_result() is method of val*result() family which stores value in
+    the given field.
+  */
+  virtual void save_result(Field *to) { save_val(to); }
   /* Helper functions, see item_sum.cc */
   String *val_string_from_real(String *str);
   String *val_string_from_int(String *str);
   String *val_string_from_decimal(String *str);
+  String *val_string_from_date(String *str);
   my_decimal *val_decimal_from_real(my_decimal *decimal_value);
   my_decimal *val_decimal_from_int(my_decimal *decimal_value);
   my_decimal *val_decimal_from_string(my_decimal *decimal_value);
@@ -841,6 +932,8 @@ public:
   /* This is also used to create fields in CREATE ... SELECT: */
   virtual Field *tmp_table_field(TABLE *t_arg) { return 0; }
   virtual const char *full_name() const { return name ? name : "???"; }
+  const char *field_name_or_null()
+  { return real_item()->type() == Item::FIELD_ITEM ? name : NULL; }
 
   /*
     *result* family of methods is analog of *val* family (see above) but
@@ -855,9 +948,18 @@ public:
   { return val_decimal(val); }
   virtual bool val_bool_result() { return val_bool(); }
   virtual bool is_null_result() { return is_null(); }
-
-  /* bit map of tables used by item */
+  /*
+    Returns 1 if result type and collation for val_str() can change between
+    calls
+  */
+  virtual bool dynamic_result() { return 0; }
+  /* 
+    Bitmap of tables used by item
+    (note: if you need to check dependencies on individual columns, check out
+     class Field_enumerator)
+  */
   virtual table_map used_tables() const { return (table_map) 0L; }
+  virtual table_map all_used_tables() const { return used_tables(); }
   /*
     Return table map of tables that can't be NULL tables (tables that are
     used in a context where if they would contain a NULL row generated
@@ -912,6 +1014,7 @@ public:
   }
 
   void print_item_w_name(String *, enum_query_type query_type);
+  void print_value(String *);
   virtual void update_used_tables() {}
   virtual void split_sum_func(THD *thd, Item **ref_pointer_array,
                               List<Item> &fields) {}
@@ -919,7 +1022,9 @@ public:
   void split_sum_func2(THD *thd, Item **ref_pointer_array, List<Item> &fields,
                        Item **ref, bool skip_registered);
   virtual bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
-  virtual bool get_time(MYSQL_TIME *ltime);
+  bool get_time(MYSQL_TIME *ltime)
+  { return get_date(ltime, TIME_TIME_ONLY | TIME_FUZZY_DATE); }
+  bool get_seconds(ulonglong *sec, ulong *sec_part);
   virtual bool get_date_result(MYSQL_TIME *ltime,uint fuzzydate)
   { return get_date(ltime,fuzzydate); }
   /*
@@ -959,6 +1064,7 @@ public:
     set value of aggregate function in case of no rows for grouping were found
   */
   virtual void no_rows_in_result() {}
+  virtual void restore_to_before_no_rows_in_result() {}
   virtual Item *copy_or_same(THD *thd) { return this; }
   virtual Item *copy_andor_structure(THD *thd) { return this; }
   virtual Item *real_item() { return this; }
@@ -1027,15 +1133,45 @@ public:
   virtual bool remove_fixed(uchar * arg) { fixed= 0; return 0; }
   virtual bool cleanup_processor(uchar *arg);
   virtual bool collect_item_field_processor(uchar * arg) { return 0; }
+  virtual bool add_field_to_set_processor(uchar * arg) { return 0; }
   virtual bool find_item_in_field_list_processor(uchar *arg) { return 0; }
   virtual bool change_context_processor(uchar *context) { return 0; }
   virtual bool reset_query_id_processor(uchar *query_id_arg) { return 0; }
   virtual bool is_expensive_processor(uchar *arg) { return 0; }
-  virtual bool find_item_processor(uchar *arg) { return this == (void *) arg; }
   virtual bool register_field_in_read_map(uchar *arg) { return 0; }
+  virtual bool register_field_in_write_map(uchar *arg) { return 0; }
+  virtual bool enumerate_field_refs_processor(uchar *arg) { return 0; }
+  virtual bool mark_as_eliminated_processor(uchar *arg) { return 0; }
+  virtual bool eliminate_subselect_processor(uchar *arg) { return 0; }
+  virtual bool set_fake_select_as_master_processor(uchar *arg) { return 0; }
+  virtual bool view_used_tables_processor(uchar *arg) { return 0; }
+  virtual bool eval_not_null_tables(uchar *opt_arg) { return 0; }
+  virtual bool clear_sum_processor(uchar *opt_arg) { return 0; }
+
+  /* To call a void member function on every walked item but the original */
+  struct bool_func_call_args
+  {
+    Item *original_func_item;          /* item the function is NOT called on */
+    void (Item::*bool_function)();     /* member function to invoke */
+  };
+  bool call_bool_func_processor(uchar *org_item)  /* a bool_func_call_args* */
+  {
+    bool_func_call_args *info= (bool_func_call_args*) org_item;
+    /* Avoid recursion, as walk also calls for original item */
+    if (info->original_func_item != this)
+      (this->*(info->bool_function))();
+    return FALSE;                      /* unconditionally FALSE */
+  }
+
+  /*
+    The next function differs from the previous one in that a bitmap to be
+    updated is passed as uchar *arg.
+  */
+  virtual bool register_field_in_bitmap(uchar *arg) { return 0; }
+
+  bool cache_const_expr_analyzer(uchar **arg);
+  Item* cache_const_expr_transformer(uchar *arg);
 
-  virtual bool cache_const_expr_analyzer(uchar **arg);
-  virtual Item* cache_const_expr_transformer(uchar *arg);
   /*
     Check if a partition function is allowed
     SYNOPSIS
@@ -1088,11 +1224,54 @@ public:
     fields.
   */
   virtual bool check_partition_func_processor(uchar *bool_arg) { return TRUE;}
+  /*
+    @brief
+    Processor used to mark virtual columns used in partitioning expression
+
+    @param
+    arg     always ignored
+
+    @retval
+      FALSE      always
+  */
+  virtual bool vcol_in_partition_func_processor(uchar *arg)
+  {
+    return FALSE;
+  }
+
+  /*
+    The enumeration Subst_constraint is currently used only in implementations
+    of the virtual function subst_argument_checker.
+  */ 
+  enum Subst_constraint 
+  { 
+    NO_SUBST= 0,         /* No substitution for a field is allowed   */
+    ANY_SUBST,           /* Any substitution for a field is allowed  */ 
+    IDENTITY_SUBST       /* Substitution for a field is allowed if any two
+                            different values of the field type are not equal */
+  };
+
   virtual bool subst_argument_checker(uchar **arg)
   { 
-    if (*arg)
-      *arg= NULL; 
-    return TRUE;     
+    return (*arg != NULL); 
+  }
+
+  /*
+    @brief
+    Processor used to check acceptability of an item in the defining
+    expression for a virtual column 
+    
+    @param
+      arg     always ignored
+      
+    @retval
+      FALSE    the item is accepted in the definition of a virtual column
+    @retval 
+      TRUE     otherwise
+  */
+  virtual bool check_vcol_func_processor(uchar *arg)
+  {
+    return trace_unsupported_by_check_vcol_func_processor(full_name());
   }
 
   virtual Item *equal_fields_propagator(uchar * arg) { return this; }
@@ -1107,6 +1286,15 @@ public:
   {
     return FALSE;
   }
+  struct Collect_deps_prm
+  {
+    int nest_level;
+    List<Item> *parameters;
+  };
+  /**
+    Collect outer references
+  */
+  virtual bool collect_outer_ref_processor(uchar *arg) {return FALSE; }
 
   /**
     Find a function of a given type
@@ -1125,6 +1313,8 @@ public:
     return FALSE;
   }
 
+  virtual bool check_inner_refs_processor(uchar *arg) { return FALSE; }
+
   /*
     For SP local variable returns pointer to Item representing its
     current value and pointer to current Item otherwise.
@@ -1153,6 +1343,8 @@ public:
 
   virtual Item *neg_transformer(THD *thd) { return NULL; }
   virtual Item *update_value_transformer(uchar *select_arg) { return this; }
+  virtual Item *expr_cache_insert_transformer(uchar *thd_arg) { return this; }
+  virtual bool expr_cache_is_needed(THD *) { return FALSE; }
   virtual Item *safe_charset_converter(CHARSET_INFO *tocs);
   void delete_self()
   {
@@ -1170,47 +1362,39 @@ public:
   {
     return 0;
   }
-  /*
-    result_as_longlong() must return TRUE for Items representing DATE/TIME
-    functions and DATE/TIME table fields.
-    Those Items have result_type()==STRING_RESULT (and not INT_RESULT), but
-    their values should be compared as integers (because the integer
-    representation is more precise than the string one).
-  */
-  virtual bool result_as_longlong() { return FALSE; }
-  inline bool is_datetime() const
-  {
-    switch (field_type())
-    {
-      case MYSQL_TYPE_DATE:
-      case MYSQL_TYPE_DATETIME:
-      case MYSQL_TYPE_TIMESTAMP:
-        return TRUE;
-      default:
-        break;
-    }
-    return FALSE;
-  }
   /**
     Check whether this and the given item has compatible comparison context.
     Used by the equality propagation. See Item_field::equal_fields_propagator.
 
     @return
-      TRUE  if the context is the same or if fields could be
-            compared as DATETIME values by the Arg_comparator.
+      TRUE  if the context is the same
       FALSE otherwise.
   */
   inline bool has_compatible_context(Item *item) const
   {
-    /* Same context. */
-    if (cmp_context == (Item_result)-1 || item->cmp_context == cmp_context)
-      return TRUE;
-    /* DATETIME comparison context. */
-    if (is_datetime())
-      return item->is_datetime() || item->cmp_context == STRING_RESULT;
-    if (item->is_datetime())
-      return is_datetime() || cmp_context == STRING_RESULT;
-    return FALSE;
+    return cmp_context == IMPOSSIBLE_RESULT || item->cmp_context == cmp_context;
+  }
+  /*
+    Test whether an expression is expensive to compute. Used during
+    optimization to avoid computing expensive expressions during this
+    phase. Also used to force temp tables when sorting on expensive
+    functions.
+    TODO:
+    Normally we should have a method:
+      cost Item::execution_cost(),
+    where 'cost' is either 'double' or some structure of various cost
+    parameters.
+
+    NOTE
+      This function is now used to prevent evaluation of materialized IN
+      subquery predicates before it is allowed. grep for 
+      DontEvaluateMaterializedSubqueryTooEarly to see the uses.
+  */
+  virtual bool is_expensive()
+  {
+    if (is_expensive_cache < 0)          /* negative: no cached result yet */
+      is_expensive_cache= walk(&Item::is_expensive_processor, 0, (uchar*)0);
+    return test(is_expensive_cache);     /* cached walk() result as a bool */
+  }
   virtual Field::geometry_type get_geometry_type() const
     { return Field::GEOM_GEOMETRY; };
@@ -1240,20 +1424,79 @@ public:
     else
       max_length= (uint32) max_result_length;
   }
-  void fix_length_and_charset_datetime(uint32 max_char_length_arg)
-  {
-    collation.set(&my_charset_numeric, DERIVATION_NUMERIC, MY_REPERTOIRE_ASCII);
-    fix_char_length(max_char_length_arg);
-  }
   /*
     Return TRUE if the item points to a column of an outer-joined table.
   */
   virtual bool is_outer_field() const { DBUG_ASSERT(fixed); return FALSE; }
+  Item* set_expr_cache(THD *thd);
+  virtual Item *get_cached_item() { return NULL; }
+
+  virtual Item_equal *get_item_equal() { return NULL; }
+  virtual void set_item_equal(Item_equal *item_eq) {};
+  virtual Item_equal *find_item_equal(COND_EQUAL *cond_equal) { return NULL; }
+  /**
+    Set the join tab index to the minimal (left-most) JOIN_TAB to which this
+    Item is attached. The number is an index in depth_first_tab() traversal
+    order.
+  */
+  virtual void set_join_tab_idx(uint join_tab_idx_arg)
+  {
+    if (join_tab_idx_arg < join_tab_idx)     /* only ever lower the index */
+      join_tab_idx= join_tab_idx_arg;
+  }
+  virtual uint get_join_tab_idx() { return join_tab_idx; }
+
+  table_map view_used_tables(TABLE_LIST *view)
+  {
+    view->view_used_tables= 0;          /* reset before walking the item tree */
+    walk(&Item::view_used_tables_processor, 0, (uchar *) view);
+    return view->view_used_tables;      /* presumably set by the processors */
+  }
+
+  /**
+    Collect and add to the list cache parameters for this Item.
+
+    @note Now implemented only for subqueries and in_optimizer,
+    if we need it for general function then this method should
+    be defined for Item_func.
+  */
+  virtual void get_cache_parameters(List<Item> &parameters) { };
 };
 
 
-class sp_head;
+/**
+  Compare two Items for List<Item>::add_unique()
+*/
 
+bool cmp_items(Item *a, Item *b);
+
+
+/*
+  Interface used to enumerate all field references in an item tree. This
+  includes references to the outside, but not fields of the tables within a
+  subquery.
+  Suggested usage:
+
+    class My_enumerator : public Field_enumerator
+    {
+      virtual void visit_field(Item_field *field) { ... your actions ... }
+    };
+
+    My_enumerator enumerator;
+    item->walk(&Item::enumerate_field_refs_processor, ...,(uchar*)&enumerator);
+
+  This is similar to the Visitor pattern.
+*/
+
+class Field_enumerator
+{
+public:
+  virtual void visit_field(Item_field *field)= 0;
+  virtual ~Field_enumerator() {};             /* purecov: inspected */
+  Field_enumerator() {}                       /* Remove gcc warning */
+};
+
+class sp_head;
 
 class Item_basic_constant :public Item
 {
@@ -1548,6 +1791,10 @@ public:
   {
     return value_item->send(protocol, str);
   }
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("name_const");
+  }
 };
 
 bool agg_item_collations(DTCollation &c, const char *name,
@@ -1599,6 +1846,7 @@ public:
   virtual Item_num *neg()= 0;
   Item *safe_charset_converter(CHARSET_INFO *tocs);
   bool check_partition_func_processor(uchar *int_arg) { return FALSE;}
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
 };
 
 #define NO_CACHED_FIELD_INDEX ((uint)(-1))
@@ -1636,6 +1884,19 @@ public:
   */
   TABLE_LIST *cached_table;
   st_select_lex *depended_from;
+  /*
+    Some Items resolved in another select should not be marked as dependency
+    of the subquery where they are. During normal name resolution, we check
+    this. Stored procedures and prepared statements first try to resolve an
+    ident item using a cached table reference and field position from the
+    previous query execution (cached_table/cached_field_index). If the
+    tables were not changed, the ident matches the table/field, and we have
+    faster resolution of the ident without looking through all tables and
+    fields in the query. But in this case, we can not check all conditions
+    about this ident item dependency, so we should cache the condition in
+    this variable.
+  */
+  bool can_be_depended;
   Item_ident(Name_resolution_context *context_arg,
              const char *db_name_arg, const char *table_name_arg,
              const char *field_name_arg);
@@ -1643,10 +1904,15 @@ public:
   Item_ident(TABLE_LIST *view_arg, const char *field_name_arg);
   const char *full_name() const;
   void cleanup();
+  st_select_lex *get_depended_from() const;
   bool remove_dependence_processor(uchar * arg);
   virtual void print(String *str, enum_query_type query_type);
   virtual bool change_context_processor(uchar *cntx)
     { context= (Name_resolution_context *)cntx; return FALSE; }
+  /**
+    Collect outer references
+  */
+  virtual bool collect_outer_ref_processor(uchar *arg);
   friend bool insert_fields(THD *thd, Name_resolution_context *context,
                             const char *db_name,
                             const char *table_name, List_iterator<Item> *it,
@@ -1677,9 +1943,6 @@ public:
 };
 
 
-class Item_equal;
-class COND_EQUAL;
-
 class Item_field :public Item_ident
 {
 protected:
@@ -1720,6 +1983,7 @@ public:
   longlong val_int();
   my_decimal *val_decimal(my_decimal *);
   String *val_str(String*);
+  void save_result(Field *to);
   double val_result();
   longlong val_int_result();
   String *str_result(String* tmp);
@@ -1729,17 +1993,19 @@ public:
   bool send(Protocol *protocol, String *str_arg);
   void reset_field(Field *f);
   bool fix_fields(THD *, Item **);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   void make_field(Send_field *tmp_field);
   int save_in_field(Field *field,bool no_conversions);
   void save_org_in_field(Field *field);
   table_map used_tables() const;
+  table_map all_used_tables() const; 
   enum Item_result result_type () const
   {
     return field->result_type();
   }
   Item_result cast_to_int_type() const
   {
-    return field->cast_to_int_type();
+    return field->cmp_type();
   }
   enum_field_types field_type() const
   {
@@ -1754,19 +2020,22 @@ public:
   Field *tmp_table_field(TABLE *t_arg) { return result_field; }
   bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
   bool get_date_result(MYSQL_TIME *ltime,uint fuzzydate);
-  bool get_time(MYSQL_TIME *ltime);
   bool is_null() { return field->is_null(); }
   void update_null_value();
   Item *get_tmp_table_item(THD *thd);
   bool collect_item_field_processor(uchar * arg);
+  bool add_field_to_set_processor(uchar * arg);
   bool find_item_in_field_list_processor(uchar *arg);
   bool register_field_in_read_map(uchar *arg);
+  bool register_field_in_write_map(uchar *arg);
+  bool register_field_in_bitmap(uchar *arg);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool vcol_in_partition_func_processor(uchar *bool_arg);
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
+  bool enumerate_field_refs_processor(uchar *arg);
   void cleanup();
-  bool result_as_longlong()
-  {
-    return field->can_be_compared_as_longlong();
-  }
+  Item_equal *get_item_equal() { return item_equal; }
+  void set_item_equal(Item_equal *item_eq) { item_equal= item_eq; }
   Item_equal *find_item_equal(COND_EQUAL *cond_equal);
   bool subst_argument_checker(uchar **arg);
   Item *equal_fields_propagator(uchar *arg);
@@ -1828,6 +2097,7 @@ public:
 
   Item *safe_charset_converter(CHARSET_INFO *tocs);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
 };
 
 class Item_null_result :public Item_null
@@ -1841,7 +2111,11 @@ public:
     save_in_field(result_field, no_conversions);
   }
   bool check_partition_func_processor(uchar *int_arg) {return TRUE;}
-};  
+  bool check_vcol_func_processor(uchar *arg)
+  {
+    return trace_unsupported_by_check_vcol_func_processor(full_name());
+  }
+};
 
 /* Item represents one placeholder ('?') of prepared statement */
 
@@ -1919,6 +2193,7 @@ public:
   Item_param(uint pos_in_query_arg);
 
   enum Item_result result_type () const { return item_result_type; }
+  enum Item_result cast_to_int_type() const { return item_result_type; }
   enum Type type() const { return item_type; }
   enum_field_types field_type() const { return param_type; }
 
@@ -1926,7 +2201,6 @@ public:
   longlong val_int();
   my_decimal *val_decimal(my_decimal*);
   String *val_str(String*);
-  bool get_time(MYSQL_TIME *tm);
   bool get_date(MYSQL_TIME *tm, uint fuzzydate);
   int  save_in_field(Field *field, bool no_conversions);
 
@@ -2038,6 +2312,7 @@ public:
   { return (uint)(max_length - test(value < 0)); }
   bool eq(const Item *, bool binary_cmp) const;
   bool check_partition_func_processor(uchar *bool_arg) { return FALSE;}
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
 };
 
 
@@ -2045,17 +2320,28 @@ class Item_uint :public Item_int
 {
 public:
   Item_uint(const char *str_arg, uint length);
-  Item_uint(ulonglong i) :Item_int((ulonglong) i, 10) {}
+  Item_uint(ulonglong i) :Item_int(i, 10) {}
   Item_uint(const char *str_arg, longlong i, uint length);
   double val_real()
     { DBUG_ASSERT(fixed == 1); return ulonglong2double((ulonglong)value); }
   String *val_str(String*);
   Item *clone_item() { return new Item_uint(name, value, max_length); }
-  int save_in_field(Field *field, bool no_conversions);
   virtual void print(String *str, enum_query_type query_type);
   Item_num *neg ();
   uint decimal_precision() const { return max_length; }
-  bool check_partition_func_processor(uchar *bool_arg) { return FALSE;}
+};
+
+
+class Item_datetime :public Item_int  /* Item_int that also carries a MYSQL_TIME */
+{
+protected:
+  MYSQL_TIME ltime;                /* presumably maintained by set() - confirm */
+public:
+  Item_datetime() :Item_int(0) { unsigned_flag=0; }  /* value 0, signed */
+  int save_in_field(Field *field, bool no_conversions);
+  longlong val_int();
+  double val_real() { return (double)val_int(); }    /* val_int() as double */
+  void set(longlong packed);       /* presumably a packed datetime - confirm */
+};
 
 
@@ -2097,6 +2383,7 @@ public:
   bool eq(const Item *, bool binary_cmp) const;
   void set_decimal_value(my_decimal *value_par);
   bool check_partition_func_processor(uchar *bool_arg) { return FALSE;}
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
 };
 
 
@@ -2254,6 +2541,7 @@ public:
   }
   virtual void print(String *str, enum_query_type query_type);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
 
   /**
     Return TRUE if character-set-introducer was explicitly specified in the
@@ -2300,9 +2588,11 @@ private:
 
 
 longlong 
-longlong_from_string_with_check (CHARSET_INFO *cs, const char *cptr, char *end);
+longlong_from_string_with_check(CHARSET_INFO *cs, const char *cptr,
+                                const char *end);
 double 
-double_from_string_with_check (CHARSET_INFO *cs, const char *cptr, char *end);
+double_from_string_with_check(CHARSET_INFO *cs, const char *cptr,
+                              const char *end);
 
 class Item_static_string_func :public Item_string
 {
@@ -2321,6 +2611,10 @@ public:
   }
 
   bool check_partition_func_processor(uchar *int_arg) {return TRUE;}
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name);
+  }
 };
 
 
@@ -2328,10 +2622,14 @@ public:
 class Item_partition_func_safe_string: public Item_string
 {
 public:
-  Item_partition_func_safe_string(const char *name, uint length,
+  Item_partition_func_safe_string(const char *name_arg, uint length,
                                   CHARSET_INFO *cs= NULL):
-    Item_string(name, length, cs)
+    Item_string(name_arg, length, cs)
   {}
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("safe_string");
+  }
 };
 
 
@@ -2339,10 +2637,11 @@ class Item_return_date_time :public Item_partition_func_safe_string
 {
   enum_field_types date_time_field_type;
 public:
-  Item_return_date_time(const char *name_arg, enum_field_types field_type_arg)
-    :Item_partition_func_safe_string(name_arg, 0, &my_charset_bin),
+  Item_return_date_time(const char *name_arg, uint length_arg,
+                        enum_field_types field_type_arg)
+    :Item_partition_func_safe_string(name_arg, length_arg, &my_charset_bin),
      date_time_field_type(field_type_arg)
-  { }
+  { decimals= 0; }
   enum_field_types field_type() const { return date_time_field_type; }
 };
 
@@ -2350,8 +2649,8 @@ public:
 class Item_blob :public Item_partition_func_safe_string
 {
 public:
-  Item_blob(const char *name, uint length) :
-    Item_partition_func_safe_string(name, length, &my_charset_bin)
+  Item_blob(const char *name_arg, uint length) :
+    Item_partition_func_safe_string(name_arg, length, &my_charset_bin)
   { max_length= length; }
   enum Type type() const { return TYPE_HOLDER; }
   enum_field_types field_type() const { return MYSQL_TYPE_BLOB; }
@@ -2379,8 +2678,8 @@ class Item_return_int :public Item_int
   enum_field_types int_field_type;
 public:
   Item_return_int(const char *name_arg, uint length,
-		  enum_field_types field_type_arg, longlong value= 0)
-    :Item_int(name_arg, value, length), int_field_type(field_type_arg)
+		  enum_field_types field_type_arg, longlong value_arg= 0)
+    :Item_int(name_arg, value_arg, length), int_field_type(field_type_arg)
   {
     unsigned_flag=1;
   }
@@ -2411,6 +2710,7 @@ public:
   bool eq(const Item *item, bool binary_cmp) const;
   virtual Item *safe_charset_converter(CHARSET_INFO *tocs);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
 private:
   void hex_string_init(const char *str, uint str_length);
 };
@@ -2443,6 +2743,7 @@ public:
     save_in_field(result_field, no_conversions);
   }
   void cleanup();
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
   /*
     This method is used for debug purposes to print the name of an
     item to the debug log. The second use of this method is as
@@ -2471,11 +2772,12 @@ public:
   enum Ref_Type { REF, DIRECT_REF, VIEW_REF, OUTER_REF, AGGREGATE_REF };
   Field *result_field;			 /* Save result here */
   Item **ref;
+  bool reference_trough_name;
   Item_ref(Name_resolution_context *context_arg,
            const char *db_arg, const char *table_name_arg,
            const char *field_name_arg)
     :Item_ident(context_arg, db_arg, table_name_arg, field_name_arg),
-     result_field(0), ref(0) {}
+    result_field(0), ref(0), reference_trough_name(1) {}
   /*
     This constructor is used in two scenarios:
     A) *item = NULL
@@ -2500,11 +2802,15 @@ public:
   Item_ref(THD *thd, Item_ref *item)
     :Item_ident(thd, item), result_field(item->result_field), ref(item->ref) {}
   enum Type type() const		{ return REF_ITEM; }
+  enum Type real_type() const           { return ref ? (*ref)->type() :
+                                          REF_ITEM; }
   bool eq(const Item *item, bool binary_cmp) const
   { 
     Item *it= ((Item *) item)->real_item();
     return ref && (*ref)->eq(it, binary_cmp);
   }
+  void save_val(Field *to);
+  void save_result(Field *to);
   double val_real();
   longlong val_int();
   my_decimal *val_decimal(my_decimal *);
@@ -2521,6 +2827,7 @@ public:
   bool send(Protocol *prot, String *tmp);
   void make_field(Send_field *field);
   bool fix_fields(THD *, Item **);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   int save_in_field(Field *field, bool no_conversions);
   void save_org_in_field(Field *field);
   enum Item_result result_type () const { return (*ref)->result_type(); }
@@ -2528,16 +2835,16 @@ public:
   Field *get_tmp_table_field()
   { return result_field ? result_field : (*ref)->get_tmp_table_field(); }
   Item *get_tmp_table_item(THD *thd);
-  table_map used_tables() const		
+  table_map used_tables() const;		
+  void update_used_tables(); 
+  bool const_item() const 
   {
-    return depended_from ? OUTER_REF_TABLE_BIT : (*ref)->used_tables(); 
+    return (*ref)->const_item();
   }
-  void update_used_tables() 
+  table_map not_null_tables() const 
   { 
-    if (!depended_from) 
-      (*ref)->update_used_tables(); 
+    return depended_from ? 0 : (*ref)->not_null_tables();
   }
-  table_map not_null_tables() const { return (*ref)->not_null_tables(); }
   void set_result_field(Field *field)	{ result_field= field; }
   bool is_result_field() { return 1; }
   void save_in_result_field(bool no_conversions)
@@ -2549,15 +2856,27 @@ public:
     return ref ? (*ref)->real_item() : this;
   }
   bool walk(Item_processor processor, bool walk_subquery, uchar *arg)
+  { 
+    if (ref && *ref)
+      return (*ref)->walk(processor, walk_subquery, arg) ||
+             (this->*processor)(arg); 
+    else
+      return FALSE;
+  }
+  Item* transform(Item_transformer, uchar *arg);
+  Item* compile(Item_analyzer analyzer, uchar **arg_p,
+                Item_transformer transformer, uchar *arg_t);
+  bool enumerate_field_refs_processor(uchar *arg)
+  { return (*ref)->enumerate_field_refs_processor(arg); }
+  void no_rows_in_result()
   {
-    return (*ref)->walk(processor, walk_subquery, arg) ||
-           (this->*processor)(arg);
+    (*ref)->no_rows_in_result();
   }
-  virtual void print(String *str, enum_query_type query_type);
-  bool result_as_longlong()
+  void restore_to_before_no_rows_in_result()
   {
-    return (*ref)->result_as_longlong();
+    (*ref)->restore_to_before_no_rows_in_result();
   }
+  virtual void print(String *str, enum_query_type query_type);
   void cleanup();
   Item_field *filed_for_view_update()
     { return (*ref)->filed_for_view_update(); }
@@ -2590,19 +2909,17 @@ public:
     if (ref && result_type() == ROW_RESULT)
       (*ref)->bring_value();
   }
-  bool get_time(MYSQL_TIME *ltime)
+  bool check_vcol_func_processor(uchar *arg) 
   {
-    DBUG_ASSERT(fixed);
-    return (*ref)->get_time(ltime);
+    return trace_unsupported_by_check_vcol_func_processor("ref");
   }
-  virtual bool basic_const_item() const { return ref && (*ref)->basic_const_item(); }
+  bool basic_const_item() const { return ref && (*ref)->basic_const_item(); }
   bool is_outer_field() const
   {
     DBUG_ASSERT(fixed);
     DBUG_ASSERT(ref);
     return (*ref)->is_outer_field();
   }
-
 };
 
 
@@ -2629,6 +2946,7 @@ public:
               alias_name_used_arg)
   {}
 
+  void save_val(Field *to);
   double val_real();
   longlong val_int();
   String *val_str(String* tmp);
@@ -2639,23 +2957,184 @@ public:
   virtual Ref_Type ref_type() { return DIRECT_REF; }
 };
 
+
+/**
+  This class is the same as Item_direct_ref but created to wrap Item_ident
+  before fix_fields() call
+*/
+
+class Item_direct_ref_to_ident :public Item_direct_ref
+{
+  Item_ident *ident;
+public:
+  Item_direct_ref_to_ident(Item_ident *item)
+    :Item_direct_ref(item->context, (Item**)&item, item->table_name, item->field_name,
+                     FALSE)
+  {
+    ident= item;
+    ref= (Item**)&ident;
+  }
+
+  bool fix_fields(THD *thd, Item **it)
+  {
+    DBUG_ASSERT(ident->type() == FIELD_ITEM || ident->type() == REF_ITEM);
+    if ((!ident->fixed && ident->fix_fields(thd, ref)) ||
+        ident->check_cols(1))
+      return TRUE;
+    set_properties();
+    return FALSE;
+  }
+
+  virtual void print(String *str, enum_query_type query_type)
+  { ident->print(str, query_type); }
+
+};
+
+
+class Item_cache;
+class Expression_cache;
+
+/**
+  Objects of this class can store their values in an expression cache.
+*/
+
+class Item_cache_wrapper :public Item_result_field
+{
+private:
+  /* Pointer to the cached expression */
+  Item *orig_item;
+  Expression_cache *expr_cache;
+  /*
+    In order to put the expression into the expression cache and return
+    value of val_*() method, we will need to get the expression value twice
+    (probably in different types).  In order to avoid making two
+    (potentially costly) orig_item->val_*() calls, we store expression value
+    in this Item_cache object.
+  */
+  Item_cache *expr_value;
+
+  List<Item> parameters;
+
+  Item *check_cache();
+  void cache();
+  void init_on_demand();
+
+public:
+  Item_cache_wrapper(Item *item_arg);
+  ~Item_cache_wrapper();
+
+  const char *func_name() const { return "<expr_cache>"; }
+  enum Type type() const { return EXPR_CACHE_ITEM; }
+  enum Type real_type() const { return orig_item->type(); }
+
+  bool set_cache(THD *thd);
+
+  bool fix_fields(THD *thd, Item **it);
+  void fix_length_and_dec() {}
+  void cleanup();
+
+  /* Methods of getting value which should be cached in the cache */
+  void save_val(Field *to);
+  double val_real();
+  longlong val_int();
+  String *val_str(String* tmp);
+  my_decimal *val_decimal(my_decimal *);
+  bool val_bool();
+  bool is_null();
+  bool get_date(MYSQL_TIME *ltime, uint fuzzydate);
+  bool send(Protocol *protocol, String *buffer);
+  void save_org_in_field(Field *field)
+  {
+    save_val(field);
+  }
+  void save_in_result_field(bool no_conversions)
+  {
+    save_val(result_field);
+  }
+  Item* get_tmp_table_item(THD *thd_arg);
+
+  /* Following methods make this item transparent as much as possible */
+
+  virtual void print(String *str, enum_query_type query_type);
+  virtual const char *full_name() const { return orig_item->full_name(); }
+  virtual void make_field(Send_field *field) { orig_item->make_field(field); }
+  bool eq(const Item *item, bool binary_cmp) const
+  {
+    Item *it= ((Item *) item)->real_item();
+    return orig_item->eq(it, binary_cmp);
+  }
+  void fix_after_pullout(st_select_lex *new_parent, Item **refptr)
+  {
+    orig_item->fix_after_pullout(new_parent, &orig_item);
+  }
+  int save_in_field(Field *to, bool no_conversions);
+  enum Item_result result_type () const { return orig_item->result_type(); }
+  enum_field_types field_type() const   { return orig_item->field_type(); }
+  table_map used_tables() const { return orig_item->used_tables(); }
+  void update_used_tables() { orig_item->update_used_tables(); }
+  bool const_item() const { return orig_item->const_item(); }
+  table_map not_null_tables() const { return orig_item->not_null_tables(); }
+  bool walk(Item_processor processor, bool walk_subquery, uchar *arg)
+  {
+    return orig_item->walk(processor, walk_subquery, arg) ||
+      (this->*processor)(arg);
+  }
+  bool enumerate_field_refs_processor(uchar *arg)
+  { return orig_item->enumerate_field_refs_processor(arg); }
+  Item_field *filed_for_view_update()
+  { return orig_item->filed_for_view_update(); }
+
+  /* Row emulation: forwarding of ROW-related calls to orig_item */
+  uint cols()
+  { return result_type() == ROW_RESULT ? orig_item->cols() : 1; }
+  Item* element_index(uint i)
+  { return result_type() == ROW_RESULT ? orig_item->element_index(i) : this; }
+  Item** addr(uint i)
+  { return result_type() == ROW_RESULT ? orig_item->addr(i) : 0; }
+  bool check_cols(uint c)
+  {
+    return (result_type() == ROW_RESULT ?
+            orig_item->check_cols(c) :
+            Item::check_cols(c));
+  }
+  bool null_inside()
+  { return result_type() == ROW_RESULT ? orig_item->null_inside() : 0; }
+  void bring_value()
+  {
+    if (result_type() == ROW_RESULT)
+      orig_item->bring_value();
+  }
+  virtual bool is_expensive() { return orig_item->is_expensive(); }
+  bool is_expensive_processor(uchar *arg)
+  { return orig_item->is_expensive_processor(arg); }
+  bool check_vcol_func_processor(uchar *arg)
+  {
+    return trace_unsupported_by_check_vcol_func_processor("cache");
+  }
+};
+
+
 /*
   Class for view fields, the same as Item_direct_ref, but call fix_fields
   of reference if it is not called yet
 */
 class Item_direct_view_ref :public Item_direct_ref
 {
+  Item_equal *item_equal;
+  TABLE_LIST *view;
 public:
   Item_direct_view_ref(Name_resolution_context *context_arg, Item **item,
-                  const char *table_name_arg,
-                  const char *field_name_arg)
-    :Item_direct_ref(context_arg, item, table_name_arg, field_name_arg) {}
+                       const char *table_name_arg,
+                       const char *field_name_arg,
+                       TABLE_LIST *view_arg)
+    :Item_direct_ref(context_arg, item, table_name_arg, field_name_arg),
+    item_equal(0), view(view_arg) {}
   /* Constructor need to process subselect with temporary tables (see Item) */
   Item_direct_view_ref(THD *thd, Item_direct_ref *item)
-    :Item_direct_ref(thd, item) {}
+    :Item_direct_ref(thd, item), item_equal(0) {}
   Item_direct_view_ref(TABLE_LIST *view_arg, Item **item,
                        const char *field_name_arg)
-    :Item_direct_ref(view_arg, item, field_name_arg)
+    :Item_direct_ref(view_arg, item, field_name_arg), item_equal(0)
   {}
 
   bool fix_fields(THD *, Item **);
@@ -2667,6 +3146,25 @@ public:
     return item;
   }
   virtual Ref_Type ref_type() { return VIEW_REF; }
+  Item_equal *get_item_equal() { return item_equal; }
+  void set_item_equal(Item_equal *item_eq) { item_equal= item_eq; }
+  Item_equal *find_item_equal(COND_EQUAL *cond_equal);
+  bool subst_argument_checker(uchar **arg);
+  Item *equal_fields_propagator(uchar *arg);
+  Item *replace_equal_field(uchar *arg);
+  table_map used_tables() const;	
+  bool walk(Item_processor processor, bool walk_subquery, uchar *arg)
+  { 
+    return (*ref)->walk(processor, walk_subquery, arg) ||
+           (this->*processor)(arg);
+  }
+   bool view_used_tables_processor(uchar *arg) 
+  {
+    TABLE_LIST *view_arg= (TABLE_LIST *) arg;
+    if (view_arg == view)
+      view_arg->view_used_tables|= (*ref)->used_tables();
+    return 0;
+  }
 };
 
 
@@ -2692,12 +3190,13 @@ public:
     of the outer select.
   */
   bool found_in_select_list;
+  bool found_in_group_by;
   Item_outer_ref(Name_resolution_context *context_arg,
                  Item_field *outer_field_arg)
     :Item_direct_ref(context_arg, 0, outer_field_arg->table_name,
                      outer_field_arg->field_name),
     outer_ref(outer_field_arg), in_sum_func(0),
-    found_in_select_list(0)
+    found_in_select_list(0), found_in_group_by(0)
   {
     ref= &outer_ref;
     set_properties();
@@ -2708,18 +3207,21 @@ public:
                  bool alias_name_used_arg)
     :Item_direct_ref(context_arg, item, table_name_arg, field_name_arg,
                      alias_name_used_arg),
-    outer_ref(0), in_sum_func(0), found_in_select_list(1)
+    outer_ref(0), in_sum_func(0), found_in_select_list(1), found_in_group_by(0)
   {}
   void save_in_result_field(bool no_conversions)
   {
     outer_ref->save_org_in_field(result_field);
   }
   bool fix_fields(THD *, Item **);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   table_map used_tables() const
   {
     return (*ref)->const_item() ? 0 : OUTER_REF_TABLE_BIT;
   }
+  table_map not_null_tables() const { return 0; }
   virtual Ref_Type ref_type() { return OUTER_REF; }
+  bool check_inner_refs_processor(uchar * arg); 
 };
 
 
@@ -2745,6 +3247,7 @@ public:
 		       const char *table_name_arg, const char *field_name_arg)
     :Item_ref(context_arg, item, table_name_arg, field_name_arg),
      owner(master) {}
+  void save_val(Field *to);
   double val_real();
   longlong val_int();
   String* val_str(String* s);
@@ -2752,15 +3255,7 @@ public:
   bool val_bool();
   bool get_date(MYSQL_TIME *ltime, uint fuzzydate);
   virtual void print(String *str, enum_query_type query_type);
-  /*
-    we add RAND_TABLE_BIT to prevent moving this item from HAVING to WHERE
-  */
-  table_map used_tables() const
-  {
-    return (depended_from ?
-            OUTER_REF_TABLE_BIT :
-            (*ref)->used_tables() | RAND_TABLE_BIT);
-  }
+  table_map used_tables() const;
 };
 
 /*
@@ -2776,7 +3271,7 @@ class Item_int_with_ref :public Item_int
 {
   Item *ref;
 public:
-  Item_int_with_ref(longlong i, Item *ref_arg, my_bool unsigned_arg) :
+  Item_int_with_ref(longlong i, Item *ref_arg, bool unsigned_arg) :
     Item_int(i), ref(ref_arg)
   {
     unsigned_flag= unsigned_arg;
@@ -2891,6 +3386,10 @@ public:
   table_map used_tables() const { return (table_map) 1L; }
   bool const_item() const { return 0; }
   bool is_null() { return null_value; }
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("copy");
+  }
 
   /*  
     Override the methods below as pure virtual to make sure all the 
@@ -3006,10 +3505,32 @@ public:
 };
 
 
+/*
+  Cached_item_XXX objects are not exactly caches. They do the following:
+
+  Each Cached_item_XXX object has
+   - its source item
+   - saved value of the source item
+   - cmp() method that compares the saved value with the current value of the
+     source item, and if they were not equal saves item's value into the saved
+     value.
+*/
+
+/*
+  Cached_item_XXX objects are not exactly caches. They do the following:
+
+  Each Cached_item_XXX object has
+   - its source item
+   - saved value of the source item
+   - cmp() method that compares the saved value with the current value of the
+     source item, and if they were not equal saves item's value into the saved
+     value.
+*/
+
 class Cached_item :public Sql_alloc
 {
 public:
-  my_bool null_value;
+  bool null_value;
   Cached_item() :null_value(0) {}
   virtual bool cmp(void)=0;
   virtual ~Cached_item(); /*line -e1509 */
@@ -3062,9 +3583,10 @@ class Cached_item_field :public Cached_item
   uint length;
 
 public:
-  Cached_item_field(Item_field *item)
+  Cached_item_field(Field *arg_field) : field(arg_field)
   {
-    field= item->field;
+    field= arg_field;
+    /* TODO: take the memory allocation below out of the constructor. */
     buff= (uchar*) sql_calloc(length=field->pack_length());
   }
   bool cmp(void);
@@ -3123,7 +3645,8 @@ public:
   {
     return Item_field::save_in_field(field_arg, no_conversions);
   }
-  /* 
+  enum Type type() const { return INSERT_VALUE_ITEM; }
+  /*
    We use RAND_TABLE_BIT to prevent Item_insert_value from
    being treated as a constant and precalculated before execution
   */
@@ -3134,6 +3657,10 @@ public:
     return arg->walk(processor, walk_subquery, args) ||
 	    (this->*processor)(args);
   }
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("values");
+  }
 };
 
 
@@ -3218,9 +3745,20 @@ private:
     BEFORE INSERT of BEFORE UPDATE trigger.
   */
   bool read_only;
+  virtual bool check_vcol_func_processor(uchar *arg)
+  {
+    return trace_unsupported_by_check_vcol_func_processor("trigger");
+  }
 };
 
 
+/**
+  @todo
+  Implement the is_null() method for this class. Currently calling is_null()
+  on any Item_cache object resolves to Item::is_null(), which returns FALSE
+  for any value.
+*/
+
 class Item_cache: public Item_basic_constant
 {
 protected:
@@ -3247,7 +3785,8 @@ public:
     cached_field_type(MYSQL_TYPE_STRING),
     value_cached(0)
   {
-    fixed= 1; 
+    fixed= 1;
+    maybe_null= 1;
     null_value= 1;
   }
   Item_cache(enum_field_types field_type_arg):
@@ -3256,6 +3795,7 @@ public:
     value_cached(0)
   {
     fixed= 1;
+    maybe_null= 1;
     null_value= 1;
   }
 
@@ -3288,6 +3828,10 @@ public:
   {
     return this == item;
   }
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("cache");
+  }
   /**
      Check if saved item has a non-NULL value.
      Will cache value of saved item if not already done. 
@@ -3298,24 +3842,12 @@ public:
     return (value_cached || cache_value()) && !null_value;
   }
 
-  /** 
-    If this item caches a field value, return pointer to underlying field.
-
-    @return Pointer to field, or NULL if this is not a cache for a field value.
-  */
-  Field* field() { return cached_field; }
-
   virtual void store(Item *item);
   virtual bool cache_value()= 0;
   bool basic_const_item() const
   { return test(example && example->basic_const_item());}
   virtual void clear() { null_value= TRUE; value_cached= FALSE; }
-  Item_result result_type() const
-  {
-    if (!example)
-      return INT_RESULT;
-    return Field::result_merge_type(example->field_type());
-  }
+  bool is_null() { return null_value; }
 };
 
 
@@ -3324,20 +3856,41 @@ class Item_cache_int: public Item_cache
 protected:
   longlong value;
 public:
-  Item_cache_int(): Item_cache(),
+  Item_cache_int(): Item_cache(MYSQL_TYPE_LONGLONG),
     value(0) {}
   Item_cache_int(enum_field_types field_type_arg):
     Item_cache(field_type_arg), value(0) {}
 
-  virtual void store(Item *item){ Item_cache::store(item); }
-  void store(Item *item, longlong val_arg);
   double val_real();
   longlong val_int();
   String* val_str(String *str);
   my_decimal *val_decimal(my_decimal *);
   enum Item_result result_type() const { return INT_RESULT; }
-  bool result_as_longlong() { return TRUE; }
   bool cache_value();
+  int save_in_field(Field *field, bool no_conversions);
+};
+
+
+class Item_cache_temporal: public Item_cache_int
+{
+public:
+  Item_cache_temporal(enum_field_types field_type_arg);
+  String* val_str(String *str);
+  bool cache_value();
+  bool get_date(MYSQL_TIME *ltime, uint fuzzydate);
+  int save_in_field(Field *field, bool no_conversions);
+  void store_packed(longlong val_arg);
+  /*
+    Having a clone_item method tells optimizer that this object
+    is a constant and need not be optimized further.
+    Important when storing packed datetime values.
+  */
+  Item *clone_item()
+  {
+    Item_cache_temporal *item= new Item_cache_temporal(cached_field_type);
+    item->store_packed(value);
+    return item;
+  }
 };
 
 
@@ -3345,7 +3898,7 @@ class Item_cache_real: public Item_cache
 {
   double value;
 public:
-  Item_cache_real(): Item_cache(),
+  Item_cache_real(): Item_cache(MYSQL_TYPE_DOUBLE),
     value(0) {}
 
   double val_real();
@@ -3362,7 +3915,7 @@ class Item_cache_decimal: public Item_cache
 protected:
   my_decimal decimal_value;
 public:
-  Item_cache_decimal(): Item_cache() {}
+  Item_cache_decimal(): Item_cache(MYSQL_TYPE_NEWDECIMAL) {}
 
   double val_real();
   longlong val_int();
@@ -3468,40 +4021,6 @@ public:
 };
 
 
-class Item_cache_datetime: public Item_cache
-{
-protected:
-  String str_value;
-  longlong int_value;
-  bool str_value_cached;
-public:
-  Item_cache_datetime(enum_field_types field_type_arg):
-    Item_cache(field_type_arg), int_value(0), str_value_cached(0)
-  {
-    cmp_context= STRING_RESULT;
-  }
-
-  void store(Item *item, longlong val_arg);
-  void store(Item *item);
-  double val_real();
-  longlong val_int();
-  String* val_str(String *str);
-  my_decimal *val_decimal(my_decimal *);
-  enum Item_result result_type() const { return STRING_RESULT; }
-  bool result_as_longlong() { return TRUE; }
-  /*
-    In order to avoid INT <-> STRING conversion of a DATETIME value
-    two cache_value functions are introduced. One (cache_value) caches STRING
-    value, another (cache_value_int) - INT value. Thus this cache item
-    completely relies on the ability of the underlying item to do the
-    correct conversion.
-  */
-  bool cache_value_int();
-  bool cache_value();
-  void clear() { Item_cache::clear(); str_value_cached= FALSE; }
-};
-
-
 /*
   Item_type_holder used to store type. name, length of Item for UNIONS &
   derived tables.
@@ -3545,11 +4064,96 @@ void mark_select_range_as_dependent(THD *thd,
                                     Field *found_field, Item *found_item,
                                     Item_ident *resolved_item);
 
-extern Cached_item *new_Cached_item(THD *thd, Item *item);
+extern Cached_item *new_Cached_item(THD *thd, Item *item,
+                                    bool pass_through_ref);
 extern Item_result item_cmp_type(Item_result a,Item_result b);
 extern void resolve_const_item(THD *thd, Item **ref, Item *cmp_item);
 extern int stored_field_cmp_to_item(THD *thd, Field *field, Item *item);
 
 extern const String my_null_string;
 
-#endif /* ITEM_INCLUDED */
+/**
+  Interface for Item iterator
+*/
+
+class Item_iterator
+{
+public:
+  /**
+    Shall set this iterator to the position before the first item
+
+    @note
+    This method also may perform some other initialization actions like
+    allocation of certain resources.
+  */
+  virtual void open()= 0;
+  /**
+    Shall return the next Item (or NULL if there is no next item) and
+    move pointer to position after it.
+  */
+  virtual Item *next()= 0;
+  /**
+    Shall force iterator to free resources (if it holds them)
+
+    @note
+    One should not use the iterator without open() call after close()
+  */
+  virtual void close()= 0;
+
+  virtual ~Item_iterator() {}
+};
+
+
+/**
+  Item iterator over List_iterator for Item references
+*/
+
+class Item_iterator_ref_list: public Item_iterator
+{
+  List_iterator<Item*> list;
+public:
+  Item_iterator_ref_list(List_iterator<Item*> &arg_list):
+    list(arg_list) {}
+  void open() { list.rewind(); }
+  Item *next() { return *(list++); }
+  void close() {}
+};
+
+
+/**
+  Item iterator over List_iterator for Items
+*/
+
+class Item_iterator_list: public Item_iterator
+{
+  List_iterator<Item> list;
+public:
+  Item_iterator_list(List_iterator<Item> &arg_list):
+    list(arg_list) {}
+  void open() { list.rewind(); }
+  Item *next() { return (list++); }
+  void close() {}
+};
+
+
+/**
+  Item iterator over Item interface for rows
+*/
+
+class Item_iterator_row: public Item_iterator
+{
+  Item *base_item;
+  uint current;
+public:
+  Item_iterator_row(Item *base) : base_item(base), current(0) {}
+  void open() { current= 0; }
+  Item *next()
+  {
+    if (current >= base_item->cols())
+      return NULL;
+    return base_item->element_index(current++);
+  }
+  void close() {}
+};
+
+#endif /* SQL_ITEM_INCLUDED */
diff --git a/sql/item_buff.cc b/sql/item_buff.cc
index 871615647d2..e72c63daf7d 100644
--- a/sql/item_buff.cc
+++ b/sql/item_buff.cc
@@ -34,11 +34,15 @@
   Create right type of Cached_item for an item.
 */
 
-Cached_item *new_Cached_item(THD *thd, Item *item)
+Cached_item *new_Cached_item(THD *thd, Item *item, bool pass_through_ref)
 {
-  if (item->real_item()->type() == Item::FIELD_ITEM &&
+  if (pass_through_ref && item->real_item()->type() == Item::FIELD_ITEM &&
       !(((Item_field *) (item->real_item()))->field->flags & BLOB_FLAG))
-    return new Cached_item_field((Item_field *) (item->real_item()));
+  {
+    Item_field *real_item= (Item_field *) item->real_item();
+    Field *cached_field= real_item->field;
+    return new Cached_item_field(cached_field);
+  }
   switch (item->result_type()) {
   case STRING_RESULT:
     return new Cached_item_str(thd, (Item_field *) item);
@@ -124,14 +128,20 @@ bool Cached_item_int::cmp(void)
 
 bool Cached_item_field::cmp(void)
 {
-  bool tmp= field->cmp(buff) != 0;		// This is not a blob!
-  if (tmp)
-    field->get_image(buff,length,field->charset());
+  bool tmp= FALSE;                              // Value is identical
+  /* Note that field can't be a blob here ! */
   if (null_value != field->is_null())
   {
     null_value= !null_value;
-    tmp=TRUE;
+    tmp= TRUE;                                  // Value has changed
   }
+
+  /*
+    If value is not null and value changed (from null to not null or
+    because of value change), then copy the new value to buffer.
+    */
+  if (! null_value && (tmp || (tmp= (field->cmp(buff) != 0))))
+    field->get_image(buff,length,field->charset());
   return tmp;
 }
 
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index e28221109d9..325e4ac219e 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -30,11 +31,9 @@
 #include "sql_select.h"
 #include "sql_parse.h"                          // check_stack_overrun
 #include "sql_time.h"                  // make_truncated_value_warning
+#include "sql_base.h"                  // dynamic_column_error_message
 
-static bool convert_constant_item(THD *, Item_field *, Item **);
-static longlong
-get_year_value(THD *thd, Item ***item_arg, Item **cache_arg,
-               Item *warn_item, bool *is_null);
+static bool convert_const_to_int(THD *, Item_field *, Item **);
 
 static Item_result item_store_type(Item_result a, Item *item,
                                    my_bool unsigned_flag)
@@ -78,6 +77,30 @@ static void agg_result_type(Item_result *type, Item **items, uint nitems)
 }
 
 
+/**
+  find a temporal type (item) that others will be converted to
+  for the purpose of comparison.
+
+  this is the type that will be used in warnings like
+  "Incorrect <<TYPE>> value".
+*/
+Item *find_date_time_item(Item **args, uint nargs, uint col)
+{
+  Item *date_arg= 0, **arg, **arg_end;
+  for (arg= args, arg_end= args + nargs; arg != arg_end ; arg++)
+  {
+    Item *item= arg[0]->element_index(col);
+    if (item->cmp_type() != TIME_RESULT)
+      continue;
+    if (item->field_type() == MYSQL_TYPE_DATETIME)
+      return item;
+    if (!date_arg)
+      date_arg= item;
+  }
+  return date_arg;
+}
+
+
 /*
   Compare row signature of two expressions
 
@@ -140,10 +163,10 @@ static int cmp_row_type(Item* item1, Item* item2)
 static int agg_cmp_type(Item_result *type, Item **items, uint nitems)
 {
   uint i;
-  type[0]= items[0]->result_type();
+  type[0]= items[0]->cmp_type();
   for (i= 1 ; i < nitems ; i++)
   {
-    type[0]= item_cmp_type(type[0], items[i]->result_type());
+    type[0]= item_cmp_type(type[0], items[i]->cmp_type());
     /*
       When aggregating types of two row expressions we have to check
       that they have the same cardinality and that each component
@@ -209,7 +232,7 @@ static uint collect_cmp_types(Item **items, uint nitems, bool skip_nulls= FALSE)
 {
   uint i;
   uint found_types;
-  Item_result left_result= items[0]->result_type();
+  Item_result left_result= items[0]->cmp_type();
   DBUG_ASSERT(nitems > 1);
   found_types= 0;
   for (i= 1; i < nitems ; i++)
@@ -217,11 +240,11 @@ static uint collect_cmp_types(Item **items, uint nitems, bool skip_nulls= FALSE)
     if (skip_nulls && items[i]->type() == Item::NULL_ITEM)
       continue; // Skip NULL constant items
     if ((left_result == ROW_RESULT || 
-         items[i]->result_type() == ROW_RESULT) &&
+         items[i]->cmp_type() == ROW_RESULT) &&
         cmp_row_type(items[0], items[i]))
       return 0;
     found_types|= 1<< (uint)item_cmp_type(left_result,
-                                           items[i]->result_type());
+                                           items[i]->cmp_type());
   }
   /*
    Even if all right-hand items are NULLs and we are skipping them all, we need
@@ -247,36 +270,61 @@ Item_bool_func2* Eq_creator::create(Item *a, Item *b) const
   return new Item_func_eq(a, b);
 }
 
+Item_bool_func2* Eq_creator::create_swap(Item *a, Item *b) const
+{
+  return new Item_func_eq(b, a);
+}
 
 Item_bool_func2* Ne_creator::create(Item *a, Item *b) const
 {
   return new Item_func_ne(a, b);
 }
 
+Item_bool_func2* Ne_creator::create_swap(Item *a, Item *b) const
+{
+  return new Item_func_ne(b, a);
+}
 
 Item_bool_func2* Gt_creator::create(Item *a, Item *b) const
 {
   return new Item_func_gt(a, b);
 }
 
+Item_bool_func2* Gt_creator::create_swap(Item *a, Item *b) const
+{
+  return new Item_func_lt(b, a);
+}
 
 Item_bool_func2* Lt_creator::create(Item *a, Item *b) const
 {
   return new Item_func_lt(a, b);
 }
 
+Item_bool_func2* Lt_creator::create_swap(Item *a, Item *b) const
+{
+  return new Item_func_gt(b, a);
+}
 
 Item_bool_func2* Ge_creator::create(Item *a, Item *b) const
 {
   return new Item_func_ge(a, b);
 }
 
+Item_bool_func2* Ge_creator::create_swap(Item *a, Item *b) const
+{
+  return new Item_func_le(b, a);
+}
 
 Item_bool_func2* Le_creator::create(Item *a, Item *b) const
 {
   return new Item_func_le(a, b);
 }
 
+Item_bool_func2* Le_creator::create_swap(Item *a, Item *b) const
+{
+  return new Item_func_ge(b, a);
+}
+
 /*
   Test functions
   Most of these  returns 0LL if false and 1LL if true and
@@ -397,13 +445,25 @@ longlong Item_func_nop_all::val_int()
     1  Item was replaced with an integer version of the item
 */
 
-static bool convert_constant_item(THD *thd, Item_field *field_item,
+static bool convert_const_to_int(THD *thd, Item_field *field_item,
                                   Item **item)
 {
   Field *field= field_item->field;
   int result= 0;
 
-  if ((*item)->const_item())
+  /*
+    We don't need to convert an integer to an integer,
+    pretend it's already converted.
+
+    But we still convert it if it is compared with a Field_year,
+    as YEAR(2) may change the value of an integer when converting it
+    to an integer (say, 0 to 70).
+  */
+  if ((*item)->cmp_type() == INT_RESULT &&
+      field_item->field_type() != MYSQL_TYPE_YEAR)
+    return 1;
+
+  if ((*item)->const_item() && !(*item)->is_expensive())
   {
     TABLE *table= field->table;
     ulonglong orig_sql_mode= thd->variables.sql_mode;
@@ -414,7 +474,8 @@ static bool convert_constant_item(THD *thd, Item_field *field_item,
     LINT_INIT(old_maps[0]);
     LINT_INIT(old_maps[1]);
 
-    if (table)
+    /* table->read_set may not be set if we come here from a CREATE TABLE */
+    if (table && table->read_set)
       dbug_tmp_use_all_columns(table, old_maps, 
                                table->read_set, table->write_set);
     /* For comparison purposes allow invalid dates like 2000-01-32 */
@@ -423,17 +484,15 @@ static bool convert_constant_item(THD *thd, Item_field *field_item,
     thd->count_cuted_fields= CHECK_FIELD_IGNORE;
 
     /*
-      Store the value of the field/constant if it references an outer field
-      because the call to save_in_field below overrides that value.
-      Don't save field value if no data has been read yet.
-      Outer constant values are always saved.
+      Store the value of the field/constant because the call to save_in_field
+      below overrides that value. Don't save field value if no data has been
+      read yet.
     */
-    bool save_field_value= (field_item->depended_from &&
-                            (field_item->const_item() ||
-                             !(field->table->status & STATUS_NO_RECORD)));
+    bool save_field_value= (field_item->const_item() ||
+                            !(field->table->status & STATUS_NO_RECORD));
     if (save_field_value)
       orig_field_val= field->val_int();
-    if (!(*item)->is_null() && !(*item)->save_in_field(field, 1))
+    if (!(*item)->save_in_field(field, 1) && !field->is_null())
     {
       Item *tmp= new Item_int_with_ref(field->val_int(), *item,
                                        test(field->flags & UNSIGNED_FLAG));
@@ -450,7 +509,7 @@ static bool convert_constant_item(THD *thd, Item_field *field_item,
     }
     thd->variables.sql_mode= orig_sql_mode;
     thd->count_cuted_fields= orig_count_cuted_fields;
-    if (table)
+    if (table && table->read_set)
       dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_maps);
   }
   return result;
@@ -481,7 +540,6 @@ void Item_bool_func2::fix_length_and_dec()
     to the collation of A.
   */
 
-  
   DTCollation coll;
   if (args[0]->result_type() == STRING_RESULT &&
       args[1]->result_type() == STRING_RESULT &&
@@ -490,48 +548,27 @@ void Item_bool_func2::fix_length_and_dec()
     
   args[0]->cmp_context= args[1]->cmp_context=
     item_cmp_type(args[0]->result_type(), args[1]->result_type());
-  // Make a special case of compare with fields to get nicer DATE comparisons
 
-  if (functype() == LIKE_FUNC)  // Disable conversion in case of LIKE function.
-  {
-    set_cmp_func();
-    return;
-  }
+  /*
+    Make a special case of compare with fields to get nicer comparisons
+    of numbers with constant string.
+    This directly contradicts the manual (number and a string should
+    be compared as doubles), but seems to provide more
+    "intuitive" behavior in some cases (but less intuitive in others).
 
+    But disable conversion in case of LIKE function.
+  */
   thd= current_thd;
-  if (!thd->lex->is_ps_or_view_context_analysis())
+  if (functype() != LIKE_FUNC && !thd->lex->is_ps_or_view_context_analysis())
   {
-    if (args[0]->real_item()->type() == FIELD_ITEM)
+    int field;
+    if (args[field= 0]->real_item()->type() == FIELD_ITEM ||
+        args[field= 1]->real_item()->type() == FIELD_ITEM)
     {
-      Item_field *field_item= (Item_field*) (args[0]->real_item());
-      if (field_item->field->can_be_compared_as_longlong() &&
-          !(field_item->is_datetime() &&
-            args[1]->result_type() == STRING_RESULT))
-      {
-        if (convert_constant_item(thd, field_item, &args[1]))
-        {
-          cmp.set_cmp_func(this, tmp_arg, tmp_arg+1,
-                           INT_RESULT);		// Works for all types.
-          args[0]->cmp_context= args[1]->cmp_context= INT_RESULT;
-          return;
-        }
-      }
-    }
-    if (args[1]->real_item()->type() == FIELD_ITEM)
-    {
-      Item_field *field_item= (Item_field*) (args[1]->real_item());
-      if (field_item->field->can_be_compared_as_longlong() &&
-          !(field_item->is_datetime() &&
-            args[0]->result_type() == STRING_RESULT))
-      {
-        if (convert_constant_item(thd, field_item, &args[0]))
-        {
-          cmp.set_cmp_func(this, tmp_arg, tmp_arg+1,
-                           INT_RESULT); // Works for all types.
-          args[0]->cmp_context= args[1]->cmp_context= INT_RESULT;
-          return;
-        }
-      }
+      Item_field *field_item= (Item_field*) (args[field]->real_item());
+      if (field_item->cmp_type() == INT_RESULT &&
+          convert_const_to_int(thd, field_item, &args[!field]))
+        args[0]->cmp_context= args[1]->cmp_context= INT_RESULT;
     }
   }
   set_cmp_func();
@@ -545,6 +582,9 @@ int Arg_comparator::set_compare_func(Item_result_field *item, Item_result type)
                          [is_owner_equal_func()];
 
   switch (type) {
+  case TIME_RESULT:
+    cmp_collation.collation= &my_charset_numeric;
+    break;
   case ROW_RESULT:
   {
     uint n= (*a)->cols();
@@ -638,8 +678,9 @@ int Arg_comparator::set_compare_func(Item_result_field *item, Item_result type)
     }
     break;
   }
-  default:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
+    break;
   }
   return 0;
 }
@@ -653,12 +694,14 @@ int Arg_comparator::set_compare_func(Item_result_field *item, Item_result type)
   @param[in]   warn_name  Field name for issuing the warning
   @param[out]  l_time     The MYSQL_TIME objects is initialized.
 
-  Parses a date provided in the string str into a MYSQL_TIME object. If the
-  string contains an incorrect date or doesn't correspond to a date at all
-  then a warning is issued. The warn_type and the warn_name arguments are used
-  as the name and the type of the field when issuing the warning. If any input
-  was discarded (trailing or non-timestamp-y characters), return value will be
-  TRUE.
+  Parses a date provided in the string str into a MYSQL_TIME object.
+  The date is used for comparison, that is, fuzzy dates are allowed
+  independently of sql_mode.
+  If the string contains an incorrect date or doesn't correspond to a date at
+  all then a warning is issued. The warn_type and the warn_name arguments are
+  used as the name and the type of the field when issuing the warning. If any
+  input was discarded (trailing or non-timestamp-y characters), return value
+  will be TRUE.
 
   @return Status flag
   @retval FALSE Success.
@@ -671,16 +714,17 @@ bool get_mysql_time_from_str(THD *thd, String *str, timestamp_type warn_type,
   bool value;
   int error;
   enum_mysql_timestamp_type timestamp_type;
+  int flags= TIME_FUZZY_DATE | MODE_INVALID_DATES;
+  ErrConvString err(str);
+
+  if (warn_type == MYSQL_TIMESTAMP_TIME)
+    flags|= TIME_TIME_ONLY;
 
   timestamp_type= 
-    str_to_datetime(str->ptr(), str->length(), l_time,
-                    (TIME_FUZZY_DATE | MODE_INVALID_DATES |
-                     (thd->variables.sql_mode &
-                      (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE))),
-                    &error);
-
-  if (timestamp_type == MYSQL_TIMESTAMP_DATETIME || 
-      timestamp_type == MYSQL_TIMESTAMP_DATE)
+    str_to_datetime(str->charset(), str->ptr(), str->length(),
+                    l_time, flags, &error);
+
+  if (timestamp_type > MYSQL_TIMESTAMP_ERROR)
     /*
       Do not return yet, we may still want to throw a "trailing garbage"
       warning.
@@ -694,270 +738,38 @@ bool get_mysql_time_from_str(THD *thd, String *str, timestamp_type warn_type,
 
   if (error > 0)
     make_truncated_value_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                                 str->ptr(), str->length(),
-                                 warn_type, warn_name);
+                                 &err, warn_type, warn_name);
 
   return value;
 }
 
 
 /**
-  @brief Convert date provided in a string to the int representation.
+  Prepare the comparator (set the comparison function) for comparing
+  items *a1 and *a2 in the context of 'type'.
 
-  @param[in]   thd        thread handle
-  @param[in]   str        a string to convert
-  @param[in]   warn_type  type of the timestamp for issuing the warning
-  @param[in]   warn_name  field name for issuing the warning
-  @param[out]  error_arg  could not extract a DATE or DATETIME
+  @param[in]      owner_arg  Item performing the comparison (e.g. Item_func_eq)
+  @param[in,out]  a1         first argument to compare
+  @param[in,out]  a2         second argument to compare
+  @param[in]      type       type context to compare in
 
-  @details Convert date provided in the string str to the int
-    representation.  If the string contains wrong date or doesn't
-    contain it at all then a warning is issued.  The warn_type and
-    the warn_name arguments are used as the name and the type of the
-    field when issuing the warning.
-
-  @return
-    converted value. 0 on error and on zero-dates -- check 'failure'
+  Both *a1 and *a2 can be replaced by this method - typically by constant
+  items, holding the cached converted value of the original (constant) item.
 */
-static ulonglong get_date_from_str(THD *thd, String *str, 
-                                   timestamp_type warn_type, 
-                                   const char *warn_name, bool *error_arg)
-{
-  MYSQL_TIME l_time;
-  *error_arg= get_mysql_time_from_str(thd, str, warn_type, warn_name, &l_time);
-
-  if (*error_arg)
-    return 0;
-  return TIME_to_ulonglong_datetime(&l_time);
-}
-
-
-/*
-  Check whether compare_datetime() can be used to compare items.
-
-  SYNOPSIS
-    Arg_comparator::can_compare_as_dates()
-    a, b          [in]  items to be compared
-    const_value   [out] converted value of the string constant, if any
-
-  DESCRIPTION
-    Check several cases when the DATE/DATETIME comparator should be used.
-    The following cases are checked:
-      1. Both a and b is a DATE/DATETIME field/function returning string or
-         int result.
-      2. Only a or b is a DATE/DATETIME field/function returning string or
-         int result and the other item (b or a) is an item with string result.
-         If the second item is a constant one then it's checked to be
-         convertible to the DATE/DATETIME type. If the constant can't be
-         converted to a DATE/DATETIME then the compare_datetime() comparator
-         isn't used and the warning about wrong DATE/DATETIME value is issued.
-      In all other cases (date-[int|real|decimal]/[int|real|decimal]-date)
-      the comparison is handled by other comparators.
-    If the datetime comparator can be used and one the operands of the
-    comparison is a string constant that was successfully converted to a
-    DATE/DATETIME type then the result of the conversion is returned in the
-    const_value if it is provided.  If there is no constant or
-    compare_datetime() isn't applicable then the *const_value remains
-    unchanged.
-
-  RETURN
-    the found type of date comparison
-*/
-
-enum Arg_comparator::enum_date_cmp_type
-Arg_comparator::can_compare_as_dates(Item *a, Item *b, ulonglong *const_value)
-{
-  enum enum_date_cmp_type cmp_type= CMP_DATE_DFLT;
-  Item *str_arg= 0, *date_arg= 0;
-
-  if (a->type() == Item::ROW_ITEM || b->type() == Item::ROW_ITEM)
-    return CMP_DATE_DFLT;
-
-  if (a->is_datetime())
-  {
-    if (b->is_datetime())
-      cmp_type= CMP_DATE_WITH_DATE;
-    else if (b->result_type() == STRING_RESULT)
-    {
-      cmp_type= CMP_DATE_WITH_STR;
-      date_arg= a;
-      str_arg= b;
-    }
-  }
-  else if (b->is_datetime() && a->result_type() == STRING_RESULT)
-  {
-    cmp_type= CMP_STR_WITH_DATE;
-    date_arg= b;
-    str_arg= a;
-  }
-
-  if (cmp_type != CMP_DATE_DFLT)
-  {
-    THD *thd= current_thd;
-    /*
-      Do not cache GET_USER_VAR() function as its const_item() may return TRUE
-      for the current thread but it still may change during the execution.
-      Don't use cache while in the context analysis mode only (i.e. for 
-      EXPLAIN/CREATE VIEW and similar queries). Cache is useless in such 
-      cases and can cause problems. For example evaluating subqueries can 
-      confuse storage engines since in context analysis mode tables 
-      aren't locked.
-    */
-    if (!thd->lex->is_ps_or_view_context_analysis() &&
-        cmp_type != CMP_DATE_WITH_DATE && str_arg->const_item() &&
-        (str_arg->type() != Item::FUNC_ITEM ||
-        ((Item_func*)str_arg)->functype() != Item_func::GUSERVAR_FUNC))
-    {
-      ulonglong value;
-      bool error;
-      String tmp, *str_val= 0;
-      timestamp_type t_type= (date_arg->field_type() == MYSQL_TYPE_DATE ?
-                              MYSQL_TIMESTAMP_DATE : MYSQL_TIMESTAMP_DATETIME);
-
-      str_val= str_arg->val_str(&tmp);
-      if (str_arg->null_value)
-        return CMP_DATE_DFLT;
-      value= get_date_from_str(thd, str_val, t_type, date_arg->name, &error);
-      if (error)
-        return CMP_DATE_DFLT;
-      if (const_value)
-        *const_value= value;
-    }
-  }
-  return cmp_type;
-}
-
-
-/*
-  Retrieves correct TIME value from the given item.
-
-  SYNOPSIS
-    get_time_value()
-    thd                 thread handle
-    item_arg   [in/out] item to retrieve TIME value from
-    cache_arg  [in/out] pointer to place to store the cache item to
-    warn_item  [in]     unused
-    is_null    [out]    TRUE <=> the item_arg is null
-
-  DESCRIPTION
-    Retrieves the correct TIME value from given item for comparison by the
-    compare_datetime() function.
-    If item's result can be compared as longlong then its int value is used
-    and a value returned by get_time function is used otherwise.
-    If an item is a constant one then its value is cached and it isn't
-    get parsed again. An Item_cache_int object is used for for cached values.
-    It seamlessly substitutes the original item.  The cache item is marked as
-    non-constant to prevent re-caching it again.
-
-  RETURN
-    obtained value
-*/
-
-longlong
-get_time_value(THD *thd, Item ***item_arg, Item **cache_arg,
-               Item *warn_item, bool *is_null)
-{
-  longlong value;
-  Item *item= **item_arg;
-  MYSQL_TIME ltime;
-
-  if (item->result_as_longlong())
-  {
-    value= item->val_int();
-    *is_null= item->null_value;
-  }
-  else
-  {
-    *is_null= item->get_time(&ltime);
-    value= !*is_null ? (longlong) TIME_to_ulonglong_datetime(&ltime) *
-                                  (ltime.neg ? -1 : 1) : 0;
-  }
-  /*
-    Do not cache GET_USER_VAR() function as its const_item() may return TRUE
-    for the current thread but it still may change during the execution.
-  */
-  if (item->const_item() && cache_arg &&
-      item->type() != Item::CACHE_ITEM &&
-      (item->type() != Item::FUNC_ITEM ||
-       ((Item_func*)item)->functype() != Item_func::GUSERVAR_FUNC))
-  {
-    Item_cache_int *cache= new Item_cache_int();
-    /* Mark the cache as non-const to prevent re-caching. */
-    cache->set_used_tables(1);
-    cache->store(item, value);
-    *cache_arg= cache;
-    *item_arg= cache_arg;
-  }
-  return value;
-}
-
 
 int Arg_comparator::set_cmp_func(Item_result_field *owner_arg,
                                         Item **a1, Item **a2,
                                         Item_result type)
 {
-  enum enum_date_cmp_type cmp_type;
-  ulonglong const_value= (ulonglong)-1;
   thd= current_thd;
   owner= owner_arg;
   set_null= set_null && owner_arg;
   a= a1;
   b= a2;
-  thd= current_thd;
 
-  if ((cmp_type= can_compare_as_dates(*a, *b, &const_value)))
-  {
-    a_type= (*a)->field_type();
-    b_type= (*b)->field_type();
-    a_cache= 0;
-    b_cache= 0;
-
-    if (const_value != (ulonglong)-1)
-    {
-      /*
-        cache_converted_constant can't be used here because it can't
-        correctly convert a DATETIME value from string to int representation.
-      */
-      Item_cache_int *cache= new Item_cache_int(MYSQL_TYPE_DATETIME);
-      /* Mark the cache as non-const to prevent re-caching. */
-      cache->set_used_tables(1);
-      if (!(*a)->is_datetime())
-      {
-        cache->store((*a), const_value);
-        a_cache= cache;
-        a= (Item **)&a_cache;
-      }
-      else
-      {
-        cache->store((*b), const_value);
-        b_cache= cache;
-        b= (Item **)&b_cache;
-      }
-    }
-    is_nulls_eq= is_owner_equal_func();
-    func= &Arg_comparator::compare_datetime;
-    get_value_a_func= &get_datetime_value;
-    get_value_b_func= &get_datetime_value;
-    cmp_collation.set(&my_charset_numeric);
-    set_cmp_context_for_datetime();
-    return 0;
-  }
-  else if (type == STRING_RESULT && (*a)->field_type() == MYSQL_TYPE_TIME &&
-           (*b)->field_type() == MYSQL_TYPE_TIME)
-  {
-    /* Compare TIME values as integers. */
-    a_cache= 0;
-    b_cache= 0;
-    is_nulls_eq= is_owner_equal_func();
-    func= &Arg_comparator::compare_datetime;
-    get_value_a_func= &get_time_value;
-    get_value_b_func= &get_time_value;
-    set_cmp_context_for_datetime();
-    return 0;
-  }
-  else if (type == STRING_RESULT &&
-           (*a)->result_type() == STRING_RESULT &&
-           (*b)->result_type() == STRING_RESULT)
+  if (type == STRING_RESULT &&
+      (*a)->result_type() == STRING_RESULT &&
+      (*b)->result_type() == STRING_RESULT)
   {
     DTCollation coll;
     coll.set((*a)->collation.collation);
@@ -965,8 +777,10 @@ int Arg_comparator::set_cmp_func(Item_result_field *owner_arg,
                                b, 1, MY_COLL_CMP_CONV, 1))
       return 1;
   }
-  else if (try_year_cmp_func(type))
-    return 0;
+  if (type == INT_RESULT &&
+      (*a)->field_type() == MYSQL_TYPE_YEAR &&
+      (*b)->field_type() == MYSQL_TYPE_YEAR)
+    type= TIME_RESULT;
 
   a= cache_converted_constant(thd, a, &a_cache, type);
   b= cache_converted_constant(thd, b, &b_cache, type);
@@ -974,46 +788,6 @@ int Arg_comparator::set_cmp_func(Item_result_field *owner_arg,
 }
 
 
-/*
-  Helper function to call from Arg_comparator::set_cmp_func()
-*/
-
-bool Arg_comparator::try_year_cmp_func(Item_result type)
-{
-  if (type == ROW_RESULT)
-    return FALSE;
-
-  bool a_is_year= (*a)->field_type() == MYSQL_TYPE_YEAR;
-  bool b_is_year= (*b)->field_type() == MYSQL_TYPE_YEAR;
-
-  if (!a_is_year && !b_is_year)
-    return FALSE;
-
-  if (a_is_year && b_is_year)
-  {
-    get_value_a_func= &get_year_value;
-    get_value_b_func= &get_year_value;
-  }
-  else if (a_is_year && (*b)->is_datetime())
-  {
-    get_value_a_func= &get_year_value;
-    get_value_b_func= &get_datetime_value;
-  }
-  else if (b_is_year && (*a)->is_datetime())
-  {
-    get_value_b_func= &get_year_value;
-    get_value_a_func= &get_datetime_value;
-  }
-  else
-    return FALSE;
-
-  is_nulls_eq= is_owner_equal_func();
-  func= &Arg_comparator::compare_datetime;
-  set_cmp_context_for_datetime();
-
-  return TRUE;
-}
-
 /**
   Convert and cache a constant.
 
@@ -1035,9 +809,14 @@ Item** Arg_comparator::cache_converted_constant(THD *thd_arg, Item **value,
                                                 Item **cache_item,
                                                 Item_result type)
 {
-  /* Don't need cache if doing context analysis only. */
-  if (!thd->lex->is_ps_or_view_context_analysis() &&
-      (*value)->const_item() && type != (*value)->result_type())
+  /*
+    Don't need cache if doing context analysis only.
+    Also, get_datetime_value creates Item_cache internally.
+    Unless fixed, we should not do it here.
+  */
+  if (!thd_arg->lex->is_ps_or_view_context_analysis() &&
+      (*value)->const_item() && type != (*value)->result_type() &&
+      type != TIME_RESULT)
   {
     Item_cache *cache= Item_cache::get_cache(*value, type);
     cache->setup(*value);
@@ -1055,110 +834,78 @@ void Arg_comparator::set_datetime_cmp_func(Item_result_field *owner_arg,
   owner= owner_arg;
   a= a1;
   b= b1;
-  a_type= (*a)->field_type();
-  b_type= (*b)->field_type();
   a_cache= 0;
   b_cache= 0;
-  is_nulls_eq= FALSE;
-  func= &Arg_comparator::compare_datetime;
-  get_value_a_func= &get_datetime_value;
-  get_value_b_func= &get_datetime_value;
-  set_cmp_context_for_datetime();
+  func= comparator_matrix[TIME_RESULT][is_owner_equal_func()];
 }
 
-
-/*
+/**
   Retrieves correct DATETIME value from given item.
 
-  SYNOPSIS
-    get_datetime_value()
-    thd                 thread handle
-    item_arg   [in/out] item to retrieve DATETIME value from
-    cache_arg  [in/out] pointer to place to store the caching item to
-    warn_item  [in]     item for issuing the conversion warning
-    is_null    [out]    TRUE <=> the item_arg is null
+  @param[in]     thd         thread handle
+  @param[in,out] item_arg    item to retrieve DATETIME value from
+  @param[in,out] cache_arg   pointer to place to store the caching item to
+  @param[in]     warn_item   item for issuing the conversion warning
+  @param[out]    is_null     TRUE <=> the item_arg is null
 
-  DESCRIPTION
+  @details
     Retrieves the correct DATETIME value from given item for comparison by the
     compare_datetime() function.
-    If item's result can be compared as longlong then its int value is used
-    and its string value is used otherwise. Strings are always parsed and
-    converted to int values by the get_date_from_str() function.
-    This allows us to compare correctly string dates with missed insignificant
-    zeros. If an item is a constant one then its value is cached and it isn't
-    get parsed again. An Item_cache_int object is used for caching values. It
-    seamlessly substitutes the original item.  The cache item is marked as
-    non-constant to prevent re-caching it again.  In order to compare
-    correctly DATE and DATETIME items the result of the former are treated as
-    a DATETIME with zero time (00:00:00).
 
-  RETURN
-    obtained value
+    If the value should be compared as time (TIME_RESULT), it's retrieved as
+    MYSQL_TIME. Otherwise it's read as a number/string and converted to time.
+    Constant items are cached, so the conversion is only done once for them.
+
+    Note the f_type behavior: if the item can be compared as time, then
+    f_type is this item's field_type(). Otherwise it's field_type() of
+    warn_item (which is the other operand of the comparison operator).
+    This logic provides correct string/number to date/time conversion
+    depending on the other operand (when comparing a string with a date, it's
+    parsed as a date, when comparing a string with a time it's parsed as a time)
+
+    If the item is a constant, it is replaced by an Item_cache_temporal that
+    holds the packed datetime value.
+
+  @return
+    MYSQL_TIME value, packed in a longlong, suitable for comparison.
 */
 
 longlong
 get_datetime_value(THD *thd, Item ***item_arg, Item **cache_arg,
                    Item *warn_item, bool *is_null)
 {
-  longlong value= 0;
-  String buf, *str= 0;
+  longlong UNINIT_VAR(value);
   Item *item= **item_arg;
+  enum_field_types f_type= item->cmp_type() == TIME_RESULT ?
+                           item->field_type() : warn_item->field_type();
 
-  if (item->result_as_longlong())
+  if (item->result_type() == INT_RESULT && item->cmp_type() == TIME_RESULT)
   {
+    /* it's our Item_cache_temporal, as created below */
     value= item->val_int();
-    *is_null= item->null_value;
-    enum_field_types f_type= item->field_type();
-    /*
-      Item_date_add_interval may return MYSQL_TYPE_STRING as the result
-      field type. To detect that the DATE value has been returned we
-      compare it with 100000000L - any DATE value should be less than it.
-      Don't shift cached DATETIME values up for the second time.
-    */
-    if (f_type == MYSQL_TYPE_DATE ||
-        (f_type != MYSQL_TYPE_DATETIME && value < 100000000L))
-      value*= 1000000L;
   }
   else
   {
-    str= item->val_str(&buf);
-    *is_null= item->null_value;
+    MYSQL_TIME ltime;
+    uint fuzzydate= TIME_FUZZY_DATE | TIME_INVALID_DATES;
+    if (f_type == MYSQL_TYPE_TIME)
+      fuzzydate|= TIME_TIME_ONLY;
+    if (item->get_date(&ltime, fuzzydate))
+      value= 0; /* invalid date */
+    else
+      value= pack_time(&ltime);
   }
-  if (*is_null)
+  if ((*is_null= item->null_value))
     return ~(ulonglong) 0;
-  /*
-    Convert strings to the integer DATE/DATETIME representation.
-    Even if both dates provided in strings we can't compare them directly as
-    strings as there is no warranty that they are correct and do not miss
-    some insignificant zeros.
-  */
-  if (str)
+  if (cache_arg && item->const_item() && item->type() != Item::CACHE_ITEM)
   {
-    bool error;
-    enum_field_types f_type= warn_item->field_type();
-    timestamp_type t_type= f_type ==
-      MYSQL_TYPE_DATE ? MYSQL_TIMESTAMP_DATE : MYSQL_TIMESTAMP_DATETIME;
-    value= (longlong) get_date_from_str(thd, str, t_type, warn_item->name, &error);
-    /*
-      If str did not contain a valid date according to the current
-      SQL_MODE, get_date_from_str() has already thrown a warning,
-      and we don't want to throw NULL on invalid date (see 5.2.6
-      "SQL modes" in the manual), so we're done here.
-    */
-  }
-  /*
-    Do not cache GET_USER_VAR() function as its const_item() may return TRUE
-    for the current thread but it still may change during the execution.
-  */
-  if (item->const_item() && cache_arg &&
-      item->type() != Item::CACHE_ITEM &&
-      (item->type() != Item::FUNC_ITEM ||
-       ((Item_func*)item)->functype() != Item_func::GUSERVAR_FUNC))
-  {
-    Item_cache_int *cache= new Item_cache_int(MYSQL_TYPE_DATETIME);
-    /* Mark the cache as non-const to prevent re-caching. */
-    cache->set_used_tables(1);
-    cache->store(item, value);
+    Query_arena backup;
+    Query_arena *save_arena= thd->switch_to_arena_for_cached_items(&backup);
+    Item_cache_temporal *cache= new Item_cache_temporal(f_type);
+    if (save_arena)
+      thd->set_query_arena(save_arena);
+
+    cache->store_packed(value);
     *cache_arg= cache;
     *item_arg= cache_arg;
   }
@@ -1167,67 +914,6 @@ get_datetime_value(THD *thd, Item ***item_arg, Item **cache_arg,
 
 
 /*
-  Retrieves YEAR value of 19XX-00-00 00:00:00 form from given item.
-
-  SYNOPSIS
-    get_year_value()
-    thd                 thread handle
-    item_arg   [in/out] item to retrieve YEAR value from
-    cache_arg  [in/out] pointer to place to store the caching item to
-    warn_item  [in]     item for issuing the conversion warning
-    is_null    [out]    TRUE <=> the item_arg is null
-
-  DESCRIPTION
-    Retrieves the YEAR value of 19XX form from given item for comparison by the
-    compare_datetime() function.
-    Converts year to DATETIME of form YYYY-00-00 00:00:00 for the compatibility
-    with the get_datetime_value function result.
-
-  RETURN
-    obtained value
-*/
-
-static longlong
-get_year_value(THD *thd, Item ***item_arg, Item **cache_arg,
-               Item *warn_item, bool *is_null)
-{
-  longlong value= 0;
-  Item *item= **item_arg;
-
-  value= item->val_int();
-  *is_null= item->null_value;
-  if (*is_null)
-    return ~(ulonglong) 0;
-
-  /*
-    Coerce value to the 19XX form in order to correctly compare
-    YEAR(2) & YEAR(4) types.
-    Here we are converting all item values but YEAR(4) fields since
-      1) YEAR(4) already has a regular YYYY form and
-      2) we don't want to convert zero/bad YEAR(4) values to the
-         value of 2000.
-  */
-  Item *real_item= item->real_item();
-  Field *field= NULL;
-  if (real_item->type() == Item::FIELD_ITEM)
-    field= ((Item_field *)real_item)->field;
-  else if (real_item->type() == Item::CACHE_ITEM)
-    field= ((Item_cache *)real_item)->field();
-  if (!(field && field->type() == MYSQL_TYPE_YEAR && field->field_length == 4))
-  {
-    if (value < 70)
-      value+= 100;
-    if (value <= 1900)
-      value+= 1900;
-  }
-  /* Convert year to DATETIME of form YYYY-00-00 00:00:00 (YYYY0000000000). */
-  value*= 10000000000LL;
-
-  return value;
-}
-
-
-/*
   Compare items values as dates.
 
   SYNOPSIS
@@ -1239,18 +925,9 @@ get_year_value(THD *thd, Item ***item_arg, Item **cache_arg,
     with help of the get_datetime_value() function.
 
   RETURN
-    If is_nulls_eq is TRUE:
-       1    if items are equal or both are null
-       0    otherwise
-    If is_nulls_eq is FALSE:
       -1   a < b or at least one item is null
        0   a == b
        1   a > b
-    See the table:
-    is_nulls_eq | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 |
-    a_is_null   | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 |
-    b_is_null   | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 |
-    result      | 1 | 0 | 0 |0/1|-1 |-1 |-1 |-1/0/1|
 */
 
 int Arg_comparator::compare_datetime()
@@ -1258,34 +935,40 @@ int Arg_comparator::compare_datetime()
   bool a_is_null, b_is_null;
   longlong a_value, b_value;
 
+  if (set_null)
+    owner->null_value= 1;
+
   /* Get DATE/DATETIME/TIME value of the 'a' item. */
-  a_value= (*get_value_a_func)(thd, &a, &a_cache, *b, &a_is_null);
-  if (!is_nulls_eq && a_is_null)
-  {
-    if (set_null)
-      owner->null_value= 1;
+  a_value= get_datetime_value(thd, &a, &a_cache, *b, &a_is_null);
+  if (a_is_null)
     return -1;
-  }
 
   /* Get DATE/DATETIME/TIME value of the 'b' item. */
-  b_value= (*get_value_b_func)(thd, &b, &b_cache, *a, &b_is_null);
-  if (a_is_null || b_is_null)
-  {
-    if (set_null)
-      owner->null_value= is_nulls_eq ? 0 : 1;
-    return is_nulls_eq ? (a_is_null == b_is_null) : -1;
-  }
+  b_value= get_datetime_value(thd, &b, &b_cache, *a, &b_is_null);
+  if (b_is_null)
+    return -1;
 
   /* Here we have two not-NULL values. */
   if (set_null)
     owner->null_value= 0;
 
   /* Compare values. */
-  if (is_nulls_eq)
-    return (a_value == b_value);
-  return a_value < b_value ? -1 : (a_value > b_value ? 1 : 0);
+  return a_value < b_value ? -1 : a_value > b_value ? 1 : 0;
 }
 
+int Arg_comparator::compare_e_datetime()
+{
+  bool a_is_null, b_is_null;
+  longlong a_value, b_value;
+
+  /* Get DATE/DATETIME/TIME value of the 'a' item. */
+  a_value= get_datetime_value(thd, &a, &a_cache, *b, &a_is_null);
+
+  /* Get DATE/DATETIME/TIME value of the 'b' item. */
+  b_value= get_datetime_value(thd, &b, &b_cache, *a, &b_is_null);
+  return a_is_null || b_is_null ? a_is_null == b_is_null
+                                : a_value == b_value;
+}
 
 int Arg_comparator::compare_string()
 {
@@ -1712,6 +1395,26 @@ longlong Item_func_truth::val_int()
 }
 
 
+bool Item_in_optimizer::is_top_level_item()
+{
+  return ((Item_in_subselect *)args[1])->is_top_level_item();
+}
+
+
+bool Item_in_optimizer::eval_not_null_tables(uchar *opt_arg)
+{
+  not_null_tables_cache= 0;
+  if (is_top_level_item())
+  {
+    /*
+      It is possible to determine NULL-rejectedness of the left arguments
+      of IN only if it is a top-level predicate.
+    */
+    not_null_tables_cache= args[0]->not_null_tables();
+  }
+  return FALSE;
+}
+
 bool Item_in_optimizer::fix_left(THD *thd, Item **ref)
 {
   if ((!args[0]->fixed && args[0]->fix_fields(thd, args)) ||
@@ -1721,7 +1424,11 @@ bool Item_in_optimizer::fix_left(THD *thd, Item **ref)
   cache->setup(args[0]);
   if (cache->cols() == 1)
   {
-    if ((used_tables_cache= args[0]->used_tables()))
+    /* 
+      Note: there can be cases when used_tables()==0 && !const_item(). See
+      Item_sum::update_used_tables for details.
+    */
+    if ((used_tables_cache= args[0]->used_tables()) || !args[0]->const_item())
       cache->set_used_tables(OUTER_REF_TABLE_BIT);
     else
       cache->set_used_tables(0);
@@ -1731,17 +1438,22 @@ bool Item_in_optimizer::fix_left(THD *thd, Item **ref)
     uint n= cache->cols();
     for (uint i= 0; i < n; i++)
     {
-      if (args[0]->element_index(i)->used_tables())
+      Item *element=args[0]->element_index(i);
+      if (element->used_tables() || !element->const_item())
 	((Item_cache *)cache->element_index(i))->set_used_tables(OUTER_REF_TABLE_BIT);
       else
 	((Item_cache *)cache->element_index(i))->set_used_tables(0);
     }
     used_tables_cache= args[0]->used_tables();
   }
-  not_null_tables_cache= args[0]->not_null_tables();
+  eval_not_null_tables(NULL);
   with_sum_func= args[0]->with_sum_func;
+  with_field= args[0]->with_field;
   if ((const_item_cache= args[0]->const_item()))
+  {
     cache->store(args[0]);
+    cache->cache_value();
+  }
   return 0;
 }
 
@@ -1765,8 +1477,8 @@ bool Item_in_optimizer::fix_fields(THD *thd, Item **ref)
   if (args[1]->maybe_null)
     maybe_null=1;
   with_sum_func= with_sum_func || args[1]->with_sum_func;
+  with_field= with_field || args[1]->with_field;
   used_tables_cache|= args[1]->used_tables();
-  not_null_tables_cache|= args[1]->not_null_tables();
   const_item_cache&= args[1]->const_item();
   fixed= 1;
   return FALSE;
@@ -1774,6 +1486,64 @@ bool Item_in_optimizer::fix_fields(THD *thd, Item **ref)
 
 
 /**
+  Add an expression cache for this subquery if it is needed
+
+  @param thd_arg         Thread handle
+
+  @details
+  The function checks whether an expression cache is needed for this item
+  and, if so, wraps the item into an item of the class
+  Item_exp_cache_wrapper with an appropriate expression cache set up there.
+
+  @note
+  used from Item::transform()
+
+  @return
+  new wrapper item if an expression cache is needed,
+  this item - otherwise
+*/
+
+Item *Item_in_optimizer::expr_cache_insert_transformer(uchar *thd_arg)
+{
+  THD *thd= (THD*) thd_arg;
+  DBUG_ENTER("Item_in_optimizer::expr_cache_insert_transformer");
+  if (args[1]->type() != Item::SUBSELECT_ITEM)
+    DBUG_RETURN(this); // MAX/MIN transformed => do nothing
+
+  if (expr_cache)
+    DBUG_RETURN(expr_cache);
+
+  if (args[1]->expr_cache_is_needed(thd) &&
+      (expr_cache= set_expr_cache(thd)))
+    DBUG_RETURN(expr_cache);
+
+  DBUG_RETURN(this);
+}
+
+
+
+/**
+    Collect and add to the list cache parameters for this Item.
+
+    @param parameters    The list where to add parameters
+*/
+
+void Item_in_optimizer::get_cache_parameters(List<Item> &parameters)
+{
+  /* Add left expression to the list of the parameters of the subquery */
+  if (args[0]->cols() == 1)
+    parameters.add_unique(args[0], &cmp_items);
+  else
+  {
+    for (uint i= 0; i < args[0]->cols(); i++)
+    {
+      parameters.add_unique(args[0]->element_index(i), &cmp_items);
+    }
+  }
+  args[1]->get_cache_parameters(parameters);
+}
+
+/**
    The implementation of optimized \<outer expression\> [NOT] IN \<subquery\>
    predicates. The implementation works as follows.
 
@@ -1842,14 +1612,23 @@ bool Item_in_optimizer::fix_fields(THD *thd, Item **ref)
 
      @see Item_in_subselect::val_bool()
      @see Item_is_not_null_test::val_int()
- */
+*/
+
 longlong Item_in_optimizer::val_int()
 {
   bool tmp;
   DBUG_ASSERT(fixed == 1);
   cache->store(args[0]);
   cache->cache_value();
-  
+
+  if (args[1]->type() != Item::SUBSELECT_ITEM)
+  {
+    /* MAX/MIN transformed => pass through */
+    longlong res= args[1]->val_int();
+    null_value= args[1]->null_value;
+    return (res);
+  }
+
   if (cache->null_value)
   {
     /*
@@ -1944,6 +1723,7 @@ void Item_in_optimizer::cleanup()
   Item_bool_func::cleanup();
   if (!save_cache)
     cache= 0;
+  expr_cache= 0;
   DBUG_VOID_RETURN;
 }
 
@@ -1955,6 +1735,94 @@ bool Item_in_optimizer::is_null()
 }
 
 
+/**
+  Transform an Item_in_optimizer and its arguments with a callback function.
+
+  @param transformer the transformer callback function to be applied to the
+         nodes of the tree of the object
+  @param argument    parameter to be passed to the transformer
+
+  @detail
+    Recursively transform the left and the right operand of this Item.
+    If the right operand is still an Item_in_subselect (or a subclass), it
+    is not transformed itself: its left operand must be the same as the
+    left operand of 'this', so we just make Item_in_subselect::left_expr
+    point to the (possibly new) left operand of 'this' and avoid creating
+    new Items. If the right operand is no longer a subselect (MAX/MIN
+    transformed), it is transformed like any other argument and the
+    result replaces args[1].
+
+  @returns
+    @retval pointer to the transformed item
+    @retval NULL if an error occurred
+*/
+
+Item *Item_in_optimizer::transform(Item_transformer transformer, uchar *argument)
+{
+  Item *new_item;
+
+  DBUG_ASSERT(!current_thd->stmt_arena->is_stmt_prepare());
+  DBUG_ASSERT(arg_count == 2);
+
+  /* Transform the left IN operand. */
+  new_item= (*args)->transform(transformer, argument);
+  if (!new_item)
+    return 0;
+  /*
+    THD::change_item_tree() should be called only if the tree was
+    really transformed, i.e. when a new item has been created.
+    Otherwise we'll be allocating a lot of unnecessary memory for
+    change records at each execution.
+  */
+  if ((*args) != new_item)
+    current_thd->change_item_tree(args, new_item);
+
+  if (args[1]->type() != Item::SUBSELECT_ITEM)
+  {
+    /* MAX/MIN transformed => transform args[1] like a plain argument */
+    new_item= args[1]->transform(transformer, argument);
+    if (!new_item)
+      return 0;
+    if (args[1] != new_item)
+      current_thd->change_item_tree(args + 1, new_item); // slot of args[1]
+  }
+  else
+  {
+    /*
+      Transform the right IN operand which should be an Item_in_subselect or a
+      subclass of it. The left operand of the IN must be the same as the left
+      operand of this Item_in_optimizer, so in this case there is no further
+      transformation, we only make both operands the same.
+      TODO: is it the way it should be?
+    */
+    DBUG_ASSERT((args[1])->type() == Item::SUBSELECT_ITEM &&
+                (((Item_subselect*)(args[1]))->substype() ==
+                 Item_subselect::IN_SUBS ||
+                 ((Item_subselect*)(args[1]))->substype() ==
+                 Item_subselect::ALL_SUBS ||
+                 ((Item_subselect*)(args[1]))->substype() ==
+                 Item_subselect::ANY_SUBS));
+
+    Item_in_subselect *in_arg= (Item_in_subselect*)args[1];
+    current_thd->change_item_tree(&in_arg->left_expr, args[0]);
+  }
+  return (this->*transformer)(argument);
+}
+
+
+bool Item_in_optimizer::is_expensive_processor(uchar *arg)
+{
+  bool left_expensive= args[0]->is_expensive_processor(arg);
+  return left_expensive || args[1]->is_expensive_processor(arg);
+}
+
+bool Item_in_optimizer::is_expensive()
+{
+  bool left_expensive= args[0]->is_expensive();   // left operand first
+  return left_expensive || args[1]->is_expensive();
+}
+
+
 longlong Item_func_eq::val_int()
 {
   DBUG_ASSERT(fixed == 1);
@@ -2112,6 +1980,7 @@ void Item_func_interval::fix_length_and_dec()
   used_tables_cache|= row->used_tables();
   not_null_tables_cache= row->not_null_tables();
   with_sum_func= with_sum_func || row->with_sum_func;
+  with_field= with_field || row->with_field;
   const_item_cache&= row->const_item();
 }
 
@@ -2244,6 +2113,16 @@ bool Item_func_between::fix_fields(THD *thd, Item **ref)
 
   thd->lex->current_select->between_count++;
 
+
+  return 0;
+}
+
+
+bool Item_func_between::eval_not_null_tables(uchar *opt_arg)
+{
+  if (Item_func_opt_neg::eval_not_null_tables(NULL))
+    return 1;
+
   /* not_null_tables_cache == union(T1(e),T1(e1),T1(e2)) */
   if (pred_level && !negated)
     return 0;
@@ -2252,19 +2131,15 @@ bool Item_func_between::fix_fields(THD *thd, Item **ref)
   not_null_tables_cache= (args[0]->not_null_tables() |
                           (args[1]->not_null_tables() &
                            args[2]->not_null_tables()));
-
   return 0;
-}
+}  
 
 
 void Item_func_between::fix_length_and_dec()
 {
-  max_length= 1;
-  int i;
-  bool datetime_found= FALSE;
-  int time_items_found= 0;
-  compare_as_dates= TRUE;
   THD *thd= current_thd;
+  max_length= 1;
+  compare_as_dates= 0;
 
   /*
     As some compare functions are generated after sql_yacc,
@@ -2279,81 +2154,79 @@ void Item_func_between::fix_length_and_dec()
    return;
 
   /*
-    Detect the comparison of DATE/DATETIME items.
-    At least one of items should be a DATE/DATETIME item and other items
-    should return the STRING result.
+    When comparing as date/time, we need to convert non-temporal values
+    (e.g.  strings) to MYSQL_TIME. get_datetime_value() does it
+    automatically when one of the operands is a date/time.  But here we
+    may need to compare two strings as dates (str1 BETWEEN str2 AND date).
+    For this to work, we need to know what date/time type we compare
+    strings as.
   */
-  if (cmp_type == STRING_RESULT)
-  {
-    for (i= 0; i < 3; i++)
-    {
-      if (args[i]->is_datetime())
-      {
-        datetime_found= TRUE;
-        continue;
-      }
-      if (args[i]->field_type() == MYSQL_TYPE_TIME &&
-          args[i]->result_as_longlong())
-        time_items_found++;
-    }
-  }
-  if (!datetime_found)
-    compare_as_dates= FALSE;
+  if (cmp_type ==  TIME_RESULT)
+    compare_as_dates= find_date_time_item(args, 3, 0);
 
-  if (compare_as_dates)
-  {
-    ge_cmp.set_datetime_cmp_func(this, args, args + 1);
-    le_cmp.set_datetime_cmp_func(this, args, args + 2);
-  }
-  else if (time_items_found == 3)
-  {
-    /* Compare TIME items as integers. */
-    cmp_type= INT_RESULT;
-  }
-  else if (args[0]->real_item()->type() == FIELD_ITEM &&
-           thd->lex->sql_command != SQLCOM_CREATE_VIEW &&
-           thd->lex->sql_command != SQLCOM_SHOW_CREATE)
+  /* See the comment about the similar block in Item_bool_func2 */
+  if (args[0]->real_item()->type() == FIELD_ITEM &&
+      !thd->lex->is_ps_or_view_context_analysis())
   {
     Item_field *field_item= (Item_field*) (args[0]->real_item());
-    if (field_item->field->can_be_compared_as_longlong())
+    if (field_item->cmp_type() == INT_RESULT)
     {
       /*
-        The following can't be recoded with || as convert_constant_item
+        The following can't be recoded with || as convert_const_to_int
         changes the argument
       */
-      if (convert_constant_item(thd, field_item, &args[1]))
-        cmp_type=INT_RESULT;			// Works for all types.
-      if (convert_constant_item(thd, field_item, &args[2]))
-        cmp_type=INT_RESULT;			// Works for all types.
+      if (convert_const_to_int(thd, field_item, &args[1]))
+        cmp_type=INT_RESULT;
+      if (convert_const_to_int(thd, field_item, &args[2]))
+        cmp_type=INT_RESULT;
     }
   }
 }
 
 
 longlong Item_func_between::val_int()
-{						// ANSI BETWEEN
+{
   DBUG_ASSERT(fixed == 1);
-  if (compare_as_dates)
+
+  switch (cmp_type) {
+  case TIME_RESULT:
   {
-    int ge_res, le_res;
+    THD *thd= current_thd;
+    longlong value, a, b;
+    Item *cache, **ptr;
+    bool value_is_null, a_is_null, b_is_null;
+
+    ptr= &args[0];
+    value= get_datetime_value(thd, &ptr, &cache, compare_as_dates,
+                              &value_is_null);
+    if (ptr != &args[0])
+      thd->change_item_tree(&args[0], *ptr);
 
-    ge_res= ge_cmp.compare();
-    if ((null_value= args[0]->null_value))
+    if ((null_value= value_is_null))
       return 0;
-    le_res= le_cmp.compare();
 
-    if (!args[1]->null_value && !args[2]->null_value)
-      return (longlong) ((ge_res >= 0 && le_res <=0) != negated);
-    else if (args[1]->null_value)
-    {
-      null_value= le_res > 0;			// not null if false range.
-    }
+    ptr= &args[1];
+    a= get_datetime_value(thd, &ptr, &cache, compare_as_dates, &a_is_null);
+    if (ptr != &args[1])
+      thd->change_item_tree(&args[1], *ptr);
+
+    ptr= &args[2];
+    b= get_datetime_value(thd, &ptr, &cache, compare_as_dates, &b_is_null);
+    if (ptr != &args[2])
+      thd->change_item_tree(&args[2], *ptr);
+
+    if (!a_is_null && !b_is_null)
+      return (longlong) ((value >= a && value <= b) != negated);
+    if (a_is_null && b_is_null)
+      null_value=1;
+    else if (a_is_null)
+      null_value= value <= b;			// not null if false range.
     else
-    {
-      null_value= ge_res < 0;
-    }
+      null_value= value >= a;
+    break;
   }
-  else if (cmp_type == STRING_RESULT)
+
+  case STRING_RESULT:
   {
     String *value,*a,*b;
     value=args[0]->val_str(&value0);
@@ -2377,8 +2250,9 @@ longlong Item_func_between::val_int()
       // Set to not null if false range.
       null_value= sortcmp(value,a,cmp_collation.collation) >= 0;
     }
+    break;
   }
-  else if (cmp_type == INT_RESULT)
+  case INT_RESULT:
   {
     longlong value=args[0]->val_int(), a, b;
     if ((null_value=args[0]->null_value))
@@ -2397,8 +2271,9 @@ longlong Item_func_between::val_int()
     {
       null_value= value >= a;
     }
+    break;
   }
-  else if (cmp_type == DECIMAL_RESULT)
+  case DECIMAL_RESULT:
   {
     my_decimal dec_buf, *dec= args[0]->val_decimal(&dec_buf),
                a_buf, *a_dec, b_buf, *b_dec;
@@ -2415,8 +2290,9 @@ longlong Item_func_between::val_int()
       null_value= (my_decimal_cmp(dec, b_dec) <= 0);
     else
       null_value= (my_decimal_cmp(dec, a_dec) >= 0);
+    break;
   }
-  else
+  case REAL_RESULT:
   {
     double value= args[0]->val_real(),a,b;
     if ((null_value=args[0]->null_value))
@@ -2435,6 +2311,13 @@ longlong Item_func_between::val_int()
     {
       null_value= value >= a;
     }
+    break;
+  }
+  case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);
+    null_value= 1;
+    return 0;
   }
   return (longlong) (!null_value && negated);
 }
@@ -2487,7 +2370,8 @@ Item_func_ifnull::fix_length_and_dec()
     decimals= 0;
     break;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   fix_char_length(char_length);
@@ -2618,6 +2502,16 @@ Item_func_if::fix_fields(THD *thd, Item **ref)
   if (Item_func::fix_fields(thd, ref))
     return 1;
 
+  return 0;
+}
+
+
+bool
+Item_func_if::eval_not_null_tables(uchar *opt_arg)
+{
+  if (Item_func::eval_not_null_tables(NULL))
+    return 1;
+
   not_null_tables_cache= (args[1]->not_null_tables() &
                           args[2]->not_null_tables());
 
@@ -3128,13 +3022,15 @@ void Item_func_case::fix_length_and_dec()
       for (nagg= 0; nagg < ncases / 2; nagg++)
         change_item_tree_if_needed(thd, &args[nagg * 2], agg[nagg + 1]);
     }
-    for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
+
+    for (i= 0; i <= (uint)TIME_RESULT; i++)
     {
       if (found_types & (1 << i) && !cmp_items[i])
       {
         DBUG_ASSERT((Item_result)i != ROW_RESULT);
+        DBUG_ASSERT((Item_result)i != TIME_RESULT);
         if (!(cmp_items[i]=
-            cmp_item::get_comparator((Item_result)i,
+            cmp_item::get_comparator((Item_result)i, 0,
                                      cmp_collation.collation)))
           return;
       }
@@ -3162,7 +3058,7 @@ void Item_func_case::fix_length_and_dec()
       agg_num_lengths(args[else_expr_num]);
     max_length= my_decimal_precision_to_length_no_truncation(max_length +
                                                              decimals, decimals,
-                                                             unsigned_flag);
+                                               unsigned_flag);
   }
 }
 
@@ -3215,7 +3111,7 @@ void Item_func_case::cleanup()
   uint i;
   DBUG_ENTER("Item_func_case::cleanup");
   Item_func::cleanup();
-  for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
+  for (i= 0; i <= (uint)TIME_RESULT; i++)
   {
     delete cmp_items[i];
     cmp_items[i]= 0;
@@ -3271,6 +3167,21 @@ double Item_func_coalesce::real_op()
 }
 
 
+bool Item_func_coalesce::get_date(MYSQL_TIME *ltime,uint fuzzydate)
+{
+  DBUG_ASSERT(fixed == 1);
+  null_value= 0;
+  for (Item **arg= args, **arg_end= args + arg_count; arg != arg_end; arg++)
+  {
+    bool rc= (*arg)->get_date(ltime, fuzzydate);
+    if (!(*arg)->null_value)
+      return rc;                    // first argument that is not NULL
+  }
+  null_value= 1;                    // no argument produced a non-NULL value
+  return 1;
+}
+
+
 my_decimal *Item_func_coalesce::decimal_op(my_decimal *decimal_value)
 {
   DBUG_ASSERT(fixed == 1);
@@ -3290,6 +3201,14 @@ void Item_func_coalesce::fix_length_and_dec()
 {
   cached_field_type= agg_field_type(args, arg_count);
   agg_result_type(&hybrid_type, args, arg_count);
+  Item_result cmp_type;
+  agg_cmp_type(&cmp_type, args, arg_count);
+  ///< @todo let result_type() return TIME_RESULT and remove this special case
+  if (cmp_type == TIME_RESULT)
+  {
+    count_real_length();
+    return;
+  }
   switch (hybrid_type) {
   case STRING_RESULT:
     decimals= NOT_FIXED_DEC;
@@ -3308,7 +3227,8 @@ void Item_func_coalesce::fix_length_and_dec()
     decimals= 0;
     break;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
 }
@@ -3398,19 +3318,19 @@ int cmp_longlong(void *cmp_arg,
       One of the args is unsigned and is too big to fit into the 
       positive signed range. Report no match.
     */  
-    if ((a->unsigned_flag && ((ulonglong) a->val) > (ulonglong) LONGLONG_MAX) ||
+    if ((a->unsigned_flag && ((ulonglong) a->val) > (ulonglong) LONGLONG_MAX)
+        ||
         (b->unsigned_flag && ((ulonglong) b->val) > (ulonglong) LONGLONG_MAX))
       return a->unsigned_flag ? 1 : -1;
     /*
       Although the signedness differs both args can fit into the signed 
       positive range. Make them signed and compare as usual.
     */  
-    return cmp_longs (a->val, b->val);
+    return cmp_longs(a->val, b->val);
   }
   if (a->unsigned_flag)
-    return cmp_ulongs ((ulonglong) a->val, (ulonglong) b->val);
-  else
-    return cmp_longs (a->val, b->val);
+    return cmp_ulongs((ulonglong) a->val, (ulonglong) b->val);
+  return cmp_longs(a->val, b->val);
 }
 
 static int cmp_double(void *cmp_arg, double *a,double *b)
@@ -3623,7 +3543,7 @@ uchar *in_decimal::get_value(Item *item)
 }
 
 
-cmp_item* cmp_item::get_comparator(Item_result type,
+cmp_item* cmp_item::get_comparator(Item_result type, Item *warn_item,
                                    CHARSET_INFO *cs)
 {
   switch (type) {
@@ -3637,7 +3557,10 @@ cmp_item* cmp_item::get_comparator(Item_result type,
     return new cmp_item_row;
   case DECIMAL_RESULT:
     return new cmp_item_decimal;
-  default:
+  case TIME_RESULT:
+    DBUG_ASSERT(warn_item);
+    return new cmp_item_datetime(warn_item);
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
     break;
   }
@@ -3702,7 +3625,7 @@ void cmp_item_row::store_value(Item *item)
     {
       if (!comparators[i])
         if (!(comparators[i]=
-              cmp_item::get_comparator(item->element_index(i)->result_type(),
+              cmp_item::get_comparator(item->element_index(i)->result_type(), 0,
                                        item->element_index(i)->collation.collation)))
 	  break;					// new failed
       comparators[i]->store_value(item->element_index(i));
@@ -3878,11 +3801,22 @@ bool Item_func_in::nulls_in_row()
 bool
 Item_func_in::fix_fields(THD *thd, Item **ref)
 {
-  Item **arg, **arg_end;
 
   if (Item_func_opt_neg::fix_fields(thd, ref))
     return 1;
 
+  return 0;
+}
+
+
+bool
+Item_func_in::eval_not_null_tables(uchar *opt_arg)
+{
+  Item **arg, **arg_end;
+
+  if (Item_func_opt_neg::eval_not_null_tables(NULL))
+    return 1;
+
   /* not_null_tables_cache == union(T1(e),union(T1(ei))) */
   if (pred_level && negated)
     return 0;
@@ -3903,20 +3837,17 @@ static int srtcmp_in(CHARSET_INFO *cs, const String *x,const String *y)
                                (uchar *) y->ptr(),y->length(), 0);
 }
 
-
 void Item_func_in::fix_length_and_dec()
 {
   Item **arg, **arg_end;
   bool const_itm= 1;
   THD *thd= current_thd;
-  bool datetime_found= FALSE;
   /* TRUE <=> arguments values will be compared as DATETIMEs. */
-  bool compare_as_datetime= FALSE;
   Item *date_arg= 0;
   uint found_types= 0;
   uint type_cnt= 0, i;
   Item_result cmp_type= STRING_RESULT;
-  left_result_type= args[0]->result_type();
+  left_result_type= args[0]->cmp_type();
   if (!(found_types= collect_cmp_types(args, arg_count, true)))
     return;
   
@@ -3928,7 +3859,7 @@ void Item_func_in::fix_length_and_dec()
       break;
     }
   }
-  for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
+  for (i= 0; i <= (uint)TIME_RESULT; i++)
   {
     if (found_types & 1 << i)
     {
@@ -3943,16 +3874,12 @@ void Item_func_in::fix_length_and_dec()
         agg_arg_charsets_for_comparison(cmp_collation, args, arg_count))
       return;
     arg_types_compatible= TRUE;
-  }
-  if (type_cnt == 1)
-  {
-    /*
-      When comparing rows create the row comparator object beforehand to ease
-      the DATETIME comparison detection procedure.
-    */
+
     if (cmp_type == ROW_RESULT)
     {
+      uint cols= args[0]->cols();
       cmp_item_row *cmp= 0;
+
       if (const_itm && !nulls_in_row())
       {
         array= new in_row(arg_count-1, 0);
@@ -3964,66 +3891,20 @@ void Item_func_in::fix_length_and_dec()
           return;
         cmp_items[ROW_RESULT]= cmp;
       }
-      cmp->n= args[0]->cols();
+      cmp->n= cols;
       cmp->alloc_comparators();
-    }
-    /* All DATE/DATETIME fields/functions has the STRING result type. */
-    if (cmp_type == STRING_RESULT || cmp_type == ROW_RESULT)
-    {
-      uint col, cols= args[0]->cols();
 
-      for (col= 0; col < cols; col++)
+      for (uint col= 0; col < cols; col++)
       {
-        bool skip_column= FALSE;
-        /*
-          Check that all items to be compared has the STRING result type and at
-          least one of them is a DATE/DATETIME item.
-        */
-        for (arg= args, arg_end= args + arg_count; arg != arg_end ; arg++)
+        date_arg= find_date_time_item(args, arg_count, col);
+        if (date_arg)
         {
-          Item *itm= ((cmp_type == STRING_RESULT) ? arg[0] :
-                      arg[0]->element_index(col));
-          if (itm->result_type() != STRING_RESULT)
-          {
-            skip_column= TRUE;
-            break;
-          }
-          else if (itm->is_datetime())
-          {
-            datetime_found= TRUE;
-            /*
-              Internally all DATE/DATETIME values are converted to the DATETIME
-              type. So try to find a DATETIME item to issue correct warnings.
-            */
-            if (!date_arg)
-              date_arg= itm;
-            else if (itm->field_type() == MYSQL_TYPE_DATETIME)
-            {
-              date_arg= itm;
-              /* All arguments are already checked to have the STRING result. */
-              if (cmp_type == STRING_RESULT)
-                break;
-            }
-          }
-        }
-        if (skip_column)
-          continue;
-        if (datetime_found)
-        {
-          if (cmp_type == ROW_RESULT)
-          {
-            cmp_item **cmp= 0;
-            if (array)
-              cmp= ((in_row*)array)->tmp.comparators + col;
-            else
-              cmp= ((cmp_item_row*)cmp_items[ROW_RESULT])->comparators + col;
-            *cmp= new cmp_item_datetime(date_arg);
-            /* Reset variables for the next column. */
-            date_arg= 0;
-            datetime_found= FALSE;
-          }
+          cmp_item **cmp= 0;
+          if (array)
+            cmp= ((in_row*)array)->tmp.comparators + col;
           else
-            compare_as_datetime= TRUE;
+            cmp= ((cmp_item_row*)cmp_items[ROW_RESULT])->comparators + col;
+          *cmp= new cmp_item_datetime(date_arg);
         }
       }
     }
@@ -4034,61 +3915,60 @@ void Item_func_in::fix_length_and_dec()
   */
   if (type_cnt == 1 && const_itm && !nulls_in_row())
   {
-    if (compare_as_datetime)
-      array= new in_datetime(date_arg, arg_count - 1);
-    else
+    /*
+      IN must compare INT columns and constants as int values (the same
+      way as equality does).
+      So we must check here if the column on the left and all the constant 
+      values on the right can be compared as integers and adjust the 
+      comparison type accordingly.
+
+      See the comment about the similar block in Item_bool_func2
+    */  
+    if (args[0]->real_item()->type() == FIELD_ITEM &&
+        !thd->lex->is_view_context_analysis() && cmp_type != INT_RESULT)
     {
-      /*
-        IN must compare INT columns and constants as int values (the same
-        way as equality does).
-        So we must check here if the column on the left and all the constant 
-        values on the right can be compared as integers and adjust the 
-        comparison type accordingly.
-      */  
-      if (args[0]->real_item()->type() == FIELD_ITEM &&
-          thd->lex->sql_command != SQLCOM_CREATE_VIEW &&
-          thd->lex->sql_command != SQLCOM_SHOW_CREATE &&
-          cmp_type != INT_RESULT)
+      Item_field *field_item= (Item_field*) (args[0]->real_item());
+      if (field_item->cmp_type() == INT_RESULT)
       {
-        Item_field *field_item= (Item_field*) (args[0]->real_item());
-        if (field_item->field->can_be_compared_as_longlong())
+        bool all_converted= TRUE;
+        for (arg=args+1, arg_end=args+arg_count; arg != arg_end ; arg++)
         {
-          bool all_converted= TRUE;
-          for (arg=args+1, arg_end=args+arg_count; arg != arg_end ; arg++)
-          {
-            if (!convert_constant_item (thd, field_item, &arg[0]))
-              all_converted= FALSE;
-          }
-          if (all_converted)
-            cmp_type= INT_RESULT;
+           if (!convert_const_to_int(thd, field_item, &arg[0]))
+            all_converted= FALSE;
         }
+        if (all_converted)
+          cmp_type= INT_RESULT;
       }
-      switch (cmp_type) {
-      case STRING_RESULT:
-        array=new in_string(arg_count-1,(qsort2_cmp) srtcmp_in, 
-                            cmp_collation.collation);
-        break;
-      case INT_RESULT:
-        array= new in_longlong(arg_count-1);
-        break;
-      case REAL_RESULT:
-        array= new in_double(arg_count-1);
-        break;
-      case ROW_RESULT:
-        /*
-          The row comparator was created at the beginning but only DATETIME
-          items comparators were initialized. Call store_value() to setup
-          others.
-        */
-        ((in_row*)array)->tmp.store_value(args[0]);
-        break;
-      case DECIMAL_RESULT:
-        array= new in_decimal(arg_count - 1);
-        break;
-      default:
-        DBUG_ASSERT(0);
-        return;
-      }
+    }
+    switch (cmp_type) {
+    case STRING_RESULT:
+      array=new in_string(arg_count-1,(qsort2_cmp) srtcmp_in, 
+                          cmp_collation.collation);
+      break;
+    case INT_RESULT:
+      array= new in_longlong(arg_count-1);
+      break;
+    case REAL_RESULT:
+      array= new in_double(arg_count-1);
+      break;
+    case ROW_RESULT:
+      /*
+        The row comparator was created at the beginning but only DATETIME
+        items comparators were initialized. Call store_value() to setup
+        others.
+      */
+      ((in_row*)array)->tmp.store_value(args[0]);
+      break;
+    case DECIMAL_RESULT:
+      array= new in_decimal(arg_count - 1);
+      break;
+    case TIME_RESULT:
+      date_arg= find_date_time_item(args, arg_count, 0);
+      array= new in_datetime(date_arg, arg_count - 1);
+      break;
+    case IMPOSSIBLE_RESULT:
+      DBUG_ASSERT(0);
+      break;
     }
     if (array && !(thd->is_fatal_error))		// If not EOM
     {
@@ -4107,22 +3987,19 @@ void Item_func_in::fix_length_and_dec()
   }
   else
   {
-    if (compare_as_datetime)
-      cmp_items[STRING_RESULT]= new cmp_item_datetime(date_arg);
-    else
+    for (i= 0; i <= (uint) TIME_RESULT; i++)
     {
-      for (i= 0; i <= (uint) DECIMAL_RESULT; i++)
+      if (found_types & (1 << i) && !cmp_items[i])
       {
-        if (found_types & (1 << i) && !cmp_items[i])
-        {
-          if ((Item_result)i == STRING_RESULT &&
-              agg_arg_charsets_for_comparison(cmp_collation, args, arg_count))
-            return;
-          if (!cmp_items[i] && !(cmp_items[i]=
-              cmp_item::get_comparator((Item_result)i,
-                                       cmp_collation.collation)))
-            return;
-        }
+        if ((Item_result)i == STRING_RESULT &&
+            agg_arg_charsets_for_comparison(cmp_collation, args, arg_count))
+          return;
+        if ((Item_result)i == TIME_RESULT)
+          date_arg= find_date_time_item(args, arg_count, 0);
+        if (!cmp_items[i] && !(cmp_items[i]=
+            cmp_item::get_comparator((Item_result)i, date_arg,
+                                     cmp_collation.collation)))
+          return;
       }
     }
   }
@@ -4190,7 +4067,7 @@ longlong Item_func_in::val_int()
       have_null= TRUE;
       continue;
     }
-    Item_result cmp_type= item_cmp_type(left_result_type, args[i]->result_type());
+    Item_result cmp_type= item_cmp_type(left_result_type, args[i]->cmp_type());
     in_item= cmp_items[(uint)cmp_type];
     DBUG_ASSERT(in_item);
     if (!(value_added_map & (1 << (uint)cmp_type)))
@@ -4274,9 +4151,13 @@ Item_cond::fix_fields(THD *thd, Item **ref)
   DBUG_ASSERT(fixed == 0);
   List_iterator<Item> li(list);
   Item *item;
+  TABLE_LIST *save_emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
   uchar buff[sizeof(char*)];			// Max local vars in function
   not_null_tables_cache= used_tables_cache= 0;
   const_item_cache= 1;
+
+  if (functype() != COND_AND_FUNC)
+    thd->thd_marker.emb_on_expr_nest= NULL;
   /*
     and_table_cache is the value that Item_cond_or() returns for
     not_null_tables()
@@ -4302,7 +4183,6 @@ Item_cond::fix_fields(THD *thd, Item **ref)
   */
   while ((item=li++))
   {
-    table_map tmp_table_map;
     while (item->type() == Item::COND_ITEM &&
 	   ((Item_cond*) item)->functype() == functype() &&
            !((Item_cond*) item)->list.is_empty())
@@ -4324,22 +4204,80 @@ Item_cond::fix_fields(THD *thd, Item **ref)
       and_tables_cache= (table_map) 0;
     else
     {
-      tmp_table_map= item->not_null_tables();
+      table_map tmp_table_map= item->not_null_tables();
       not_null_tables_cache|= tmp_table_map;
       and_tables_cache&= tmp_table_map;
       const_item_cache= FALSE;
-    }  
+    } 
+  
     with_sum_func=	    with_sum_func || item->with_sum_func;
+    with_field=             with_field || item->with_field;
     with_subselect|=        item->with_subselect;
     if (item->maybe_null)
       maybe_null=1;
   }
   thd->lex->current_select->cond_count+= list.elements;
+  thd->thd_marker.emb_on_expr_nest= save_emb_on_expr_nest;
   fix_length_and_dec();
   fixed= 1;
   return FALSE;
 }
 
+
+/* Recompute not_null_tables_cache and and_tables_cache from the arguments */
+bool Item_cond::eval_not_null_tables(uchar *opt_arg)
+{
+  Item *item;
+  List_iterator<Item> li(list);
+  not_null_tables_cache= (table_map) 0;   // drop bits of earlier evaluations
+  and_tables_cache= ~(table_map) 0;
+  while ((item=li++))
+  {
+    if (item->const_item())
+      and_tables_cache= (table_map) 0;
+    else
+    {
+      table_map tmp_table_map= item->not_null_tables();
+      not_null_tables_cache|= tmp_table_map;
+      and_tables_cache&= tmp_table_map;
+    }
+  }
+  return 0;
+}
+
+
+void Item_cond::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  /*
+    Propagate fix_after_pullout() to every argument and rebuild the
+    table-map and constness caches the same way fix_fields() does.
+  */
+  used_tables_cache= 0;
+  const_item_cache= 1;
+  not_null_tables_cache= 0;
+  and_tables_cache= ~(table_map) 0;
+
+  List_iterator<Item> it(list);
+  Item *arg;
+  while ((arg= it++))
+  {
+    arg->fix_after_pullout(new_parent, it.ref());
+    arg= *it.ref();                 // re-read the slot after the call
+    used_tables_cache|= arg->used_tables();
+
+    if (arg->const_item())
+    {
+      and_tables_cache= (table_map) 0;
+      continue;
+    }
+    table_map not_null_map= arg->not_null_tables();
+    not_null_tables_cache|= not_null_map;
+    and_tables_cache&= not_null_map;
+    const_item_cache= FALSE;
+  }
+}
+
+
 bool Item_cond::walk(Item_processor processor, bool walk_subquery, uchar *arg)
 {
   List_iterator_fast<Item> li(list);
@@ -4651,12 +4589,6 @@ Item *and_expressions(Item *a, Item *b, Item **org_item)
 longlong Item_func_isnull::val_int()
 {
   DBUG_ASSERT(fixed == 1);
-  /*
-    Handle optimization if the argument can't be null
-    This has to be here because of the test in update_used_tables().
-  */
-  if (!used_tables_cache && !with_subselect)
-    return cached_value;
   return args[0]->is_null() ? 1: 0;
 }
 
@@ -4664,12 +4596,6 @@ longlong Item_is_not_null_test::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_is_not_null_test::val_int");
-  if (!used_tables_cache && !with_subselect)
-  {
-    owner->was_null|= (!cached_value);
-    DBUG_PRINT("info", ("cached: %ld", (long) cached_value));
-    DBUG_RETURN(cached_value);
-  }
   if (args[0]->is_null())
   {
     DBUG_PRINT("info", ("null"));
@@ -4686,19 +4612,9 @@ longlong Item_is_not_null_test::val_int()
 void Item_is_not_null_test::update_used_tables()
 {
   if (!args[0]->maybe_null)
-  {
     used_tables_cache= 0;			/* is always true */
-    cached_value= (longlong) 1;
-  }
   else
-  {
     args[0]->update_used_tables();
-    if (!(used_tables_cache=args[0]->used_tables()) && !with_subselect)
-    {
-      /* Remember if the value is always NULL or never NULL */
-      cached_value= (longlong) !args[0]->is_null();
-    }
-  }
 }
 
 
@@ -4835,8 +4751,7 @@ bool Item_func_like::fix_fields(THD *thd, Item **ref)
       We could also do boyer-more for non-const items, but as we would have to
       recompute the tables for each row it's not worth it.
     */
-    if (args[1]->const_item() && !use_strnxfrm(collation.collation) &&
-       !(specialflag & SPECIAL_NO_NEW_FUNC))
+    if (args[1]->const_item() && !use_strnxfrm(collation.collation))
     {
       String* res2 = args[1]->val_str(&cmp.value2);
       if (!res2)
@@ -4950,6 +4865,7 @@ Item_func_regex::fix_fields(THD *thd, Item **ref)
        args[1]->fix_fields(thd, args + 1)) || args[1]->check_cols(1))
     return TRUE;				/* purecov: inspected */
   with_sum_func=args[0]->with_sum_func || args[1]->with_sum_func;
+  with_field= args[0]->with_field || args[1]->with_field;
   max_length= 1;
   decimals= 0;
 
@@ -5270,23 +5186,21 @@ bool Item_func_like::turboBM_matches(const char* text, int text_len) const
     very fast to use.
 */
 
-longlong Item_cond_xor::val_int()
+longlong Item_func_xor::val_int()
 {
   DBUG_ASSERT(fixed == 1);
-  List_iterator<Item> li(list);
-  Item *item;
-  int result=0;	
-  null_value=0;
-  while ((item=li++))
+  int result= 0;
+  null_value= false;
+  for (uint i= 0; i < arg_count; i++)
   {
-    result^= (item->val_int() != 0);
-    if (item->null_value)
+    result^= (args[i]->val_int() != 0);
+    if (args[i]->null_value)
     {
-      null_value=1;
+      null_value= true;
       return 0;
     }
   }
-  return (longlong) result;
+  return result;
 }
 
 /**
@@ -5327,6 +5241,33 @@ Item *Item_bool_rowready_func2::neg_transformer(THD *thd)
   return item;
 }
 
+/**
+  Negate an XOR by negating exactly one of its operands:
+
+  NOT (a XOR b)  => (NOT a) XOR b
+                 => a       XOR (NOT b)
+
+  @param thd     Thread handle
+  @return        New item for the negated XOR
+*/
+Item *Item_func_xor::neg_transformer(THD *thd)
+{
+  Item *negated;
+
+  /* Use an operand's own neg_transformer() result when there is one,
+     trying the first operand before the second. */
+  if ((negated= args[0]->neg_transformer(thd)))
+    return new(thd->mem_root) Item_func_xor(negated, args[1]);
+
+  if ((negated= args[1]->neg_transformer(thd)))
+    return new(thd->mem_root) Item_func_xor(args[0], negated);
+
+  /* Neither call returned an item: wrap the first operand in an
+     explicit Item_func_not. */
+  negated= new(thd->mem_root) Item_func_not(args[0]);
+  return new(thd->mem_root) Item_func_xor(negated, args[1]);
+}
+
 
 /**
   a IS NULL  ->  a IS NOT NULL.
@@ -5371,7 +5312,7 @@ Item *Item_func_nop_all::neg_transformer(THD *thd)
   /* "NOT (e $cmp$ ANY (SELECT ...)) -> e $rev_cmp$" ALL (SELECT ...) */
   Item_func_not_all *new_item= new Item_func_not_all(args[0]);
   Item_allany_subselect *allany= (Item_allany_subselect*)args[0];
-  allany->func= allany->func_creator(FALSE);
+  allany->create_comp_func(FALSE);
   allany->all= !allany->all;
   allany->upper_item= new_item;
   return new_item;
@@ -5383,7 +5324,7 @@ Item *Item_func_not_all::neg_transformer(THD *thd)
   Item_func_nop_all *new_item= new Item_func_nop_all(args[0]);
   Item_allany_subselect *allany= (Item_allany_subselect*)args[0];
   allany->all= !allany->all;
-  allany->func= allany->func_creator(TRUE);
+  allany->create_comp_func(TRUE);
   allany->upper_item= new_item;
   return new_item;
 }
@@ -5432,43 +5373,92 @@ Item *Item_bool_rowready_func2::negated_item()
   return 0;
 }
 
-Item_equal::Item_equal(Item_field *f1, Item_field *f2)
-  : Item_bool_func(), const_item(0), eval_item(0), cond_false(0),
-    compare_as_dates(FALSE)
-{
-  const_item_cache= 0;
-  fields.push_back(f1);
-  fields.push_back(f2);
-}
 
-Item_equal::Item_equal(Item *c, Item_field *f)
+/**
+  Construct a minimal multiple equality item
+
+  @param f1               the first equal item
+  @param f2               the second equal item
+  @param with_const_item  TRUE if the first item is constant
+
+  @details
+  The constructor builds a new item equal object for the equality f1=f2.
+  One of the equal items can be constant. If this is the case it is passed
+  always as the first parameter and the parameter with_const_item serves
+  as an indicator of this case.
+  Currently any non-constant parameter items must point to an item of the
+  type Item_field or Item_direct_view_ref(Item_field).
+*/
+
+Item_equal::Item_equal(Item *f1, Item *f2, bool with_const_item)
   : Item_bool_func(), eval_item(0), cond_false(0)
 {
   const_item_cache= 0;
-  fields.push_back(f);
-  const_item= c;
-  compare_as_dates= f->is_datetime();
+  with_const= with_const_item;
+  equal_items.push_back(f1);
+  equal_items.push_back(f2);
+  compare_as_dates= with_const_item && f2->cmp_type() == TIME_RESULT;
 }
 
 
+/**
+  Copy constructor for a multiple equality
+  
+  @param item_equal   source item for the constructor
+
+  @details
+  The function creates a copy of an Item_equal object.
+  This constructor is used when an item belongs to a multiple equality
+  of an upper level (an upper AND/OR level or an upper level of a nested
+  outer join).
+*/
+
 Item_equal::Item_equal(Item_equal *item_equal)
   : Item_bool_func(), eval_item(0), cond_false(0)
 {
   const_item_cache= 0;
-  List_iterator_fast<Item_field> li(item_equal->fields);
-  Item_field *item;
+  List_iterator_fast<Item> li(item_equal->equal_items);
+  Item *item;
   while ((item= li++))
   {
-    fields.push_back(item);
+    equal_items.push_back(item);
   }
-  const_item= item_equal->const_item;
+  with_const= item_equal->with_const;
   compare_as_dates= item_equal->compare_as_dates;
   cond_false= item_equal->cond_false;
 }
 
 
-void Item_equal::compare_const(Item *c)
+/*
+  @brief
+  Add a constant item to the Item_equal object
+
+  @param[in]  c  the constant to add
+  @param[in]  f  item from the list equal_items the item c is equal to
+                 (this parameter is optional)
+
+  @details
+  The method adds the constant item c to the equal_items list. If the list
+  doesn't have any constant item yet the item c is just put at the front of
+  the list. Otherwise the value of c is compared with the value of the
+  constant item from equal_items. If they are not equal cond_false is set
+  to TRUE. This serves as an indicator that this Item_equal is always FALSE.
+  The optional parameter f is used to adjust the flag compare_as_dates.
+*/
+
+void Item_equal::add_const(Item *c, Item *f)
 {
+  if (cond_false)
+    return;
+  if (!with_const)
+  {
+    with_const= TRUE;
+    if (f)
+      compare_as_dates= f->cmp_type() == TIME_RESULT;
+    equal_items.push_front(c);
+    return;
+  }
+  Item *const_item= get_const();
   if (compare_as_dates)
   {
     cmp.set_datetime_cmp_func(this, &c, &const_item);
@@ -5485,65 +5475,28 @@ void Item_equal::compare_const(Item *c)
     const_item_cache= 1;
 }
 
-
-void Item_equal::add(Item *c, Item_field *f)
-{
-  if (cond_false)
-    return;
-  if (!const_item)
-  {
-    DBUG_ASSERT(f);
-    const_item= c;
-    compare_as_dates= f->is_datetime();
-    return;
-  }
-  compare_const(c);
-}
-
-
-void Item_equal::add(Item *c)
-{
-  if (cond_false)
-    return;
-  if (!const_item)
-  {
-    const_item= c;
-    return;
-  }
-  compare_const(c);
-}
-
-void Item_equal::add(Item_field *f)
-{
-  fields.push_back(f);
-}
-
-uint Item_equal::members()
-{
-  return fields.elements;
-}
-
-
 /**
-  Check whether a field is referred in the multiple equality.
-
-  The function checks whether field is occurred in the Item_equal object .
+  @brief
+  Check whether a field is referred to in the multiple equality
 
   @param field   field whose occurrence is to be checked
 
+  @details
+  The function checks whether field is referred to by one of the
+  items from the equal_items list.
+
   @retval
-    1       if nultiple equality contains a reference to field
+    1       if multiple equality contains a reference to field
   @retval
     0       otherwise    
 */
 
 bool Item_equal::contains(Field *field)
 {
-  List_iterator_fast<Item_field> it(fields);
-  Item_field *item;
-  while ((item= it++))
+  Item_equal_fields_iterator it(*this);
+  while (it++)
   {
-    if (field->eq(item->field))
+    if (field->eq(it.get_curr_field()))
         return 1;
   }
   return 0;
@@ -5551,99 +5504,93 @@ bool Item_equal::contains(Field *field)
 
 
 /**
-  Join members of another Item_equal object.
+  @brief
+  Join members of another Item_equal object
   
-    The function actually merges two multiple equalities.
-    After this operation the Item_equal object additionally contains
-    the field items of another item of the type Item_equal.
-    If the optional constant items are not equal the cond_false flag is
-    set to 1.  
   @param item    multiple equality whose members are to be joined
+
+  @details
+  The function actually merges two multiple equalities. After this operation
+  the Item_equal object additionally contains the field items of another item of
+  the type Item_equal.
+  If the optional constant items are not equal the cond_false flag is set to TRUE.
+
+  @notes
+  The function is called for any equality f1=f2 such that f1 and f2 are items
+  of the type Item_field or Item_direct_view_ref(Item_field), and, f1->field is
+  referred to in the list this->equal_items, while the list item->equal_items
+  contains a reference to f2->field.  
 */
 
 void Item_equal::merge(Item_equal *item)
 {
-  fields.concat(&item->fields);
-  Item *c= item->const_item;
+  Item *c= item->get_const();
+  if (c)
+    item->equal_items.pop();
+  equal_items.concat(&item->equal_items);
   if (c)
   {
     /* 
-      The flag cond_false will be set to 1 after this, if 
+      The flag cond_false will be set to TRUE after this if 
       the multiple equality already contains a constant and its 
-      value is  not equal to the value of c.
+      value is not equal to the value of c.
     */
-    add(c);
+    add_const(c);
   }
   cond_false|= item->cond_false;
 } 
 
 
 /**
-  Order field items in multiple equality according to a sorting criteria.
+  @brief
+  Order equal items of the multiple equality according to a sorting criterion
+
+  @param compare      function to compare items from the equal_items list
+  @param arg          context extra parameter for the cmp function
 
-  The function perform ordering of the field items in the Item_equal
-  object according to the criteria determined by the cmp callback parameter.
-  If cmp(item_field1,item_field2,arg)<0 than item_field1 must be
-  placed after item_fiel2.
+  @details
+  The function performs ordering of the items from the equal_items list
+  according to the criteria determined by the cmp callback parameter.
+  If cmp(item1,item2,arg)<0 then item1 must be placed after item2.
 
-  The function sorts field items by the exchange sort algorithm.
+  @notes
+  The function sorts equal items by the bubble sort algorithm.
   The list of field items is looked through and whenever two neighboring
   members follow in a wrong order they are swapped. This is performed
   again and again until we get all members in a right order.
-
-  @param compare      function to compare field item
-  @param arg          context extra parameter for the cmp function
 */
 
 void Item_equal::sort(Item_field_cmpfunc compare, void *arg)
 {
-  bool swap;
-  List_iterator<Item_field> it(fields);
-  do
-  {
-    Item_field *item1= it++;
-    Item_field **ref1= it.ref();
-    Item_field *item2;
-
-    swap= FALSE;
-    while ((item2= it++))
-    {
-      Item_field **ref2= it.ref();
-      if (compare(item1, item2, arg) < 0)
-      {
-        Item_field *item= *ref1;
-        *ref1= *ref2;
-        *ref2= item;
-        swap= TRUE;
-      }
-      else
-      {
-        item1= item2;
-        ref1= ref2;
-      }
-    }
-    it.rewind();
-  } while (swap);
+  bubble_sort<Item>(&equal_items, compare, arg);
 }
 
 
 /**
-  Check appearance of new constant items in the multiple equality object.
+  @brief
+  Check appearance of new constant items in the multiple equality object
 
-  The function checks appearance of new constant items among
-  the members of multiple equalities. Each new constant item is
-  compared with the designated constant item if there is any in the
-  multiple equality. If there is none the first new constant item
-  becomes designated.
+  @details
+  The function checks appearance of new constant items among the members
+  of the equal_items list. Each new constant item is compared with
+  the constant item from the list if there is any. If there is none the first
+  new constant item is placed at the very beginning of the list and
+  with_const is set to TRUE. If it happens that the compared constant items
+  are unequal then the flag cond_false is set to TRUE.
+
+  @notes 
+  Currently this function is called only after substitution of constant tables.
 */
 
 void Item_equal::update_const()
 {
-  List_iterator<Item_field> it(fields);
+  List_iterator<Item> it(equal_items);
+  if (with_const)
+    it++;
   Item *item;
   while ((item= it++))
   {
-    if (item->const_item() &&
+    if (item->const_item() && !item->is_expensive() &&
         /*
           Don't propagate constant status of outer-joined column.
           Such a constant status here is a result of:
@@ -5659,40 +5606,81 @@ void Item_equal::update_const()
         */
         !item->is_outer_field())
     {
-      it.remove();
-      add(item);
-    }
+      if (item == equal_items.head())
+        with_const= TRUE;
+      else
+      {
+        it.remove();
+        add_const(item);
+      }
+    } 
   }
 }
 
+
+/**
+  @brief
+  Fix fields in a completely built multiple equality
+
+  @param  thd     currently not used thread handle 
+  @param  ref     not used
+
+  @details
+  This function is called once the multiple equality has been built out of 
+  the WHERE/ON condition and no new members are expected to be added to the
+  equal_items list anymore.
+  As any implementation of the virtual fix_fields method the function
+  calculates the cached values of not_null_tables_cache, used_tables_cache,
+  const_item_cache and calls fix_length_and_dec().
+  Additionally the function sets a reference to the Item_equal object in
+  the non-constant items of the equal_items list unless such a reference has
+  been already set.
+
+  @notes 
+  Currently this function is called only in the function
+  build_equal_items_for_cond.
+  
+  @retval
+  FALSE   always
+*/
+
 bool Item_equal::fix_fields(THD *thd, Item **ref)
-{
-  List_iterator_fast<Item_field> li(fields);
+{ 
+  DBUG_ASSERT(fixed == 0);
+  Item_equal_fields_iterator it(*this);
   Item *item;
   not_null_tables_cache= used_tables_cache= 0;
   const_item_cache= 0;
-  while ((item= li++))
+  while ((item= it++))
   {
     table_map tmp_table_map;
     used_tables_cache|= item->used_tables();
     tmp_table_map= item->not_null_tables();
     not_null_tables_cache|= tmp_table_map;
     if (item->maybe_null)
-      maybe_null=1;
+      maybe_null= 1;
+    if (!item->get_item_equal())
+      item->set_item_equal(this);
   }
   fix_length_and_dec();
   fixed= 1;
-  return 0;
+  return FALSE;
 }
 
+
+/**
+  Update the value of the used table attribute and other attributes
+ */
+
 void Item_equal::update_used_tables()
 {
-  List_iterator_fast<Item_field> li(fields);
-  Item *item;
   not_null_tables_cache= used_tables_cache= 0;
   if ((const_item_cache= cond_false))
     return;
-  while ((item=li++))
+  Item_equal_fields_iterator it(*this);
+  Item *item;
+  const_item_cache= 1;
+  while ((item= it++))
   {
     item->update_used_tables();
     used_tables_cache|= item->used_tables();
@@ -5701,39 +5689,66 @@ void Item_equal::update_used_tables()
   }
 }
 
+
+
+/**
+  @brief
+  Evaluate multiple equality
+
+  @details
+  The function evaluates the multiple equality to a boolean value.
+  The function ignores non-constant items from the equal_items list.
+  The function returns 1 if all constant items from the list are equal. 
+  It returns 0 if there are unequal constant items in the list or 
+  one of the constant items is evaluated to NULL. 
+  
+  @notes 
+  Currently this function can be called only at the optimization
+  stage after the constant table substitution, since all Item_equals
+  are eliminated before the execution stage.
+  
+  @retval
+     0     multiple equality is always FALSE or NULL
+     1     otherwise
+*/
+
 longlong Item_equal::val_int()
 {
-  Item_field *item_field;
   if (cond_false)
     return 0;
-  List_iterator_fast<Item_field> it(fields);
-  Item *item= const_item ? const_item : it++;
+  Item *item= get_const();
+  Item_equal_fields_iterator it(*this);
+  if (!item)
+    item= it++;
   eval_item->store_value(item);
   if ((null_value= item->null_value))
     return 0;
-  while ((item_field= it++))
+  while ((item= it++))
   {
+    Field *field= it.get_curr_field();
     /* Skip fields of non-const tables. They haven't been read yet */
-    if (item_field->field->table->const_table)
+    if (field->table->const_table)
     {
-      if (eval_item->cmp(item_field) || (null_value= item_field->null_value))
+      if (eval_item->cmp(item) || (null_value= item->null_value))
         return 0;
     }
   }
   return 1;
 }
 
+
 void Item_equal::fix_length_and_dec()
 {
-  Item *item= get_first();
-  eval_item= cmp_item::get_comparator(item->result_type(),
+  Item *item= get_first(NULL);
+  eval_item= cmp_item::get_comparator(item->result_type(), 0,
                                       item->collation.collation);
 }
 
+
 bool Item_equal::walk(Item_processor processor, bool walk_subquery, uchar *arg)
 {
-  List_iterator_fast<Item_field> it(fields);
   Item *item;
+  Item_equal_fields_iterator it(*this);
   while ((item= it++))
   {
     if (item->walk(processor, walk_subquery, arg))
@@ -5742,12 +5757,13 @@ bool Item_equal::walk(Item_processor processor, bool walk_subquery, uchar *arg)
   return Item_func::walk(processor, walk_subquery, arg);
 }
 
+
 Item *Item_equal::transform(Item_transformer transformer, uchar *arg)
 {
   DBUG_ASSERT(!current_thd->stmt_arena->is_stmt_prepare());
 
-  List_iterator<Item_field> it(fields);
   Item *item;
+  Item_equal_fields_iterator it(*this);
   while ((item= it++))
   {
     Item *new_item= item->transform(transformer, arg);
@@ -5766,19 +5782,20 @@ Item *Item_equal::transform(Item_transformer transformer, uchar *arg)
   return Item_func::transform(transformer, arg);
 }
 
+
 void Item_equal::print(String *str, enum_query_type query_type)
 {
+  if (cond_false)
+  {
+    str->append('0');
+    return;
+  }
   str->append(func_name());
   str->append('(');
-  List_iterator_fast<Item_field> it(fields);
+  List_iterator_fast<Item> it(equal_items);
   Item *item;
-  if (const_item)
-    const_item->print(str, query_type);
-  else
-  {
-    item= it++;
-    item->print(str, query_type);
-  }
+  item= it++;
+  item->print(str, query_type);
   while ((item= it++))
   {
     str->append(',');
@@ -5788,3 +5805,150 @@ void Item_equal::print(String *str, enum_query_type query_type)
   str->append(')');
 }
 
+
+CHARSET_INFO *Item_equal::compare_collation()
+{ 
+  Item_equal_fields_iterator it(*this);
+  Item *item= it++;
+  return item->collation.collation;
+}
+
+
+/*
+  @brief Get the first equal field of multiple equality.
+  @param[in] field   the field to get equal field to
+
+  @details Get the first field of multiple equality that is equal to the
+  given field. In order to make semi-join materialization strategy work
+  correctly we can't propagate equal fields from upper select to a
+  materialized semi-join.
+  Thus the field is returned according to the following rules:
+
+  1) If the given field belongs to a semi-join then the first field in
+     multiple equality which belongs to the same semi-join is returned.
+     Otherwise NULL is returned.
+  2) If the given field doesn't belong to a semi-join then
+     the first field in the multiple equality that doesn't belong to any
+     semi-join is returned.
+     If all fields in the equality belong to semi-join(s) then NULL
+     is returned.
+  3) If no field is given then the first field in the multiple equality
+     is returned without regarding whether it belongs to a semi-join or not.
+
+  @retval Found first field in the multiple equality.
+  @retval 0 if no field found.
+*/
+
+Item* Item_equal::get_first(Item *field_item)
+{
+  Item_equal_fields_iterator it(*this);
+  Item *item;
+  JOIN_TAB *field_tab;
+  if (!field_item)
+    return (it++);
+  Field *field= ((Item_field *) (field_item->real_item()))->field;
+
+  /*
+    Of all equal fields, return the first one we can use. Normally, this is the
+    field which belongs to the table that is the first in the join order.
+
+    There is one exception to this: When semi-join materialization strategy is
+    used, and the given field belongs to a table within the semi-join nest, we
+    must pick the first field in the semi-join nest.
+
+    Example: suppose we have a join order:
+
+       ot1 ot2  SJ-Mat(it1  it2  it3)  ot3
+
+    and equality ot2.col = it1.col = it2.col
+    If we're looking for best substitute for 'it2.col', we should pick it1.col
+    and not ot2.col.
+    
+    eliminate_item_equal() also has code that deals with equality substitution
+    in presence of SJM nests.
+  */
+
+  field_tab= field->table->reginfo.join_tab;
+
+  TABLE_LIST *emb_nest= field->table->pos_in_table_list->embedding;
+
+  if (emb_nest && emb_nest->sj_mat_info && emb_nest->sj_mat_info->is_used)
+  {
+    /*
+      It's a field from a materialized semi-join. We can substitute it only
+      for a field from the same semi-join. Find the first of such items.
+    */
+
+    while ((item= it++))
+    {
+      if (it.get_curr_field()->table->pos_in_table_list->embedding == emb_nest)
+      {
+        /*
+          If we found given field then return NULL to avoid unnecessary
+          substitution.
+        */
+        return (item != field_item) ? item : NULL;
+      }
+    }
+  }
+  else
+  {
+    /*
+      The field is not in SJ-Materialization nest. We must return the first
+      field in the join order. The field may be inside a semi-join nest, i.e 
+      a join order may look like this:
+
+          SJ-Mat(it1  it2)  ot1  ot2
+
+      where we're looking what to substitute ot2.col for. In this case we must 
+      still return it1.col, here's a proof why:
+
+      First let's note that either it1.col or it2.col participates in 
+      subquery's IN-equality. It can't be otherwise, because materialization is
+      only applicable to uncorrelated subqueries, so the only way we could
+      infer "it1.col=ot1.col" is from the IN-equality. Ok, so IN-equality has
+      it1.col or it2.col on its inner side. it1.col is first such item in the
+      join order, so it's not possible for SJ-Mat to be
+      SJ-Materialization-lookup, it is SJ-Materialization-Scan. The scan part
+      of this strategy will unpack value of it1.col=it2.col into it1.col
+      (that's the first equal item inside the subquery), and we'll be able to
+      get it from there. qed.
+    */
+
+    return equal_items.head();
+  }
+  // Shouldn't get here.
+  DBUG_ASSERT(0);
+  return NULL;
+}
+
+
+longlong Item_func_dyncol_exists::val_int()
+{
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String tmp(buff, sizeof(buff), &my_charset_bin);
+  DYNAMIC_COLUMN col;
+  String *str;
+  ulonglong num;
+  enum enum_dyncol_func_result rc;
+
+  num= args[1]->val_int();
+  str= args[0]->val_str(&tmp);
+  if (args[0]->null_value || args[1]->null_value || num > UINT_MAX16)
+    goto null;
+  col.length= str->length();
+  /* We do not change the string, so could do this trick */
+  col.str= (char *)str->ptr();
+  rc= dynamic_column_exists(&col, (uint) num);
+  if (rc < 0)
+  {
+    dynamic_column_error_message(rc);
+    goto null;
+  }
+  null_value= FALSE;
+  return rc == ER_DYNCOL_YES;
+
+null:
+  null_value= TRUE;
+  return 0;
+}
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index b6928d63ea0..67035384cf7 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -1,7 +1,7 @@
 #ifndef ITEM_CMPFUNC_INCLUDED
 #define ITEM_CMPFUNC_INCLUDED
-
-/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -33,39 +33,29 @@ class Arg_comparator;
 
 typedef int (Arg_comparator::*arg_cmp_func)();
 
-typedef int (*Item_field_cmpfunc)(Item_field *f1, Item_field *f2, void *arg); 
+typedef int (*Item_field_cmpfunc)(Item *f1, Item *f2, void *arg); 
 
 class Arg_comparator: public Sql_alloc
 {
   Item **a, **b;
   arg_cmp_func func;
   Item_result_field *owner;
+  bool set_null;                   // TRUE <=> set owner->null_value
   Arg_comparator *comparators;   // used only for compare_row()
   double precision;
   /* Fields used in DATE/DATETIME comparison. */
   THD *thd;
-  enum_field_types a_type, b_type; // Types of a and b items
   Item *a_cache, *b_cache;         // Cached values of a and b items
-  bool is_nulls_eq;                // TRUE <=> compare for the EQUAL_FUNC
-  bool set_null;                   // TRUE <=> set owner->null_value
                                    //   when one of arguments is NULL.
-  enum enum_date_cmp_type { CMP_DATE_DFLT= 0, CMP_DATE_WITH_DATE,
-                            CMP_DATE_WITH_STR, CMP_STR_WITH_DATE };
-  longlong (*get_value_a_func)(THD *thd, Item ***item_arg, Item **cache_arg,
-                               Item *warn_item, bool *is_null);
-  longlong (*get_value_b_func)(THD *thd, Item ***item_arg, Item **cache_arg,
-                               Item *warn_item, bool *is_null);
-  bool try_year_cmp_func(Item_result type);
 public:
   DTCollation cmp_collation;
   /* Allow owner function to use string buffers. */
   String value1, value2;
 
-  Arg_comparator(): comparators(0), thd(0), a_cache(0), b_cache(0), set_null(TRUE),
-    get_value_a_func(0), get_value_b_func(0) {};
-  Arg_comparator(Item **a1, Item **a2): a(a1), b(a2), comparators(0), thd(0),
-    a_cache(0), b_cache(0), set_null(TRUE),
-    get_value_a_func(0), get_value_b_func(0) {};
+  Arg_comparator():  set_null(TRUE), comparators(0), thd(0),
+    a_cache(0), b_cache(0) {};
+  Arg_comparator(Item **a1, Item **a2): a(a1), b(a2),  set_null(TRUE),
+    comparators(0), thd(0), a_cache(0), b_cache(0) {};
 
   int set_compare_func(Item_result_field *owner, Item_result type);
   inline int set_compare_func(Item_result_field *owner_arg)
@@ -82,8 +72,8 @@ public:
   {
     set_null= set_null_arg;
     return set_cmp_func(owner_arg, a1, a2,
-                        item_cmp_type((*a1)->result_type(),
-                                      (*a2)->result_type()));
+                        item_cmp_type((*a1)->cmp_type(),
+                                      (*a2)->cmp_type()));
   }
   inline int compare() { return (this->*func)(); }
 
@@ -106,14 +96,12 @@ public:
   int compare_real_fixed();
   int compare_e_real_fixed();
   int compare_datetime();        // compare args[0] & args[1] as DATETIMEs
-
-  static enum enum_date_cmp_type can_compare_as_dates(Item *a, Item *b,
-                                                      ulonglong *const_val_arg);
+  int compare_e_datetime();
 
   Item** cache_converted_constant(THD *thd, Item **value, Item **cache,
                                   Item_result type);
   void set_datetime_cmp_func(Item_result_field *owner_arg, Item **a1, Item **b1);
-  static arg_cmp_func comparator_matrix [5][2];
+  static arg_cmp_func comparator_matrix [6][2];
   inline bool is_owner_equal_func()
   {
     return (owner->type() == Item::FUNC_ITEM &&
@@ -124,17 +112,6 @@ public:
     delete [] comparators;
     comparators= 0;
   }
-  /*
-    Set correct cmp_context if items would be compared as INTs.
-  */
-  inline void set_cmp_context_for_datetime()
-  {
-    DBUG_ASSERT(func == &Arg_comparator::compare_datetime);
-    if ((*a)->result_as_longlong())
-      (*a)->cmp_context= INT_RESULT;
-    if ((*b)->result_as_longlong())
-      (*b)->cmp_context= INT_RESULT;
-  }
   friend class Item_func;
 };
 
@@ -237,7 +214,7 @@ public:
 
 
 class Item_cache;
-#define UNKNOWN ((my_bool)-1)
+#define UNKNOWN (-1)
 
 
 /*
@@ -259,6 +236,7 @@ class Item_in_optimizer: public Item_bool_func
 {
 protected:
   Item_cache *cache;
+  Item *expr_cache;
   bool save_cache;
   /* 
     Stores the value of "NULL IN (SELECT ...)" for uncorrelated subqueries:
@@ -266,10 +244,10 @@ protected:
       FALSE   - result is FALSE
       TRUE    - result is NULL
   */
-  my_bool result_for_null_param;
+  int result_for_null_param;
 public:
   Item_in_optimizer(Item *a, Item_in_subselect *b):
-    Item_bool_func(a, reinterpret_cast<Item *>(b)), cache(0),
+    Item_bool_func(a, reinterpret_cast<Item *>(b)), cache(0), expr_cache(0),
     save_cache(0), result_for_null_param(UNKNOWN)
   {}
   bool fix_fields(THD *, Item **);
@@ -280,6 +258,15 @@ public:
   const char *func_name() const { return "<in_optimizer>"; }
   Item_cache **get_cache() { return &cache; }
   void keep_top_level_cache();
+  Item *transform(Item_transformer transformer, uchar *arg);
+  virtual Item *expr_cache_insert_transformer(uchar *thd_arg);
+  bool is_expensive_processor(uchar *arg);
+  bool is_expensive();
+  void set_join_tab_idx(uint join_tab_idx_arg)
+  { args[1]->set_join_tab_idx(join_tab_idx_arg); }
+  virtual void get_cache_parameters(List<Item> &parameters);
+  bool is_top_level_item();
+  bool eval_not_null_tables(uchar *opt_arg);
 };
 
 class Comp_creator
@@ -287,7 +274,14 @@ class Comp_creator
 public:
   Comp_creator() {}                           /* Remove gcc warning */
   virtual ~Comp_creator() {}                  /* Remove gcc warning */
+  /**
+    Create operation with given arguments.
+  */
   virtual Item_bool_func2* create(Item *a, Item *b) const = 0;
+  /**
+    Create operation with given arguments in swap order.
+  */
+  virtual Item_bool_func2* create_swap(Item *a, Item *b) const = 0;
   virtual const char* symbol(bool invert) const = 0;
   virtual bool eqne_op() const = 0;
   virtual bool l_op() const = 0;
@@ -299,6 +293,7 @@ public:
   Eq_creator() {}                             /* Remove gcc warning */
   virtual ~Eq_creator() {}                    /* Remove gcc warning */
   virtual Item_bool_func2* create(Item *a, Item *b) const;
+  virtual Item_bool_func2* create_swap(Item *a, Item *b) const;
   virtual const char* symbol(bool invert) const { return invert? "<>" : "="; }
   virtual bool eqne_op() const { return 1; }
   virtual bool l_op() const { return 0; }
@@ -310,6 +305,7 @@ public:
   Ne_creator() {}                             /* Remove gcc warning */
   virtual ~Ne_creator() {}                    /* Remove gcc warning */
   virtual Item_bool_func2* create(Item *a, Item *b) const;
+  virtual Item_bool_func2* create_swap(Item *a, Item *b) const;
   virtual const char* symbol(bool invert) const { return invert? "=" : "<>"; }
   virtual bool eqne_op() const { return 1; }
   virtual bool l_op() const { return 0; }
@@ -321,6 +317,7 @@ public:
   Gt_creator() {}                             /* Remove gcc warning */
   virtual ~Gt_creator() {}                    /* Remove gcc warning */
   virtual Item_bool_func2* create(Item *a, Item *b) const;
+  virtual Item_bool_func2* create_swap(Item *a, Item *b) const;
   virtual const char* symbol(bool invert) const { return invert? "<=" : ">"; }
   virtual bool eqne_op() const { return 0; }
   virtual bool l_op() const { return 0; }
@@ -332,6 +329,7 @@ public:
   Lt_creator() {}                             /* Remove gcc warning */
   virtual ~Lt_creator() {}                    /* Remove gcc warning */
   virtual Item_bool_func2* create(Item *a, Item *b) const;
+  virtual Item_bool_func2* create_swap(Item *a, Item *b) const;
   virtual const char* symbol(bool invert) const { return invert? ">=" : "<"; }
   virtual bool eqne_op() const { return 0; }
   virtual bool l_op() const { return 1; }
@@ -343,6 +341,7 @@ public:
   Ge_creator() {}                             /* Remove gcc warning */
   virtual ~Ge_creator() {}                    /* Remove gcc warning */
   virtual Item_bool_func2* create(Item *a, Item *b) const;
+  virtual Item_bool_func2* create_swap(Item *a, Item *b) const;
   virtual const char* symbol(bool invert) const { return invert? "<" : ">="; }
   virtual bool eqne_op() const { return 0; }
   virtual bool l_op() const { return 0; }
@@ -354,6 +353,7 @@ public:
   Le_creator() {}                             /* Remove gcc warning */
   virtual ~Le_creator() {}                    /* Remove gcc warning */
   virtual Item_bool_func2* create(Item *a, Item *b) const;
+  virtual Item_bool_func2* create_swap(Item *a, Item *b) const;
   virtual const char* symbol(bool invert) const { return invert? ">" : "<="; }
   virtual bool eqne_op() const { return 0; }
   virtual bool l_op() const { return 1; }
@@ -387,6 +387,7 @@ public:
   CHARSET_INFO *compare_collation() { return cmp.cmp_collation.collation; }
   uint decimal_precision() const { return 1; }
   void top_level_item() { abort_on_null= TRUE; }
+  Arg_comparator *get_comparator() { return &cmp; }
   void cleanup()
   {
     Item_int_func::cleanup();
@@ -405,7 +406,30 @@ public:
   }
   Item *neg_transformer(THD *thd);
   virtual Item *negated_item();
-  bool subst_argument_checker(uchar **arg) { return TRUE; }
+  bool subst_argument_checker(uchar **arg)
+  {
+    return (*arg != NULL);     
+  }
+};
+
+/**
+  XOR inherits from Item_bool_func2 because it is not optimized yet.
+  Later, when XOR is optimized, it needs to inherit from
+  Item_cond instead. See WL#5800. 
+*/
+class Item_func_xor :public Item_bool_func2
+{
+public:
+  Item_func_xor(Item *i1, Item *i2) :Item_bool_func2(i1, i2) {}
+  enum Functype functype() const { return XOR_FUNC; }
+  const char *func_name() const { return "xor"; }
+  longlong val_int();
+  void top_level_item() {}
+  Item *neg_transformer(THD *thd);
+  bool subst_argument_checker(uchar **arg)
+  {
+    return (*arg != NULL);     
+  }
 };
 
 class Item_func_not :public Item_bool_func
@@ -477,7 +501,7 @@ public:
      show(0)
     {}
   virtual void top_level_item() { abort_on_null= 1; }
-  bool top_level() { return abort_on_null; }
+  bool is_top_level_item() { return abort_on_null; }
   longlong val_int();
   enum Functype functype() const { return NOT_ALL_FUNC; }
   const char *func_name() const { return "<not>"; }
@@ -503,13 +527,23 @@ public:
 class Item_func_eq :public Item_bool_rowready_func2
 {
 public:
-  Item_func_eq(Item *a,Item *b) :Item_bool_rowready_func2(a,b) {}
+  Item_func_eq(Item *a,Item *b) :
+    Item_bool_rowready_func2(a,b), in_equality_no(UINT_MAX)
+  {}
   longlong val_int();
   enum Functype functype() const { return EQ_FUNC; }
   enum Functype rev_functype() const { return EQ_FUNC; }
   cond_result eq_cmp_result() const { return COND_TRUE; }
   const char *func_name() const { return "="; }
   Item *negated_item();
+  /*
+    - If this equality was created from a subquery's IN-equality: the
+      number of the item it was created from, e.g. for
+       (a,b) IN (SELECT c,d ...)  a=c will have in_equality_no=0,
+       and b=d will have in_equality_no=1.
+    - Otherwise UINT_MAX, which is the value the constructor sets.
+  */
+  uint in_equality_no;
 };
 
 class Item_func_equal :public Item_bool_rowready_func2
@@ -631,9 +665,7 @@ public:
   Item_result cmp_type;
   String value0,value1,value2;
   /* TRUE <=> arguments will be compared as dates. */
-  bool compare_as_dates;
-  /* Comparators used for DATE/DATETIME comparison. */
-  Arg_comparator ge_cmp, le_cmp;
+  Item *compare_as_dates;
   Item_func_between(Item *a, Item *b, Item *c)
     :Item_func_opt_neg(a, b, c), compare_as_dates(FALSE) {}
   longlong val_int();
@@ -646,6 +678,7 @@ public:
   bool is_bool_func() { return 1; }
   CHARSET_INFO *compare_collation() { return cmp_collation.collation; }
   uint decimal_precision() const { return 1; }
+  bool eval_not_null_tables(uchar *opt_arg);
 };
 
 
@@ -679,7 +712,7 @@ struct interval_range
 class Item_func_interval :public Item_int_func
 {
   Item_row *row;
-  my_bool use_decimal_comparison;
+  bool use_decimal_comparison;
   interval_range *intervals;
 public:
   Item_func_interval(Item_row *a)
@@ -711,6 +744,7 @@ public:
   const char *func_name() const { return "coalesce"; }
   table_map not_null_tables() const { return 0; }
   enum_field_types field_type() const { return cached_field_type; }
+  bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
 };
 
 
@@ -750,6 +784,7 @@ public:
   void fix_length_and_dec();
   uint decimal_precision() const;
   const char *func_name() const { return "if"; }
+  bool eval_not_null_tables(uchar *opt_arg);
 };
 
 
@@ -804,7 +839,7 @@ public:
   virtual uchar *get_value(Item *item)=0;
   void sort()
   {
-    my_qsort2(base,used_count,size,compare,collation);
+    my_qsort2(base,used_count,size,compare,(void*)collation);
   }
   int find(Item *item);
   
@@ -887,7 +922,7 @@ public:
   void value_to_item(uint pos, Item *item)
   {
     ((Item_int*) item)->value= ((packed_longlong*) base)[pos].val;
-    ((Item_int*) item)->unsigned_flag= (my_bool)
+    ((Item_int*) item)->unsigned_flag= (bool)
       ((packed_longlong*) base)[pos].unsigned_flag;
   }
   Item_result result_type() { return INT_RESULT; }
@@ -916,6 +951,16 @@ public:
      lval_cache(0) {};
   void set(uint pos,Item *item);
   uchar *get_value(Item *item);
+  Item* create_item()
+  { 
+    return new Item_datetime();
+  }
+  void value_to_item(uint pos, Item *item)
+  {
+    packed_longlong *val= reinterpret_cast<packed_longlong*>(base)+pos;
+    Item_datetime *dt= reinterpret_cast<Item_datetime*>(item);
+    dt->set(val->val);
+  }
   friend int cmp_longlong(void *cmp_arg, packed_longlong *a,packed_longlong *b);
 };
 
@@ -975,7 +1020,8 @@ public:
   virtual int cmp(Item *item)= 0;
   // for optimized IN with row
   virtual int compare(cmp_item *item)= 0;
-  static cmp_item* get_comparator(Item_result type, CHARSET_INFO *cs);
+  static cmp_item* get_comparator(Item_result type, Item * warn_item,
+                                  CHARSET_INFO *cs);
   virtual cmp_item *make_same()= 0;
   virtual void store_value_by_template(cmp_item *tmpl, Item *item)
   {
@@ -1171,7 +1217,7 @@ class Item_func_case :public Item_func
   Item_result cmp_type;
   DTCollation cmp_collation;
   enum_field_types cached_field_type;
-  cmp_item *cmp_items[5]; /* For all result types */
+  cmp_item *cmp_items[6]; /* For all result types */
   cmp_item *case_item;
 public:
   Item_func_case(List<Item> &list, Item *first_expr_arg, Item *else_expr_arg)
@@ -1261,7 +1307,7 @@ public:
     Item_int_func::cleanup();
     delete array;
     array= 0;
-    for (i= 0; i <= (uint)DECIMAL_RESULT + 1; i++)
+    for (i= 0; i <= (uint)TIME_RESULT; i++)
     {
       delete cmp_items[i];
       cmp_items[i]= 0;
@@ -1276,6 +1322,7 @@ public:
   bool nulls_in_row();
   bool is_bool_func() { return 1; }
   CHARSET_INFO *compare_collation() { return cmp_collation.collation; }
+  bool eval_not_null_tables(uchar *opt_arg);
 };
 
 class cmp_item_row :public cmp_item
@@ -1311,8 +1358,6 @@ public:
 
 class Item_func_isnull :public Item_bool_func
 {
-protected:
-  longlong cached_value;
 public:
   Item_func_isnull(Item *a) :Item_bool_func(a) {}
   longlong val_int();
@@ -1330,17 +1375,12 @@ public:
     {
       used_tables_cache= 0;			/* is always false */
       const_item_cache= 1;
-      cached_value= (longlong) 0;
     }
     else
     {
       args[0]->update_used_tables();
-      if ((const_item_cache= !(used_tables_cache= args[0]->used_tables()) &&
-          !with_subselect))
-      {
-	/* Remember if the value is always NULL or never NULL */
-	cached_value= (longlong) args[0]->is_null();
-      }
+      used_tables_cache= args[0]->used_tables();
+      const_item_cache= args[0]->const_item();
     }
   }
   table_map not_null_tables() const { return 0; }
@@ -1373,6 +1413,7 @@ public:
   */
   table_map used_tables() const
     { return used_tables_cache | RAND_TABLE_BIT; }
+  bool const_item() const { return FALSE; }
 };
 
 
@@ -1502,6 +1543,7 @@ public:
     list.prepand(nlist);
   }
   bool fix_fields(THD *, Item **ref);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
 
   enum Type type() const { return COND_ITEM; }
   List<Item>* argument_list() { return &list; }
@@ -1521,6 +1563,7 @@ public:
   bool subst_argument_checker(uchar **arg) { return TRUE; }
   Item *compile(Item_analyzer analyzer, uchar **arg_p,
                 Item_transformer transformer, uchar *arg_t);
+  bool eval_not_null_tables(uchar *opt_arg);
 };
 
 
@@ -1601,27 +1644,64 @@ public:
 
 class Item_equal: public Item_bool_func
 {
-  List<Item_field> fields; /* list of equal field items                    */
-  Item *const_item;        /* optional constant item equal to fields items */
+  /*
+    The list of equal items. Currently the list can contain:
+     - Item_fields items for references to table columns
+     - Item_direct_view_ref items for references to view columns
+     - one const item
+
+    If the list contains a constant item this item is always first in the list.
+    The list contains at least two elements.
+    Currently all Item_fields/Item_direct_view_ref items in the list should
+    refer to table columns with equivalent type definitions. In particular
+    if these are string columns they should have the same charset/collation.
+
+    Use objects of the companion class Item_equal_fields_iterator to iterate
+    over all items from the list of the Item_field/Item_direct_view_ref classes.
+  */ 
+  List<Item> equal_items; 
+  /* 
+     TRUE <-> one of the items is a const item.
+     Such an item is always first in the equal_items list
+  */
+  bool with_const;        
+  /* 
+    The field eval_item is used when this item is evaluated
+    with the method val_int()
+  */  
   cmp_item *eval_item;
-  Arg_comparator cmp;
+  /*
+    This initially is set to FALSE. It becomes TRUE when this item is evaluated
+    as being always false. If the flag is TRUE the contents of the
+    equal_items list should be ignored.
+  */
   bool cond_false;
+  /* 
+    compare_as_dates=TRUE <-> constants equal to fields from equal_items
+    must be compared as datetimes and not as strings.
+    compare_as_dates can be TRUE only if with_const=TRUE 
+  */
   bool compare_as_dates;
+  /* 
+    The comparator used to compare constants equal to fields from equal_items
+    as datetimes. The comparator is used only if compare_as_dates=TRUE
+  */
+  Arg_comparator cmp;
 public:
   inline Item_equal()
-    : Item_bool_func(), const_item(0), eval_item(0), cond_false(0)
+    : Item_bool_func(), with_const(FALSE), eval_item(0), cond_false(0)
   { const_item_cache=0 ;}
-  Item_equal(Item_field *f1, Item_field *f2);
-  Item_equal(Item *c, Item_field *f);
+  Item_equal(Item *f1, Item *f2, bool with_const_item);
   Item_equal(Item_equal *item_equal);
-  inline Item* get_const() { return const_item; }
-  void compare_const(Item *c);
-  void add(Item *c, Item_field *f);
-  void add(Item *c);
-  void add(Item_field *f);
-  uint members();
+  /* Currently the const item is always the first in the list of equal items */
+  inline Item* get_const() { return with_const ? equal_items.head() : NULL; }
+  void add_const(Item *c, Item *f = NULL);
+  /** Add a non-constant item to the multiple equality */
+  void add(Item *f) { equal_items.push_back(f); }
   bool contains(Field *field);
-  Item_field* get_first() { return fields.head(); }
+  Item* get_first(Item *field);
+  /** Get number of field items / references to field items in this object */   
+  uint n_field_items() { return equal_items.elements-test(with_const); }
   void merge(Item_equal *item);
   void update_const();
   enum Functype functype() const { return MULT_EQUAL_FUNC; }
@@ -1629,15 +1709,18 @@ public:
   const char *func_name() const { return "multiple equal"; }
   optimize_type select_optimize() const { return OPTIMIZE_EQUAL; }
   void sort(Item_field_cmpfunc compare, void *arg);
-  friend class Item_equal_iterator;
   void fix_length_and_dec();
   bool fix_fields(THD *thd, Item **ref);
   void update_used_tables();
   bool walk(Item_processor processor, bool walk_subquery, uchar *arg);
   Item *transform(Item_transformer transformer, uchar *arg);
   virtual void print(String *str, enum_query_type query_type);
-  CHARSET_INFO *compare_collation() 
-  { return fields.head()->collation.collation; }
+  CHARSET_INFO *compare_collation();
+  friend class Item_equal_fields_iterator;
+  friend Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
+                           Item_equal *item_equal);
+  friend bool setup_sj_materialization_part1(struct st_join_table *tab);
+  friend bool setup_sj_materialization_part2(struct st_join_table *tab);
 }; 
 
 class COND_EQUAL: public Sql_alloc
@@ -1655,23 +1738,52 @@ public:
 };
 
 
-class Item_equal_iterator : public List_iterator_fast<Item_field>
+/*
+  Iterates over the equal_items list of an Item_equal, stepping over the
+  leading constant (if any) so that only column references are returned.
+*/
+
+class Item_equal_fields_iterator : public List_iterator_fast<Item>
 {
+  Item_equal *item_equal;         /* the multiple equality being iterated */
+  Item *curr_item;                /* item most recently fetched from the list */
 public:
-  inline Item_equal_iterator(Item_equal &item_equal) 
-    :List_iterator_fast<Item_field> (item_equal.fields)
-  {}
-  inline Item_field* operator++(int)
-  { 
-    Item_field *item= (*(List_iterator_fast<Item_field> *) this)++;
-    return  item;
+  Item_equal_fields_iterator(Item_equal &item_eq) 
+    :List_iterator_fast<Item> (item_eq.equal_items)
+  {
+    curr_item= NULL;
+    item_equal= &item_eq;
+    if (item_eq.with_const)
+    {
+      List_iterator_fast<Item> *list_it= this;
+      curr_item=  (*list_it)++;   /* step over the constant */
+    }
   }
-  inline void rewind(void) 
+  Item* operator++(int)           /* fetch the next item and remember it */
   { 
-    List_iterator_fast<Item_field>::rewind();
+    List_iterator_fast<Item> *list_it= this;
+    curr_item= (*list_it)++;
+    return curr_item;
+  }
+  Item ** ref()                   /* forwards to the base iterator's ref() */
+  {
+    return List_iterator_fast<Item>::ref();
   }
+  void rewind(void) 
+  { 
+    List_iterator_fast<Item> *list_it= this;
+    list_it->rewind();
+    if (item_equal->with_const)   /* NOTE(review): else curr_item is kept */
+      curr_item= (*list_it)++;    /* step over the constant */
+  }  
+  Field *get_curr_field()         /* NOTE(review): curr_item is not checked */
+  {
+    Item_field *item= (Item_field *) (curr_item->real_item());
+     return item->field;
+  }  
 };
 
+
 class Item_cond_and :public Item_cond
 {
 public:
@@ -1727,6 +1839,14 @@ public:
   Item *neg_transformer(THD *thd);
 };
 
+class Item_func_dyncol_exists :public Item_bool_func
+{ /* Two-argument boolean function; func_name() reports "column_exists" */
+public:
+  Item_func_dyncol_exists(Item *str, Item *num) :Item_bool_func(str, num) {}
+  longlong val_int();                   /* declared only; no body in this class */
+  const char *func_name() const { return "column_exists"; }
+};
+
 inline bool is_cond_or(Item *item)
 {
   if (item->type() != Item::COND_ITEM)
@@ -1736,25 +1856,6 @@ inline bool is_cond_or(Item *item)
   return (cond_item->functype() == Item_func::COND_OR_FUNC);
 }
 
-/*
-  XOR is Item_cond, not an Item_int_func because we could like to
-  optimize (a XOR b) later on. It's low prio, though
-*/
-
-class Item_cond_xor :public Item_cond
-{
-public:
-  Item_cond_xor() :Item_cond() {}
-  Item_cond_xor(Item *i1,Item *i2) :Item_cond(i1,i2) {}
-  enum Functype functype() const { return COND_XOR_FUNC; }
-  /* TODO: remove the next line when implementing XOR optimization */
-  enum Type type() const { return FUNC_ITEM; }
-  longlong val_int();
-  const char *func_name() const { return "xor"; }
-  void top_level_item() {}
-};
-
-
 /* Some useful inline functions */
 
 inline Item *and_conds(Item *a, Item *b)
@@ -1787,3 +1888,4 @@ extern Ge_creator ge_creator;
 extern Le_creator le_creator;
 
 #endif /* ITEM_CMPFUNC_INCLUDED */
+
diff --git a/sql/item_create.cc b/sql/item_create.cc
index 96b483b9e99..315ca857f6f 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -1,5 +1,6 @@
 /*
-   Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -34,6 +35,69 @@
 
 /*
 =============================================================================
+  HELPER FUNCTIONS
+=============================================================================
+*/
+
+static const char* item_name(Item *a, String *str)
+{
+  if (a->name)
+    return a->name;               /* the item already carries a name */
+  str->length(0);                 /* otherwise empty str and print a into it */
+  a->print(str, QT_ORDINARY);
+  return str->c_ptr_safe();       /* the returned text lives in str */
+}
+
+/* Report errcode via my_error() with number, the name of item a and maximum */
+static void wrong_precision_error(uint errcode, Item *a,
+                                  ulonglong number, ulong maximum)
+{
+  char buff[1024];
+  String buf(buff, sizeof(buff), system_charset_info);
+  /* number is capped at UINT_MAX32 before it is cast down to uint */
+  my_error(errcode, MYF(0), (uint) min(number, UINT_MAX32),
+           item_name(a, &buf), maximum);
+}
+
+
+/**
+  Validate the precision and scale of a declaration and store them.
+  Failures are reported through my_error().
+  @return
+    0  ok, *out_length and *out_decimals are set
+    1  error: a limit was exceeded, or the scale exceeds the precision
+*/
+
+bool get_length_and_scale(ulonglong length, ulonglong decimals,
+                          ulong *out_length, uint *out_decimals,
+                          uint max_precision, uint max_scale,
+                          Item *a)
+{
+  if (length > (ulonglong) max_precision)
+  {
+    wrong_precision_error(ER_TOO_BIG_PRECISION, a, length, max_precision);
+    return 1;
+  }
+  if (decimals > (ulonglong) max_scale)
+  {
+    wrong_precision_error(ER_TOO_BIG_SCALE, a, decimals, max_scale);
+    return 1;
+  }
+  /* both values are within their limits here, so the casts lose nothing */
+  *out_length=  (ulong) length;
+  *out_decimals=  (uint) decimals;
+  my_decimal_trim(out_length, out_decimals);
+  /* reject a scale (decimals) that is larger than the precision (length) */
+  if (*out_length < *out_decimals)
+  {
+    my_error(ER_M_BIGGER_THAN_D, MYF(0), "");
+    return 1;
+  }
+  return 0;
+}
+
+/*
+=============================================================================
   LOCAL DECLARATIONS
 =============================================================================
 */
@@ -83,7 +147,7 @@ public:
     @param thd The current thread
     @return An item representing the function call
   */
-  virtual Item *create(THD *thd) = 0;
+  virtual Item *create_builder(THD *thd) = 0;
 
 protected:
   /** Constructor. */
@@ -108,7 +172,7 @@ public:
     @param arg1 The first argument of the function
     @return An item representing the function call
   */
-  virtual Item *create(THD *thd, Item *arg1) = 0;
+  virtual Item *create_1_arg(THD *thd, Item *arg1) = 0;
 
 protected:
   /** Constructor. */
@@ -134,7 +198,7 @@ public:
     @param arg2 The second argument of the function
     @return An item representing the function call
   */
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2) = 0;
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) = 0;
 
 protected:
   /** Constructor. */
@@ -161,7 +225,7 @@ public:
     @param arg3 The third argument of the function
     @return An item representing the function call
   */
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3) = 0;
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3) = 0;
 
 protected:
   /** Constructor. */
@@ -178,8 +242,8 @@ protected:
 class Create_sp_func : public Create_qfunc
 {
 public:
-  virtual Item *create(THD *thd, LEX_STRING db, LEX_STRING name,
-                       bool use_explicit_name, List<Item> *item_list);
+  virtual Item *create_with_db(THD *thd, LEX_STRING db, LEX_STRING name,
+                               bool use_explicit_name, List<Item> *item_list);
 
   static Create_sp_func s_singleton;
 
@@ -224,7 +288,7 @@ protected:
 class Create_func_abs : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_abs s_singleton;
 
@@ -237,7 +301,7 @@ protected:
 class Create_func_acos : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_acos s_singleton;
 
@@ -250,7 +314,7 @@ protected:
 class Create_func_addtime : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_addtime s_singleton;
 
@@ -263,7 +327,7 @@ protected:
 class Create_func_aes_encrypt : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_aes_encrypt s_singleton;
 
@@ -276,7 +340,7 @@ protected:
 class Create_func_aes_decrypt : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_aes_decrypt s_singleton;
 
@@ -290,7 +354,7 @@ protected:
 class Create_func_area : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_area s_singleton;
 
@@ -305,7 +369,7 @@ protected:
 class Create_func_as_wkb : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_as_wkb s_singleton;
 
@@ -320,7 +384,7 @@ protected:
 class Create_func_as_wkt : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_as_wkt s_singleton;
 
@@ -334,7 +398,7 @@ protected:
 class Create_func_asin : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_asin s_singleton;
 
@@ -360,7 +424,7 @@ protected:
 class Create_func_benchmark : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_benchmark s_singleton;
 
@@ -373,7 +437,7 @@ protected:
 class Create_func_bin : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_bin s_singleton;
 
@@ -386,7 +450,7 @@ protected:
 class Create_func_bit_count : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_bit_count s_singleton;
 
@@ -399,7 +463,7 @@ protected:
 class Create_func_bit_length : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_bit_length s_singleton;
 
@@ -412,7 +476,7 @@ protected:
 class Create_func_ceiling : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_ceiling s_singleton;
 
@@ -426,7 +490,7 @@ protected:
 class Create_func_centroid : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_centroid s_singleton;
 
@@ -440,7 +504,7 @@ protected:
 class Create_func_char_length : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_char_length s_singleton;
 
@@ -453,7 +517,7 @@ protected:
 class Create_func_coercibility : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_coercibility s_singleton;
 
@@ -466,7 +530,7 @@ protected:
 class Create_func_compress : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_compress s_singleton;
 
@@ -505,7 +569,7 @@ protected:
 class Create_func_connection_id : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_connection_id s_singleton;
 
@@ -519,7 +583,7 @@ protected:
 class Create_func_contains : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_contains s_singleton;
 
@@ -533,7 +597,7 @@ protected:
 class Create_func_conv : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_conv s_singleton;
 
@@ -546,7 +610,7 @@ protected:
 class Create_func_convert_tz : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_convert_tz s_singleton;
 
@@ -559,7 +623,7 @@ protected:
 class Create_func_cos : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_cos s_singleton;
 
@@ -572,7 +636,7 @@ protected:
 class Create_func_cot : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_cot s_singleton;
 
@@ -585,7 +649,7 @@ protected:
 class Create_func_crc32 : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_crc32 s_singleton;
 
@@ -599,7 +663,7 @@ protected:
 class Create_func_crosses : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_crosses s_singleton;
 
@@ -613,7 +677,7 @@ protected:
 class Create_func_date_format : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_date_format s_singleton;
 
@@ -626,7 +690,7 @@ protected:
 class Create_func_datediff : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_datediff s_singleton;
 
@@ -639,7 +703,7 @@ protected:
 class Create_func_dayname : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_dayname s_singleton;
 
@@ -652,7 +716,7 @@ protected:
 class Create_func_dayofmonth : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_dayofmonth s_singleton;
 
@@ -665,7 +729,7 @@ protected:
 class Create_func_dayofweek : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_dayofweek s_singleton;
 
@@ -678,7 +742,7 @@ protected:
 class Create_func_dayofyear : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_dayofyear s_singleton;
 
@@ -691,7 +755,7 @@ protected:
 class Create_func_decode : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_decode s_singleton;
 
@@ -704,7 +768,7 @@ protected:
 class Create_func_degrees : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_degrees s_singleton;
 
@@ -744,7 +808,7 @@ protected:
 class Create_func_dimension : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_dimension s_singleton;
 
@@ -759,7 +823,7 @@ protected:
 class Create_func_disjoint : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_disjoint s_singleton;
 
@@ -786,7 +850,7 @@ protected:
 class Create_func_encode : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_encode s_singleton;
 
@@ -813,7 +877,7 @@ protected:
 class Create_func_endpoint : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_endpoint s_singleton;
 
@@ -828,7 +892,7 @@ protected:
 class Create_func_envelope : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_envelope s_singleton;
 
@@ -843,7 +907,7 @@ protected:
 class Create_func_equals : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_equals s_singleton;
 
@@ -857,7 +921,7 @@ protected:
 class Create_func_exp : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_exp s_singleton;
 
@@ -884,7 +948,7 @@ protected:
 class Create_func_exteriorring : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_exteriorring s_singleton;
 
@@ -911,7 +975,7 @@ protected:
 class Create_func_find_in_set : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_find_in_set s_singleton;
 
@@ -924,7 +988,7 @@ protected:
 class Create_func_floor : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_floor s_singleton;
 
@@ -950,7 +1014,7 @@ protected:
 class Create_func_found_rows : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_found_rows s_singleton;
 
@@ -963,7 +1027,7 @@ protected:
 class Create_func_from_days : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_from_days s_singleton;
 
@@ -1020,7 +1084,7 @@ protected:
 class Create_func_geometry_type : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_geometry_type s_singleton;
 
@@ -1035,7 +1099,7 @@ protected:
 class Create_func_geometryn : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_geometryn s_singleton;
 
@@ -1049,7 +1113,7 @@ protected:
 class Create_func_get_lock : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_get_lock s_singleton;
 
@@ -1063,7 +1127,7 @@ protected:
 class Create_func_glength : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_glength s_singleton;
 
@@ -1090,7 +1154,7 @@ protected:
 class Create_func_hex : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_hex s_singleton;
 
@@ -1103,7 +1167,7 @@ protected:
 class Create_func_ifnull : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_ifnull s_singleton;
 
@@ -1116,7 +1180,7 @@ protected:
 class Create_func_inet_ntoa : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_inet_ntoa s_singleton;
 
@@ -1129,7 +1193,7 @@ protected:
 class Create_func_inet_aton : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_inet_aton s_singleton;
 
@@ -1142,7 +1206,7 @@ protected:
 class Create_func_instr : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_instr s_singleton;
 
@@ -1156,7 +1220,7 @@ protected:
 class Create_func_interiorringn : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_interiorringn s_singleton;
 
@@ -1171,7 +1235,7 @@ protected:
 class Create_func_intersects : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_intersects s_singleton;
 
@@ -1185,7 +1249,7 @@ protected:
 class Create_func_is_free_lock : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_is_free_lock s_singleton;
 
@@ -1198,7 +1262,7 @@ protected:
 class Create_func_is_used_lock : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_is_used_lock s_singleton;
 
@@ -1212,7 +1276,7 @@ protected:
 class Create_func_isclosed : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_isclosed s_singleton;
 
@@ -1227,7 +1291,7 @@ protected:
 class Create_func_isempty : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_isempty s_singleton;
 
@@ -1241,7 +1305,7 @@ protected:
 class Create_func_isnull : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_isnull s_singleton;
 
@@ -1255,7 +1319,7 @@ protected:
 class Create_func_issimple : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_issimple s_singleton;
 
@@ -1269,7 +1333,7 @@ protected:
 class Create_func_last_day : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_last_day s_singleton;
 
@@ -1295,7 +1359,7 @@ protected:
 class Create_func_lcase : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_lcase s_singleton;
 
@@ -1321,7 +1385,7 @@ protected:
 class Create_func_length : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_length s_singleton;
 
@@ -1335,7 +1399,7 @@ protected:
 class Create_func_like_range_min : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_like_range_min s_singleton;
 
@@ -1348,7 +1412,7 @@ protected:
 class Create_func_like_range_max : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_like_range_max s_singleton;
 
@@ -1362,7 +1426,7 @@ protected:
 class Create_func_ln : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_ln s_singleton;
 
@@ -1375,7 +1439,7 @@ protected:
 class Create_func_load_file : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_load_file s_singleton;
 
@@ -1414,7 +1478,7 @@ protected:
 class Create_func_log10 : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_log10 s_singleton;
 
@@ -1427,7 +1491,7 @@ protected:
 class Create_func_log2 : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_log2 s_singleton;
 
@@ -1440,7 +1504,7 @@ protected:
 class Create_func_lpad : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_lpad s_singleton;
 
@@ -1453,7 +1517,7 @@ protected:
 class Create_func_ltrim : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_ltrim s_singleton;
 
@@ -1466,7 +1530,7 @@ protected:
 class Create_func_makedate : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_makedate s_singleton;
 
@@ -1479,7 +1543,7 @@ protected:
 class Create_func_maketime : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_maketime s_singleton;
 
@@ -1518,7 +1582,7 @@ protected:
 class Create_func_md5 : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_md5 s_singleton;
 
@@ -1531,7 +1595,7 @@ protected:
 class Create_func_monthname : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_monthname s_singleton;
 
@@ -1544,7 +1608,7 @@ protected:
 class Create_func_name_const : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_name_const s_singleton;
 
@@ -1557,7 +1621,7 @@ protected:
 class Create_func_nullif : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_nullif s_singleton;
 
@@ -1571,7 +1635,7 @@ protected:
 class Create_func_numgeometries : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_numgeometries s_singleton;
 
@@ -1586,7 +1650,7 @@ protected:
 class Create_func_numinteriorring : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_numinteriorring s_singleton;
 
@@ -1601,7 +1665,7 @@ protected:
 class Create_func_numpoints : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_numpoints s_singleton;
 
@@ -1615,7 +1679,7 @@ protected:
 class Create_func_oct : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_oct s_singleton;
 
@@ -1628,7 +1692,7 @@ protected:
 class Create_func_ord : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_ord s_singleton;
 
@@ -1642,7 +1706,7 @@ protected:
 class Create_func_overlaps : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_overlaps s_singleton;
 
@@ -1656,7 +1720,7 @@ protected:
 class Create_func_period_add : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_period_add s_singleton;
 
@@ -1669,7 +1733,7 @@ protected:
 class Create_func_period_diff : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_period_diff s_singleton;
 
@@ -1682,7 +1746,7 @@ protected:
 class Create_func_pi : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_pi s_singleton;
 
@@ -1696,7 +1760,7 @@ protected:
 class Create_func_pointn : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_pointn s_singleton;
 
@@ -1710,7 +1774,7 @@ protected:
 class Create_func_pow : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_pow s_singleton;
 
@@ -1723,7 +1787,7 @@ protected:
 class Create_func_quote : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_quote s_singleton;
 
@@ -1736,7 +1800,7 @@ protected:
 class Create_func_radians : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_radians s_singleton;
 
@@ -1762,7 +1826,7 @@ protected:
 class Create_func_release_lock : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_release_lock s_singleton;
 
@@ -1775,7 +1839,7 @@ protected:
 class Create_func_reverse : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_reverse s_singleton;
 
@@ -1801,7 +1865,7 @@ protected:
 class Create_func_row_count : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_row_count s_singleton;
 
@@ -1814,7 +1878,7 @@ protected:
 class Create_func_rpad : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_rpad s_singleton;
 
@@ -1827,7 +1891,7 @@ protected:
 class Create_func_rtrim : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_rtrim s_singleton;
 
@@ -1840,7 +1904,7 @@ protected:
 class Create_func_sec_to_time : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_sec_to_time s_singleton;
 
@@ -1853,7 +1917,7 @@ protected:
 class Create_func_sha : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_sha s_singleton;
 
@@ -1866,7 +1930,7 @@ protected:
 class Create_func_sha2 : public Create_func_arg2
 {
 public:
-  virtual Item* create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item* create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_sha2 s_singleton;
 
@@ -1879,7 +1943,7 @@ protected:
 class Create_func_sign : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_sign s_singleton;
 
@@ -1892,7 +1956,7 @@ protected:
 class Create_func_sin : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_sin s_singleton;
 
@@ -1905,7 +1969,7 @@ protected:
 class Create_func_sleep : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_sleep s_singleton;
 
@@ -1918,7 +1982,7 @@ protected:
 class Create_func_soundex : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_soundex s_singleton;
 
@@ -1931,7 +1995,7 @@ protected:
 class Create_func_space : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_space s_singleton;
 
@@ -1944,7 +2008,7 @@ protected:
 class Create_func_sqrt : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_sqrt s_singleton;
 
@@ -1958,7 +2022,7 @@ protected:
 class Create_func_srid : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_srid s_singleton;
 
@@ -1973,7 +2037,7 @@ protected:
 class Create_func_startpoint : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_startpoint s_singleton;
 
@@ -1987,7 +2051,7 @@ protected:
 class Create_func_str_to_date : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_str_to_date s_singleton;
 
@@ -2000,7 +2064,7 @@ protected:
 class Create_func_strcmp : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_strcmp s_singleton;
 
@@ -2013,7 +2077,7 @@ protected:
 class Create_func_substr_index : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_substr_index s_singleton;
 
@@ -2026,7 +2090,7 @@ protected:
 class Create_func_subtime : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_subtime s_singleton;
 
@@ -2039,7 +2103,7 @@ protected:
 class Create_func_tan : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_tan s_singleton;
 
@@ -2052,7 +2116,7 @@ protected:
 class Create_func_time_format : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_time_format s_singleton;
 
@@ -2065,7 +2129,7 @@ protected:
 class Create_func_time_to_sec : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_time_to_sec s_singleton;
 
@@ -2078,7 +2142,7 @@ protected:
 class Create_func_timediff : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_timediff s_singleton;
 
@@ -2091,7 +2155,7 @@ protected:
 class Create_func_to_days : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_to_days s_singleton;
 
@@ -2103,7 +2167,7 @@ protected:
 class Create_func_to_seconds : public Create_func_arg1
 {
 public:
-  virtual Item* create(THD *thd, Item *arg1);
+  virtual Item* create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_to_seconds s_singleton;
 
@@ -2117,7 +2181,7 @@ protected:
 class Create_func_touches : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_touches s_singleton;
 
@@ -2131,7 +2195,7 @@ protected:
 class Create_func_ucase : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_ucase s_singleton;
 
@@ -2144,7 +2208,7 @@ protected:
 class Create_func_uncompress : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_uncompress s_singleton;
 
@@ -2157,7 +2221,7 @@ protected:
 class Create_func_uncompressed_length : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_uncompressed_length s_singleton;
 
@@ -2170,7 +2234,7 @@ protected:
 class Create_func_unhex : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_unhex s_singleton;
 
@@ -2196,7 +2260,7 @@ protected:
 class Create_func_uuid : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_uuid s_singleton;
 
@@ -2209,7 +2273,7 @@ protected:
 class Create_func_uuid_short : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_uuid_short s_singleton;
 
@@ -2222,7 +2286,7 @@ protected:
 class Create_func_version : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_version s_singleton;
 
@@ -2235,7 +2299,7 @@ protected:
 class Create_func_weekday : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_weekday s_singleton;
 
@@ -2248,7 +2312,7 @@ protected:
 class Create_func_weekofyear : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_weekofyear s_singleton;
 
@@ -2262,7 +2326,7 @@ protected:
 class Create_func_within : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_within s_singleton;
 
@@ -2277,7 +2341,7 @@ protected:
 class Create_func_x : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_x s_singleton;
 
@@ -2291,7 +2355,7 @@ protected:
 class Create_func_xml_extractvalue : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_xml_extractvalue s_singleton;
 
@@ -2304,7 +2368,7 @@ protected:
 class Create_func_xml_update : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_xml_update s_singleton;
 
@@ -2318,7 +2382,7 @@ protected:
 class Create_func_y : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_y s_singleton;
 
@@ -2413,7 +2477,7 @@ Create_qfunc::create_func(THD *thd, LEX_STRING name, List<Item> *item_list)
   if (thd->lex->copy_db_to(&db.str, &db.length))
     return NULL;
 
-  return create(thd, db, name, false, item_list);
+  return create_with_db(thd, db, name, false, item_list);
 }
 
 
@@ -2531,8 +2595,8 @@ Create_udf_func::create(THD *thd, udf_func *udf, List<Item> *item_list)
 Create_sp_func Create_sp_func::s_singleton;
 
 Item*
-Create_sp_func::create(THD *thd, LEX_STRING db, LEX_STRING name,
-                       bool use_explicit_name, List<Item> *item_list)
+Create_sp_func::create_with_db(THD *thd, LEX_STRING db, LEX_STRING name,
+                               bool use_explicit_name, List<Item> *item_list)
 {
   int arg_count= 0;
   Item *func= NULL;
@@ -2599,7 +2663,7 @@ Create_func_arg0::create_func(THD *thd, LEX_STRING name, List<Item> *item_list)
     return NULL;
   }
 
-  return create(thd);
+  return create_builder(thd);
 }
 
 
@@ -2625,7 +2689,7 @@ Create_func_arg1::create_func(THD *thd, LEX_STRING name, List<Item> *item_list)
     return NULL;
   }
 
-  return create(thd, param_1);
+  return create_1_arg(thd, param_1);
 }
 
 
@@ -2653,7 +2717,7 @@ Create_func_arg2::create_func(THD *thd, LEX_STRING name, List<Item> *item_list)
     return NULL;
   }
 
-  return create(thd, param_1, param_2);
+  return create_2_arg(thd, param_1, param_2);
 }
 
 
@@ -2683,14 +2747,14 @@ Create_func_arg3::create_func(THD *thd, LEX_STRING name, List<Item> *item_list)
     return NULL;
   }
 
-  return create(thd, param_1, param_2, param_3);
+  return create_3_arg(thd, param_1, param_2, param_3);
 }
 
 
 Create_func_abs Create_func_abs::s_singleton;
 
 Item*
-Create_func_abs::create(THD *thd, Item *arg1)
+Create_func_abs::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_abs(arg1);
 }
@@ -2699,7 +2763,7 @@ Create_func_abs::create(THD *thd, Item *arg1)
 Create_func_acos Create_func_acos::s_singleton;
 
 Item*
-Create_func_acos::create(THD *thd, Item *arg1)
+Create_func_acos::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_acos(arg1);
 }
@@ -2708,7 +2772,7 @@ Create_func_acos::create(THD *thd, Item *arg1)
 Create_func_addtime Create_func_addtime::s_singleton;
 
 Item*
-Create_func_addtime::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_addtime::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_add_time(arg1, arg2, 0, 0);
 }
@@ -2717,7 +2781,7 @@ Create_func_addtime::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_aes_encrypt Create_func_aes_encrypt::s_singleton;
 
 Item*
-Create_func_aes_encrypt::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_aes_encrypt::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_aes_encrypt(arg1, arg2);
 }
@@ -2726,7 +2790,7 @@ Create_func_aes_encrypt::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_aes_decrypt Create_func_aes_decrypt::s_singleton;
 
 Item*
-Create_func_aes_decrypt::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_aes_decrypt::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_aes_decrypt(arg1, arg2);
 }
@@ -2736,7 +2800,7 @@ Create_func_aes_decrypt::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_area Create_func_area::s_singleton;
 
 Item*
-Create_func_area::create(THD *thd, Item *arg1)
+Create_func_area::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_area(arg1);
 }
@@ -2747,7 +2811,7 @@ Create_func_area::create(THD *thd, Item *arg1)
 Create_func_as_wkb Create_func_as_wkb::s_singleton;
 
 Item*
-Create_func_as_wkb::create(THD *thd, Item *arg1)
+Create_func_as_wkb::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_as_wkb(arg1);
 }
@@ -2758,7 +2822,7 @@ Create_func_as_wkb::create(THD *thd, Item *arg1)
 Create_func_as_wkt Create_func_as_wkt::s_singleton;
 
 Item*
-Create_func_as_wkt::create(THD *thd, Item *arg1)
+Create_func_as_wkt::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_as_wkt(arg1);
 }
@@ -2768,7 +2832,7 @@ Create_func_as_wkt::create(THD *thd, Item *arg1)
 Create_func_asin Create_func_asin::s_singleton;
 
 Item*
-Create_func_asin::create(THD *thd, Item *arg1)
+Create_func_asin::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_asin(arg1);
 }
@@ -2814,7 +2878,7 @@ Create_func_atan::create_native(THD *thd, LEX_STRING name,
 Create_func_benchmark Create_func_benchmark::s_singleton;
 
 Item*
-Create_func_benchmark::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_benchmark::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
   return new (thd->mem_root) Item_func_benchmark(arg1, arg2);
@@ -2824,7 +2888,7 @@ Create_func_benchmark::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_bin Create_func_bin::s_singleton;
 
 Item*
-Create_func_bin::create(THD *thd, Item *arg1)
+Create_func_bin::create_1_arg(THD *thd, Item *arg1)
 {
   Item *i10= new (thd->mem_root) Item_int((int32) 10,2);
   Item *i2= new (thd->mem_root) Item_int((int32) 2,1);
@@ -2835,7 +2899,7 @@ Create_func_bin::create(THD *thd, Item *arg1)
 Create_func_bit_count Create_func_bit_count::s_singleton;
 
 Item*
-Create_func_bit_count::create(THD *thd, Item *arg1)
+Create_func_bit_count::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_bit_count(arg1);
 }
@@ -2844,7 +2908,7 @@ Create_func_bit_count::create(THD *thd, Item *arg1)
 Create_func_bit_length Create_func_bit_length::s_singleton;
 
 Item*
-Create_func_bit_length::create(THD *thd, Item *arg1)
+Create_func_bit_length::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_bit_length(arg1);
 }
@@ -2853,7 +2917,7 @@ Create_func_bit_length::create(THD *thd, Item *arg1)
 Create_func_ceiling Create_func_ceiling::s_singleton;
 
 Item*
-Create_func_ceiling::create(THD *thd, Item *arg1)
+Create_func_ceiling::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_ceiling(arg1);
 }
@@ -2863,7 +2927,7 @@ Create_func_ceiling::create(THD *thd, Item *arg1)
 Create_func_centroid Create_func_centroid::s_singleton;
 
 Item*
-Create_func_centroid::create(THD *thd, Item *arg1)
+Create_func_centroid::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_centroid(arg1);
 }
@@ -2873,7 +2937,7 @@ Create_func_centroid::create(THD *thd, Item *arg1)
 Create_func_char_length Create_func_char_length::s_singleton;
 
 Item*
-Create_func_char_length::create(THD *thd, Item *arg1)
+Create_func_char_length::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_char_length(arg1);
 }
@@ -2882,7 +2946,7 @@ Create_func_char_length::create(THD *thd, Item *arg1)
 Create_func_coercibility Create_func_coercibility::s_singleton;
 
 Item*
-Create_func_coercibility::create(THD *thd, Item *arg1)
+Create_func_coercibility::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_coercibility(arg1);
 }
@@ -2934,7 +2998,7 @@ Create_func_concat_ws::create_native(THD *thd, LEX_STRING name,
 Create_func_compress Create_func_compress::s_singleton;
 
 Item*
-Create_func_compress::create(THD *thd, Item *arg1)
+Create_func_compress::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_compress(arg1);
 }
@@ -2943,7 +3007,7 @@ Create_func_compress::create(THD *thd, Item *arg1)
 Create_func_connection_id Create_func_connection_id::s_singleton;
 
 Item*
-Create_func_connection_id::create(THD *thd)
+Create_func_connection_id::create_builder(THD *thd)
 {
   thd->lex->safe_to_cache_query= 0;
   return new (thd->mem_root) Item_func_connection_id();
@@ -2954,7 +3018,7 @@ Create_func_connection_id::create(THD *thd)
 Create_func_contains Create_func_contains::s_singleton;
 
 Item*
-Create_func_contains::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_contains::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_CONTAINS_FUNC);
@@ -2965,7 +3029,7 @@ Create_func_contains::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_conv Create_func_conv::s_singleton;
 
 Item*
-Create_func_conv::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_conv::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_conv(arg1, arg2, arg3);
 }
@@ -2974,7 +3038,7 @@ Create_func_conv::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 Create_func_convert_tz Create_func_convert_tz::s_singleton;
 
 Item*
-Create_func_convert_tz::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_convert_tz::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_convert_tz(arg1, arg2, arg3);
 }
@@ -2983,7 +3047,7 @@ Create_func_convert_tz::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 Create_func_cos Create_func_cos::s_singleton;
 
 Item*
-Create_func_cos::create(THD *thd, Item *arg1)
+Create_func_cos::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_cos(arg1);
 }
@@ -2992,7 +3056,7 @@ Create_func_cos::create(THD *thd, Item *arg1)
 Create_func_cot Create_func_cot::s_singleton;
 
 Item*
-Create_func_cot::create(THD *thd, Item *arg1)
+Create_func_cot::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_cot(arg1);
 }
@@ -3001,7 +3065,7 @@ Create_func_cot::create(THD *thd, Item *arg1)
 Create_func_crc32 Create_func_crc32::s_singleton;
 
 Item*
-Create_func_crc32::create(THD *thd, Item *arg1)
+Create_func_crc32::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_crc32(arg1);
 }
@@ -3011,7 +3075,7 @@ Create_func_crc32::create(THD *thd, Item *arg1)
 Create_func_crosses Create_func_crosses::s_singleton;
 
 Item*
-Create_func_crosses::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_crosses::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_CROSSES_FUNC);
@@ -3022,7 +3086,7 @@ Create_func_crosses::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_date_format Create_func_date_format::s_singleton;
 
 Item*
-Create_func_date_format::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_date_format::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_date_format(arg1, arg2, 0);
 }
@@ -3031,7 +3095,7 @@ Create_func_date_format::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_datediff Create_func_datediff::s_singleton;
 
 Item*
-Create_func_datediff::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_datediff::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   Item *i1= new (thd->mem_root) Item_func_to_days(arg1);
   Item *i2= new (thd->mem_root) Item_func_to_days(arg2);
@@ -3043,7 +3107,7 @@ Create_func_datediff::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_dayname Create_func_dayname::s_singleton;
 
 Item*
-Create_func_dayname::create(THD *thd, Item *arg1)
+Create_func_dayname::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_dayname(arg1);
 }
@@ -3052,7 +3116,7 @@ Create_func_dayname::create(THD *thd, Item *arg1)
 Create_func_dayofmonth Create_func_dayofmonth::s_singleton;
 
 Item*
-Create_func_dayofmonth::create(THD *thd, Item *arg1)
+Create_func_dayofmonth::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_dayofmonth(arg1);
 }
@@ -3061,7 +3125,7 @@ Create_func_dayofmonth::create(THD *thd, Item *arg1)
 Create_func_dayofweek Create_func_dayofweek::s_singleton;
 
 Item*
-Create_func_dayofweek::create(THD *thd, Item *arg1)
+Create_func_dayofweek::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_weekday(arg1, 1);
 }
@@ -3070,7 +3134,7 @@ Create_func_dayofweek::create(THD *thd, Item *arg1)
 Create_func_dayofyear Create_func_dayofyear::s_singleton;
 
 Item*
-Create_func_dayofyear::create(THD *thd, Item *arg1)
+Create_func_dayofyear::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_dayofyear(arg1);
 }
@@ -3079,7 +3143,7 @@ Create_func_dayofyear::create(THD *thd, Item *arg1)
 Create_func_decode Create_func_decode::s_singleton;
 
 Item*
-Create_func_decode::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_decode::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_decode(arg1, arg2);
 }
@@ -3088,7 +3152,7 @@ Create_func_decode::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_degrees Create_func_degrees::s_singleton;
 
 Item*
-Create_func_degrees::create(THD *thd, Item *arg1)
+Create_func_degrees::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_units((char*) "degrees", arg1,
                                              180/M_PI, 0.0);
@@ -3173,7 +3237,7 @@ Create_func_des_encrypt::create_native(THD *thd, LEX_STRING name,
 Create_func_dimension Create_func_dimension::s_singleton;
 
 Item*
-Create_func_dimension::create(THD *thd, Item *arg1)
+Create_func_dimension::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_dimension(arg1);
 }
@@ -3184,7 +3248,7 @@ Create_func_dimension::create(THD *thd, Item *arg1)
 Create_func_disjoint Create_func_disjoint::s_singleton;
 
 Item*
-Create_func_disjoint::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_disjoint::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_DISJOINT_FUNC);
@@ -3216,7 +3280,7 @@ Create_func_elt::create_native(THD *thd, LEX_STRING name,
 Create_func_encode Create_func_encode::s_singleton;
 
 Item*
-Create_func_encode::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_encode::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_encode(arg1, arg2);
 }
@@ -3264,7 +3328,7 @@ Create_func_encrypt::create_native(THD *thd, LEX_STRING name,
 Create_func_endpoint Create_func_endpoint::s_singleton;
 
 Item*
-Create_func_endpoint::create(THD *thd, Item *arg1)
+Create_func_endpoint::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_spatial_decomp(arg1,
                                                       Item_func::SP_ENDPOINT);
@@ -3276,7 +3340,7 @@ Create_func_endpoint::create(THD *thd, Item *arg1)
 Create_func_envelope Create_func_envelope::s_singleton;
 
 Item*
-Create_func_envelope::create(THD *thd, Item *arg1)
+Create_func_envelope::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_envelope(arg1);
 }
@@ -3287,7 +3351,7 @@ Create_func_envelope::create(THD *thd, Item *arg1)
 Create_func_equals Create_func_equals::s_singleton;
 
 Item*
-Create_func_equals::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_equals::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_EQUALS_FUNC);
@@ -3298,7 +3362,7 @@ Create_func_equals::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_exp Create_func_exp::s_singleton;
 
 Item*
-Create_func_exp::create(THD *thd, Item *arg1)
+Create_func_exp::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_exp(arg1);
 }
@@ -3361,7 +3425,7 @@ Create_func_export_set::create_native(THD *thd, LEX_STRING name,
 Create_func_exteriorring Create_func_exteriorring::s_singleton;
 
 Item*
-Create_func_exteriorring::create(THD *thd, Item *arg1)
+Create_func_exteriorring::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_spatial_decomp(arg1,
                                                       Item_func::SP_EXTERIORRING);
@@ -3393,7 +3457,7 @@ Create_func_field::create_native(THD *thd, LEX_STRING name,
 Create_func_find_in_set Create_func_find_in_set::s_singleton;
 
 Item*
-Create_func_find_in_set::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_find_in_set::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_find_in_set(arg1, arg2);
 }
@@ -3402,7 +3466,7 @@ Create_func_find_in_set::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_floor Create_func_floor::s_singleton;
 
 Item*
-Create_func_floor::create(THD *thd, Item *arg1)
+Create_func_floor::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_floor(arg1);
 }
@@ -3445,7 +3509,7 @@ Create_func_format::create_native(THD *thd, LEX_STRING name,
 Create_func_found_rows Create_func_found_rows::s_singleton;
 
 Item*
-Create_func_found_rows::create(THD *thd)
+Create_func_found_rows::create_builder(THD *thd)
 {
   DBUG_ENTER("Create_func_found_rows::create");
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
@@ -3457,7 +3521,7 @@ Create_func_found_rows::create(THD *thd)
 Create_func_from_days Create_func_from_days::s_singleton;
 
 Item*
-Create_func_from_days::create(THD *thd, Item *arg1)
+Create_func_from_days::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_from_days(arg1);
 }
@@ -3585,7 +3649,7 @@ Create_func_geometry_from_wkb::create_native(THD *thd, LEX_STRING name,
 Create_func_geometry_type Create_func_geometry_type::s_singleton;
 
 Item*
-Create_func_geometry_type::create(THD *thd, Item *arg1)
+Create_func_geometry_type::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_geometry_type(arg1);
 }
@@ -3596,7 +3660,7 @@ Create_func_geometry_type::create(THD *thd, Item *arg1)
 Create_func_geometryn Create_func_geometryn::s_singleton;
 
 Item*
-Create_func_geometryn::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_geometryn::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_decomp_n(arg1, arg2,
                                                         Item_func::SP_GEOMETRYN);
@@ -3607,7 +3671,7 @@ Create_func_geometryn::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_get_lock Create_func_get_lock::s_singleton;
 
 Item*
-Create_func_get_lock::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_get_lock::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
   thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
@@ -3619,7 +3683,7 @@ Create_func_get_lock::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_glength Create_func_glength::s_singleton;
 
 Item*
-Create_func_glength::create(THD *thd, Item *arg1)
+Create_func_glength::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_glength(arg1);
 }
@@ -3650,7 +3714,7 @@ Create_func_greatest::create_native(THD *thd, LEX_STRING name,
 Create_func_hex Create_func_hex::s_singleton;
 
 Item*
-Create_func_hex::create(THD *thd, Item *arg1)
+Create_func_hex::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_hex(arg1);
 }
@@ -3659,7 +3723,7 @@ Create_func_hex::create(THD *thd, Item *arg1)
 Create_func_ifnull Create_func_ifnull::s_singleton;
 
 Item*
-Create_func_ifnull::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_ifnull::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_ifnull(arg1, arg2);
 }
@@ -3668,7 +3732,7 @@ Create_func_ifnull::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_inet_ntoa Create_func_inet_ntoa::s_singleton;
 
 Item*
-Create_func_inet_ntoa::create(THD *thd, Item *arg1)
+Create_func_inet_ntoa::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_inet_ntoa(arg1);
 }
@@ -3677,7 +3741,7 @@ Create_func_inet_ntoa::create(THD *thd, Item *arg1)
 Create_func_inet_aton Create_func_inet_aton::s_singleton;
 
 Item*
-Create_func_inet_aton::create(THD *thd, Item *arg1)
+Create_func_inet_aton::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_inet_aton(arg1);
 }
@@ -3686,7 +3750,7 @@ Create_func_inet_aton::create(THD *thd, Item *arg1)
 Create_func_instr Create_func_instr::s_singleton;
 
 Item*
-Create_func_instr::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_instr::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_locate(arg1, arg2);
 }
@@ -3696,7 +3760,7 @@ Create_func_instr::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_interiorringn Create_func_interiorringn::s_singleton;
 
 Item*
-Create_func_interiorringn::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_interiorringn::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_decomp_n(arg1, arg2,
                                                         Item_func::SP_INTERIORRINGN);
@@ -3708,7 +3772,7 @@ Create_func_interiorringn::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_intersects Create_func_intersects::s_singleton;
 
 Item*
-Create_func_intersects::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_intersects::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_INTERSECTS_FUNC);
@@ -3719,7 +3783,7 @@ Create_func_intersects::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_is_free_lock Create_func_is_free_lock::s_singleton;
 
 Item*
-Create_func_is_free_lock::create(THD *thd, Item *arg1)
+Create_func_is_free_lock::create_1_arg(THD *thd, Item *arg1)
 {
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
   thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
@@ -3730,7 +3794,7 @@ Create_func_is_free_lock::create(THD *thd, Item *arg1)
 Create_func_is_used_lock Create_func_is_used_lock::s_singleton;
 
 Item*
-Create_func_is_used_lock::create(THD *thd, Item *arg1)
+Create_func_is_used_lock::create_1_arg(THD *thd, Item *arg1)
 {
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
   thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
@@ -3742,7 +3806,7 @@ Create_func_is_used_lock::create(THD *thd, Item *arg1)
 Create_func_isclosed Create_func_isclosed::s_singleton;
 
 Item*
-Create_func_isclosed::create(THD *thd, Item *arg1)
+Create_func_isclosed::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_isclosed(arg1);
 }
@@ -3753,7 +3817,7 @@ Create_func_isclosed::create(THD *thd, Item *arg1)
 Create_func_isempty Create_func_isempty::s_singleton;
 
 Item*
-Create_func_isempty::create(THD *thd, Item *arg1)
+Create_func_isempty::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_isempty(arg1);
 }
@@ -3763,7 +3827,7 @@ Create_func_isempty::create(THD *thd, Item *arg1)
 Create_func_isnull Create_func_isnull::s_singleton;
 
 Item*
-Create_func_isnull::create(THD *thd, Item *arg1)
+Create_func_isnull::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_isnull(arg1);
 }
@@ -3773,7 +3837,7 @@ Create_func_isnull::create(THD *thd, Item *arg1)
 Create_func_issimple Create_func_issimple::s_singleton;
 
 Item*
-Create_func_issimple::create(THD *thd, Item *arg1)
+Create_func_issimple::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_issimple(arg1);
 }
@@ -3783,7 +3847,7 @@ Create_func_issimple::create(THD *thd, Item *arg1)
 Create_func_last_day Create_func_last_day::s_singleton;
 
 Item*
-Create_func_last_day::create(THD *thd, Item *arg1)
+Create_func_last_day::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_last_day(arg1);
 }
@@ -3829,7 +3893,7 @@ Create_func_last_insert_id::create_native(THD *thd, LEX_STRING name,
 Create_func_lcase Create_func_lcase::s_singleton;
 
 Item*
-Create_func_lcase::create(THD *thd, Item *arg1)
+Create_func_lcase::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_lcase(arg1);
 }
@@ -3859,7 +3923,7 @@ Create_func_least::create_native(THD *thd, LEX_STRING name,
 Create_func_length Create_func_length::s_singleton;
 
 Item*
-Create_func_length::create(THD *thd, Item *arg1)
+Create_func_length::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_length(arg1);
 }
@@ -3869,7 +3933,7 @@ Create_func_length::create(THD *thd, Item *arg1)
 Create_func_like_range_min Create_func_like_range_min::s_singleton;
 
 Item*
-Create_func_like_range_min::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_like_range_min::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_like_range_min(arg1, arg2);
 }
@@ -3878,7 +3942,7 @@ Create_func_like_range_min::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_like_range_max Create_func_like_range_max::s_singleton;
 
 Item*
-Create_func_like_range_max::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_like_range_max::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_like_range_max(arg1, arg2);
 }
@@ -3888,7 +3952,7 @@ Create_func_like_range_max::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_ln Create_func_ln::s_singleton;
 
 Item*
-Create_func_ln::create(THD *thd, Item *arg1)
+Create_func_ln::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_ln(arg1);
 }
@@ -3897,7 +3961,7 @@ Create_func_ln::create(THD *thd, Item *arg1)
 Create_func_load_file Create_func_load_file::s_singleton;
 
 Item*
-Create_func_load_file::create(THD *thd, Item *arg1)
+Create_func_load_file::create_1_arg(THD *thd, Item *arg1)
 {
   DBUG_ENTER("Create_func_load_file::create");
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
@@ -3987,7 +4051,7 @@ Create_func_log::create_native(THD *thd, LEX_STRING name,
 Create_func_log10 Create_func_log10::s_singleton;
 
 Item*
-Create_func_log10::create(THD *thd, Item *arg1)
+Create_func_log10::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_log10(arg1);
 }
@@ -3996,7 +4060,7 @@ Create_func_log10::create(THD *thd, Item *arg1)
 Create_func_log2 Create_func_log2::s_singleton;
 
 Item*
-Create_func_log2::create(THD *thd, Item *arg1)
+Create_func_log2::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_log2(arg1);
 }
@@ -4005,7 +4069,7 @@ Create_func_log2::create(THD *thd, Item *arg1)
 Create_func_lpad Create_func_lpad::s_singleton;
 
 Item*
-Create_func_lpad::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_lpad::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_lpad(arg1, arg2, arg3);
 }
@@ -4014,7 +4078,7 @@ Create_func_lpad::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 Create_func_ltrim Create_func_ltrim::s_singleton;
 
 Item*
-Create_func_ltrim::create(THD *thd, Item *arg1)
+Create_func_ltrim::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_ltrim(arg1);
 }
@@ -4023,7 +4087,7 @@ Create_func_ltrim::create(THD *thd, Item *arg1)
 Create_func_makedate Create_func_makedate::s_singleton;
 
 Item*
-Create_func_makedate::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_makedate::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_makedate(arg1, arg2);
 }
@@ -4032,7 +4096,7 @@ Create_func_makedate::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_maketime Create_func_maketime::s_singleton;
 
 Item*
-Create_func_maketime::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_maketime::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_maketime(arg1, arg2, arg3);
 }
@@ -4107,7 +4171,7 @@ Create_func_master_pos_wait::create_native(THD *thd, LEX_STRING name,
 Create_func_md5 Create_func_md5::s_singleton;
 
 Item*
-Create_func_md5::create(THD *thd, Item *arg1)
+Create_func_md5::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_md5(arg1);
 }
@@ -4116,7 +4180,7 @@ Create_func_md5::create(THD *thd, Item *arg1)
 Create_func_monthname Create_func_monthname::s_singleton;
 
 Item*
-Create_func_monthname::create(THD *thd, Item *arg1)
+Create_func_monthname::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_monthname(arg1);
 }
@@ -4125,7 +4189,7 @@ Create_func_monthname::create(THD *thd, Item *arg1)
 Create_func_name_const Create_func_name_const::s_singleton;
 
 Item*
-Create_func_name_const::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_name_const::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_name_const(arg1, arg2);
 }
@@ -4134,7 +4198,7 @@ Create_func_name_const::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_nullif Create_func_nullif::s_singleton;
 
 Item*
-Create_func_nullif::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_nullif::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_nullif(arg1, arg2);
 }
@@ -4144,7 +4208,7 @@ Create_func_nullif::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_numgeometries Create_func_numgeometries::s_singleton;
 
 Item*
-Create_func_numgeometries::create(THD *thd, Item *arg1)
+Create_func_numgeometries::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_numgeometries(arg1);
 }
@@ -4155,7 +4219,7 @@ Create_func_numgeometries::create(THD *thd, Item *arg1)
 Create_func_numinteriorring Create_func_numinteriorring::s_singleton;
 
 Item*
-Create_func_numinteriorring::create(THD *thd, Item *arg1)
+Create_func_numinteriorring::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_numinteriorring(arg1);
 }
@@ -4166,7 +4230,7 @@ Create_func_numinteriorring::create(THD *thd, Item *arg1)
 Create_func_numpoints Create_func_numpoints::s_singleton;
 
 Item*
-Create_func_numpoints::create(THD *thd, Item *arg1)
+Create_func_numpoints::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_numpoints(arg1);
 }
@@ -4176,7 +4240,7 @@ Create_func_numpoints::create(THD *thd, Item *arg1)
 Create_func_oct Create_func_oct::s_singleton;
 
 Item*
-Create_func_oct::create(THD *thd, Item *arg1)
+Create_func_oct::create_1_arg(THD *thd, Item *arg1)
 {
   Item *i10= new (thd->mem_root) Item_int((int32) 10,2);
   Item *i8= new (thd->mem_root) Item_int((int32) 8,1);
@@ -4187,7 +4251,7 @@ Create_func_oct::create(THD *thd, Item *arg1)
 Create_func_ord Create_func_ord::s_singleton;
 
 Item*
-Create_func_ord::create(THD *thd, Item *arg1)
+Create_func_ord::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_ord(arg1);
 }
@@ -4197,7 +4261,7 @@ Create_func_ord::create(THD *thd, Item *arg1)
 Create_func_overlaps Create_func_overlaps::s_singleton;
 
 Item*
-Create_func_overlaps::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_overlaps::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_OVERLAPS_FUNC);
@@ -4208,7 +4272,7 @@ Create_func_overlaps::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_period_add Create_func_period_add::s_singleton;
 
 Item*
-Create_func_period_add::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_period_add::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_period_add(arg1, arg2);
 }
@@ -4217,7 +4281,7 @@ Create_func_period_add::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_period_diff Create_func_period_diff::s_singleton;
 
 Item*
-Create_func_period_diff::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_period_diff::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_period_diff(arg1, arg2);
 }
@@ -4226,7 +4290,7 @@ Create_func_period_diff::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_pi Create_func_pi::s_singleton;
 
 Item*
-Create_func_pi::create(THD *thd)
+Create_func_pi::create_builder(THD *thd)
 {
   return new (thd->mem_root) Item_static_float_func("pi()", M_PI, 6, 8);
 }
@@ -4236,7 +4300,7 @@ Create_func_pi::create(THD *thd)
 Create_func_pointn Create_func_pointn::s_singleton;
 
 Item*
-Create_func_pointn::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_pointn::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_decomp_n(arg1, arg2,
                                                         Item_func::SP_POINTN);
@@ -4247,7 +4311,7 @@ Create_func_pointn::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_pow Create_func_pow::s_singleton;
 
 Item*
-Create_func_pow::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_pow::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_pow(arg1, arg2);
 }
@@ -4256,7 +4320,7 @@ Create_func_pow::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_quote Create_func_quote::s_singleton;
 
 Item*
-Create_func_quote::create(THD *thd, Item *arg1)
+Create_func_quote::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_quote(arg1);
 }
@@ -4265,7 +4329,7 @@ Create_func_quote::create(THD *thd, Item *arg1)
 Create_func_radians Create_func_radians::s_singleton;
 
 Item*
-Create_func_radians::create(THD *thd, Item *arg1)
+Create_func_radians::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_units((char*) "radians", arg1,
                                              M_PI/180, 0.0);
@@ -4291,8 +4355,11 @@ Create_func_rand::create_native(THD *thd, LEX_STRING name,
     into a table, the order in which the rows are modified may differ
     between master and slave, because the order is undefined.  Hence,
     the statement is unsafe to log in statement format.
+
+    For normal INSERTs this is, however, safe.
   */
-  thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
+  if (thd->lex->sql_command != SQLCOM_INSERT)
+    thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
 
   switch (arg_count) {
   case 0:
@@ -4322,7 +4389,7 @@ Create_func_rand::create_native(THD *thd, LEX_STRING name,
 Create_func_release_lock Create_func_release_lock::s_singleton;
 
 Item*
-Create_func_release_lock::create(THD *thd, Item *arg1)
+Create_func_release_lock::create_1_arg(THD *thd, Item *arg1)
 {
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
   thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
@@ -4333,7 +4400,7 @@ Create_func_release_lock::create(THD *thd, Item *arg1)
 Create_func_reverse Create_func_reverse::s_singleton;
 
 Item*
-Create_func_reverse::create(THD *thd, Item *arg1)
+Create_func_reverse::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_reverse(arg1);
 }
@@ -4380,7 +4447,7 @@ Create_func_round::create_native(THD *thd, LEX_STRING name,
 Create_func_row_count Create_func_row_count::s_singleton;
 
 Item*
-Create_func_row_count::create(THD *thd)
+Create_func_row_count::create_builder(THD *thd)
 {
   DBUG_ENTER("Create_func_row_count::create");
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
@@ -4392,7 +4459,7 @@ Create_func_row_count::create(THD *thd)
 Create_func_rpad Create_func_rpad::s_singleton;
 
 Item*
-Create_func_rpad::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_rpad::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_rpad(arg1, arg2, arg3);
 }
@@ -4401,7 +4468,7 @@ Create_func_rpad::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 Create_func_rtrim Create_func_rtrim::s_singleton;
 
 Item*
-Create_func_rtrim::create(THD *thd, Item *arg1)
+Create_func_rtrim::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_rtrim(arg1);
 }
@@ -4410,7 +4477,7 @@ Create_func_rtrim::create(THD *thd, Item *arg1)
 Create_func_sec_to_time Create_func_sec_to_time::s_singleton;
 
 Item*
-Create_func_sec_to_time::create(THD *thd, Item *arg1)
+Create_func_sec_to_time::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_sec_to_time(arg1);
 }
@@ -4419,7 +4486,7 @@ Create_func_sec_to_time::create(THD *thd, Item *arg1)
 Create_func_sha Create_func_sha::s_singleton;
 
 Item*
-Create_func_sha::create(THD *thd, Item *arg1)
+Create_func_sha::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_sha(arg1);
 }
@@ -4428,7 +4495,7 @@ Create_func_sha::create(THD *thd, Item *arg1)
 Create_func_sha2 Create_func_sha2::s_singleton;
 
 Item*
-Create_func_sha2::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_sha2::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_sha2(arg1, arg2);
 }
@@ -4437,7 +4504,7 @@ Create_func_sha2::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_sign Create_func_sign::s_singleton;
 
 Item*
-Create_func_sign::create(THD *thd, Item *arg1)
+Create_func_sign::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_sign(arg1);
 }
@@ -4446,7 +4513,7 @@ Create_func_sign::create(THD *thd, Item *arg1)
 Create_func_sin Create_func_sin::s_singleton;
 
 Item*
-Create_func_sin::create(THD *thd, Item *arg1)
+Create_func_sin::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_sin(arg1);
 }
@@ -4455,7 +4522,7 @@ Create_func_sin::create(THD *thd, Item *arg1)
 Create_func_sleep Create_func_sleep::s_singleton;
 
 Item*
-Create_func_sleep::create(THD *thd, Item *arg1)
+Create_func_sleep::create_1_arg(THD *thd, Item *arg1)
 {
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
   thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
@@ -4466,7 +4533,7 @@ Create_func_sleep::create(THD *thd, Item *arg1)
 Create_func_soundex Create_func_soundex::s_singleton;
 
 Item*
-Create_func_soundex::create(THD *thd, Item *arg1)
+Create_func_soundex::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_soundex(arg1);
 }
@@ -4475,7 +4542,7 @@ Create_func_soundex::create(THD *thd, Item *arg1)
 Create_func_space Create_func_space::s_singleton;
 
 Item*
-Create_func_space::create(THD *thd, Item *arg1)
+Create_func_space::create_1_arg(THD *thd, Item *arg1)
 {
   /**
     TODO: Fix Bug#23637
@@ -4503,7 +4570,7 @@ Create_func_space::create(THD *thd, Item *arg1)
 Create_func_sqrt Create_func_sqrt::s_singleton;
 
 Item*
-Create_func_sqrt::create(THD *thd, Item *arg1)
+Create_func_sqrt::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_sqrt(arg1);
 }
@@ -4513,7 +4580,7 @@ Create_func_sqrt::create(THD *thd, Item *arg1)
 Create_func_srid Create_func_srid::s_singleton;
 
 Item*
-Create_func_srid::create(THD *thd, Item *arg1)
+Create_func_srid::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_srid(arg1);
 }
@@ -4524,7 +4591,7 @@ Create_func_srid::create(THD *thd, Item *arg1)
 Create_func_startpoint Create_func_startpoint::s_singleton;
 
 Item*
-Create_func_startpoint::create(THD *thd, Item *arg1)
+Create_func_startpoint::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_spatial_decomp(arg1,
                                                       Item_func::SP_STARTPOINT);
@@ -4535,7 +4602,7 @@ Create_func_startpoint::create(THD *thd, Item *arg1)
 Create_func_str_to_date Create_func_str_to_date::s_singleton;
 
 Item*
-Create_func_str_to_date::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_str_to_date::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_str_to_date(arg1, arg2);
 }
@@ -4544,7 +4611,7 @@ Create_func_str_to_date::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_strcmp Create_func_strcmp::s_singleton;
 
 Item*
-Create_func_strcmp::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_strcmp::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_strcmp(arg1, arg2);
 }
@@ -4553,7 +4620,7 @@ Create_func_strcmp::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_substr_index Create_func_substr_index::s_singleton;
 
 Item*
-Create_func_substr_index::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_substr_index::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_substr_index(arg1, arg2, arg3);
 }
@@ -4562,7 +4629,7 @@ Create_func_substr_index::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 Create_func_subtime Create_func_subtime::s_singleton;
 
 Item*
-Create_func_subtime::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_subtime::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_add_time(arg1, arg2, 0, 1);
 }
@@ -4571,7 +4638,7 @@ Create_func_subtime::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_tan Create_func_tan::s_singleton;
 
 Item*
-Create_func_tan::create(THD *thd, Item *arg1)
+Create_func_tan::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_tan(arg1);
 }
@@ -4580,7 +4647,7 @@ Create_func_tan::create(THD *thd, Item *arg1)
 Create_func_time_format Create_func_time_format::s_singleton;
 
 Item*
-Create_func_time_format::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_time_format::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_date_format(arg1, arg2, 1);
 }
@@ -4589,7 +4656,7 @@ Create_func_time_format::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_time_to_sec Create_func_time_to_sec::s_singleton;
 
 Item*
-Create_func_time_to_sec::create(THD *thd, Item *arg1)
+Create_func_time_to_sec::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_time_to_sec(arg1);
 }
@@ -4598,7 +4665,7 @@ Create_func_time_to_sec::create(THD *thd, Item *arg1)
 Create_func_timediff Create_func_timediff::s_singleton;
 
 Item*
-Create_func_timediff::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_timediff::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_timediff(arg1, arg2);
 }
@@ -4607,7 +4674,7 @@ Create_func_timediff::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_to_days Create_func_to_days::s_singleton;
 
 Item*
-Create_func_to_days::create(THD *thd, Item *arg1)
+Create_func_to_days::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_to_days(arg1);
 }
@@ -4616,7 +4683,7 @@ Create_func_to_days::create(THD *thd, Item *arg1)
 Create_func_to_seconds Create_func_to_seconds::s_singleton;
 
 Item*
-Create_func_to_seconds::create(THD *thd, Item *arg1)
+Create_func_to_seconds::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_to_seconds(arg1);
 }
@@ -4626,7 +4693,7 @@ Create_func_to_seconds::create(THD *thd, Item *arg1)
 Create_func_touches Create_func_touches::s_singleton;
 
 Item*
-Create_func_touches::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_touches::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_TOUCHES_FUNC);
@@ -4637,7 +4704,7 @@ Create_func_touches::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_ucase Create_func_ucase::s_singleton;
 
 Item*
-Create_func_ucase::create(THD *thd, Item *arg1)
+Create_func_ucase::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_ucase(arg1);
 }
@@ -4646,7 +4713,7 @@ Create_func_ucase::create(THD *thd, Item *arg1)
 Create_func_uncompress Create_func_uncompress::s_singleton;
 
 Item*
-Create_func_uncompress::create(THD *thd, Item *arg1)
+Create_func_uncompress::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_uncompress(arg1);
 }
@@ -4655,7 +4722,7 @@ Create_func_uncompress::create(THD *thd, Item *arg1)
 Create_func_uncompressed_length Create_func_uncompressed_length::s_singleton;
 
 Item*
-Create_func_uncompressed_length::create(THD *thd, Item *arg1)
+Create_func_uncompressed_length::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_uncompressed_length(arg1);
 }
@@ -4664,7 +4731,7 @@ Create_func_uncompressed_length::create(THD *thd, Item *arg1)
 Create_func_unhex Create_func_unhex::s_singleton;
 
 Item*
-Create_func_unhex::create(THD *thd, Item *arg1)
+Create_func_unhex::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_unhex(arg1);
 }
@@ -4709,7 +4776,7 @@ Create_func_unix_timestamp::create_native(THD *thd, LEX_STRING name,
 Create_func_uuid Create_func_uuid::s_singleton;
 
 Item*
-Create_func_uuid::create(THD *thd)
+Create_func_uuid::create_builder(THD *thd)
 {
   DBUG_ENTER("Create_func_uuid::create");
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
@@ -4721,7 +4788,7 @@ Create_func_uuid::create(THD *thd)
 Create_func_uuid_short Create_func_uuid_short::s_singleton;
 
 Item*
-Create_func_uuid_short::create(THD *thd)
+Create_func_uuid_short::create_builder(THD *thd)
 {
   DBUG_ENTER("Create_func_uuid_short::create");
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
@@ -4733,7 +4800,7 @@ Create_func_uuid_short::create(THD *thd)
 Create_func_version Create_func_version::s_singleton;
 
 Item*
-Create_func_version::create(THD *thd)
+Create_func_version::create_builder(THD *thd)
 {
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
   return new (thd->mem_root) Item_static_string_func("version()",
@@ -4747,7 +4814,7 @@ Create_func_version::create(THD *thd)
 Create_func_weekday Create_func_weekday::s_singleton;
 
 Item*
-Create_func_weekday::create(THD *thd, Item *arg1)
+Create_func_weekday::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_weekday(arg1, 0);
 }
@@ -4756,7 +4823,7 @@ Create_func_weekday::create(THD *thd, Item *arg1)
 Create_func_weekofyear Create_func_weekofyear::s_singleton;
 
 Item*
-Create_func_weekofyear::create(THD *thd, Item *arg1)
+Create_func_weekofyear::create_1_arg(THD *thd, Item *arg1)
 {
   Item *i1= new (thd->mem_root) Item_int((char*) "0", 3, 1);
   return new (thd->mem_root) Item_func_week(arg1, i1);
@@ -4767,7 +4834,7 @@ Create_func_weekofyear::create(THD *thd, Item *arg1)
 Create_func_within Create_func_within::s_singleton;
 
 Item*
-Create_func_within::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_within::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_WITHIN_FUNC);
@@ -4779,7 +4846,7 @@ Create_func_within::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_x Create_func_x::s_singleton;
 
 Item*
-Create_func_x::create(THD *thd, Item *arg1)
+Create_func_x::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_x(arg1);
 }
@@ -4789,7 +4856,7 @@ Create_func_x::create(THD *thd, Item *arg1)
 Create_func_xml_extractvalue Create_func_xml_extractvalue::s_singleton;
 
 Item*
-Create_func_xml_extractvalue::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_xml_extractvalue::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_xml_extractvalue(arg1, arg2);
 }
@@ -4798,7 +4865,7 @@ Create_func_xml_extractvalue::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_xml_update Create_func_xml_update::s_singleton;
 
 Item*
-Create_func_xml_update::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_xml_update::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_xml_update(arg1, arg2, arg3);
 }
@@ -4808,7 +4875,7 @@ Create_func_xml_update::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 Create_func_y Create_func_y::s_singleton;
 
 Item*
-Create_func_y::create(THD *thd, Item *arg1)
+Create_func_y::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_y(arg1);
 }
@@ -5184,6 +5251,17 @@ create_func_cast(THD *thd, Item *a, Cast_target cast_type,
                  CHARSET_INFO *cs)
 {
   Item *UNINIT_VAR(res);
+  ulonglong length= 0, decimals= 0;
+  int error;
+  
+  /*
+    We don't have to check for error here as sql_yacc.yy has guaranteed
+    that the values are in range of ulonglong
+  */
+  if (c_len)
+    length= (ulonglong) my_strtoll10(c_len, NULL, &error);
+  if (c_dec)
+    decimals= (ulonglong) my_strtoll10(c_dec, NULL, &error);
 
   switch (cast_type) {
   case ITEM_CAST_BINARY:
@@ -5199,62 +5277,50 @@ create_func_cast(THD *thd, Item *a, Cast_target cast_type,
     res= new (thd->mem_root) Item_date_typecast(a);
     break;
   case ITEM_CAST_TIME:
-    res= new (thd->mem_root) Item_time_typecast(a);
-    break;
-  case ITEM_CAST_DATETIME:
-    res= new (thd->mem_root) Item_datetime_typecast(a);
-    break;
-  case ITEM_CAST_DECIMAL:
-  {
-    ulong len= 0;
-    uint dec= 0;
-
-    if (c_len)
+    if (decimals > MAX_DATETIME_PRECISION)
     {
-      ulong decoded_size;
-      errno= 0;
-      decoded_size= strtoul(c_len, NULL, 10);
-      if (errno != 0)
-      {
-        my_error(ER_TOO_BIG_PRECISION, MYF(0), INT_MAX, a->name,
-                 static_cast<ulong>(DECIMAL_MAX_PRECISION));
-        return NULL;
-      }
-      len= decoded_size;
-    }
-
-    if (c_dec)
-    {
-      ulong decoded_size;
-      errno= 0;
-      decoded_size= strtoul(c_dec, NULL, 10);
-      if ((errno != 0) || (decoded_size > UINT_MAX))
-      {
-        my_error(ER_TOO_BIG_SCALE, MYF(0), INT_MAX, a->name,
-                 static_cast<ulong>(DECIMAL_MAX_SCALE));
-        return NULL;
-      }
-      dec= decoded_size;
-    }
-    my_decimal_trim(&len, &dec);
-    if (len < dec)
-    {
-      my_error(ER_M_BIGGER_THAN_D, MYF(0), "");
+      wrong_precision_error(ER_TOO_BIG_PRECISION, a, decimals,
+                            MAX_DATETIME_PRECISION);
       return 0;
     }
-    if (len > DECIMAL_MAX_PRECISION)
+    res= new (thd->mem_root) Item_time_typecast(a, (uint) decimals);
+    break;
+  case ITEM_CAST_DATETIME:
+    if (decimals > MAX_DATETIME_PRECISION)
     {
-      my_error(ER_TOO_BIG_PRECISION, MYF(0), static_cast<int>(len), a->name,
-               static_cast<ulong>(DECIMAL_MAX_PRECISION));
+      wrong_precision_error(ER_TOO_BIG_PRECISION, a, decimals,
+                            MAX_DATETIME_PRECISION);
       return 0;
     }
-    if (dec > DECIMAL_MAX_SCALE)
+    res= new (thd->mem_root) Item_datetime_typecast(a, (uint) decimals);
+    break;
+  case ITEM_CAST_DECIMAL:
+  {
+    ulong len;
+    uint dec;
+    if (get_length_and_scale(length, decimals, &len, &dec,
+                             DECIMAL_MAX_PRECISION, DECIMAL_MAX_SCALE,
+                             a))
+      return NULL;
+    res= new (thd->mem_root) Item_decimal_typecast(a, len, dec);
+    break;
+  }
+  case ITEM_CAST_DOUBLE:
+  {
+    ulong len;
+    uint dec;
+
+    if (!c_len)
     {
-      my_error(ER_TOO_BIG_SCALE, MYF(0), dec, a->name,
-               static_cast<ulong>(DECIMAL_MAX_SCALE));
-      return 0;
+      length=   DBL_DIG+7;
+      decimals= NOT_FIXED_DEC;
     }
-    res= new (thd->mem_root) Item_decimal_typecast(a, len, dec);
+    else if (get_length_and_scale(length, decimals, &len, &dec,
+                                  DECIMAL_MAX_PRECISION, NOT_FIXED_DEC-1,
+                                  a))
+      return NULL;
+    res= new (thd->mem_root) Item_double_typecast(a, (uint) length,
+                                                  (uint) decimals);
     break;
   }
   case ITEM_CAST_CHAR:
@@ -5263,15 +5329,15 @@ create_func_cast(THD *thd, Item *a, Cast_target cast_type,
     CHARSET_INFO *real_cs= (cs ? cs : thd->variables.collation_connection);
     if (c_len)
     {
-      ulong decoded_size;
-      errno= 0;
-      decoded_size= strtoul(c_len, NULL, 10);
-      if ((errno != 0) || (decoded_size > MAX_FIELD_BLOBLENGTH))
+      if (length > MAX_FIELD_BLOBLENGTH)
       {
-        my_error(ER_TOO_BIG_DISPLAYWIDTH, MYF(0), "cast as char", MAX_FIELD_BLOBLENGTH);
+        char buff[1024];
+        String buf(buff, sizeof(buff), system_charset_info);
+        my_error(ER_TOO_BIG_DISPLAYWIDTH, MYF(0), item_name(a, &buf),
+                 MAX_FIELD_BLOBLENGTH);
         return NULL;
       }
-      len= (int) decoded_size;
+      len= (int) length;
     }
     res= new (thd->mem_root) Item_char_typecast(a, len, real_cs);
     break;
@@ -5285,3 +5351,95 @@ create_func_cast(THD *thd, Item *a, Cast_target cast_type,
   }
   return res;
 }
+
+/* Copy 'list' into a mem_root array (*dfs) and flatten it to (num, value). */
+static List<Item> *create_func_dyncol_prepare(THD *thd,
+                                              DYNCALL_CREATE_DEF **dfs,
+                                              List<DYNCALL_CREATE_DEF> &list)
+{
+  uint pos= 0;
+  DYNCALL_CREATE_DEF *cur, *defs_copy;
+  List_iterator_fast<DYNCALL_CREATE_DEF> def_it(list);
+  List<Item> *flat_args= new (thd->mem_root) List<Item>;
+
+  defs_copy= (DYNCALL_CREATE_DEF *) alloc_root(thd->mem_root,
+                                               sizeof(*defs_copy) *
+                                               list.elements);
+  *dfs= defs_copy;                              // stored even when NULL
+  if (flat_args == NULL || defs_copy == NULL)
+    return NULL;                                // an allocation failed
+  while ((cur= def_it++))                       // num first, then value
+  {
+    defs_copy[pos++]= *cur;
+    flat_args->push_back(cur->num);
+    flat_args->push_back(cur->value);
+  }
+  return flat_args;
+}
+
+Item *create_func_dyncol_create(THD *thd, List<DYNCALL_CREATE_DEF> &list)
+{
+  /* Wrap the flattened (num, value) arguments; NULL if preparing failed. */
+  DYNCALL_CREATE_DEF *defs;
+  List<Item> *flat_args= create_func_dyncol_prepare(thd, &defs, list);
+  if (flat_args == NULL)
+    return NULL;
+  return new (thd->mem_root) Item_func_dyncol_create(*flat_args, defs);
+}
+
+/* Build an Item_func_dyncol_add; 'str' is appended after the pairs. */
+Item *create_func_dyncol_add(THD *thd, Item *str,
+                             List<DYNCALL_CREATE_DEF> &list)
+{
+  DYNCALL_CREATE_DEF *defs;
+  List<Item> *flat_args= create_func_dyncol_prepare(thd, &defs, list);
+
+  if (flat_args == NULL)
+    return NULL;                                // preparing failed
+
+  /* The (num, value) pairs come first; 'str' is appended after them. */
+  flat_args->push_back(str);
+  return new (thd->mem_root) Item_func_dyncol_add(*flat_args, defs);
+}
+
+
+/* Builds an Item_func_dyncol_add pairing every listed num with Item_null. */
+Item *create_func_dyncol_delete(THD *thd, Item *str, List<Item> &nums)
+{
+  DYNCALL_CREATE_DEF *dfs;
+  Item *num;
+  List_iterator_fast<Item> it(nums);
+  List<Item> *args= new (thd->mem_root) List<Item>;
+
+  dfs= (DYNCALL_CREATE_DEF *)alloc_root(thd->mem_root,
+                                        sizeof(DYNCALL_CREATE_DEF) *
+                                        nums.elements);
+  if (!args || !dfs)
+    return NULL;
+
+  for (uint i= 0; (num= it++); i++)
+  {
+    dfs[i].num= num;
+    /* Allocate on thd->mem_root like every other item built here. */
+    if (!(dfs[i].value= new (thd->mem_root) Item_null()))
+      return NULL;                              // out of memory
+    dfs[i].type= DYN_COL_INT;
+    args->push_back(dfs[i].num);
+    args->push_back(dfs[i].value);
+  }
+  args->push_back(str);                         // 'str' is always last
+  return new (thd->mem_root) Item_func_dyncol_add(*args, dfs);
+}
+
+/* Build Item_dyncol_get(str, num) and wrap it in the requested cast. */
+Item *create_func_dyncol_get(THD *thd,  Item *str, Item *num,
+                             Cast_target cast_type,
+                             const char *c_len, const char *c_dec,
+                             CHARSET_INFO *cs)
+{
+  Item *getter= new (thd->mem_root) Item_dyncol_get(str, num);
+
+  if (getter == NULL)
+    return NULL;                                // allocation failed
+  return create_func_cast(thd, getter, cast_type, c_len, c_dec, cs);
+}
diff --git a/sql/item_create.h b/sql/item_create.h
index 420446fcea4..a8a8b9d97dd 100644
--- a/sql/item_create.h
+++ b/sql/item_create.h
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates.
+   Copyright (c) 2010, 2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -93,8 +94,9 @@ public:
     @param item_list The list of arguments to the function, can be NULL
     @return An item representing the parsed function call
   */
-  virtual Item* create(THD *thd, LEX_STRING db, LEX_STRING name,
-                       bool use_explicit_name, List<Item> *item_list) = 0;
+  virtual Item *create_with_db(THD *thd, LEX_STRING db, LEX_STRING name,
+                               bool use_explicit_name,
+                               List<Item> *item_list) = 0;
 
 protected:
   /** Constructor. */
@@ -166,8 +168,17 @@ create_func_cast(THD *thd, Item *a, Cast_target cast_type,
                  const char *len, const char *dec,
                  CHARSET_INFO *cs);
 
+
 int item_create_init();
 void item_create_cleanup();
 
+Item *create_func_dyncol_create(THD *thd, List<DYNCALL_CREATE_DEF> &list);
+Item *create_func_dyncol_add(THD *thd, Item *str,
+                             List<DYNCALL_CREATE_DEF> &list);
+Item *create_func_dyncol_delete(THD *thd, Item *str, List<Item> &nums);
+Item *create_func_dyncol_get(THD *thd, Item *str, Item *num,
+                             Cast_target cast_type,
+                             const char *c_len, const char *c_dec,
+                             CHARSET_INFO *cs); // (str, num) as in item_create.cc
 #endif
 
diff --git a/sql/item_func.cc b/sql/item_func.cc
index 7257b411ec4..c765d142285 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates.
+   Copyright (c) 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -38,7 +39,7 @@
 #include "strfunc.h"                            // find_type
 #include "sql_parse.h"                          // is_update_query
 #include "sql_acl.h"                            // EXECUTE_ACL
-#include "mysqld.h"                             // LOCK_uuid_generator
+#include "mysqld.h"                             // LOCK_short_uuid_generator
 #include "rpl_mi.h"
 #include <m_ctype.h>
 #include <hash.h>
@@ -103,6 +104,7 @@ void Item_func::set_arguments(List<Item> &list)
     {
       *(save_args++)= item;
       with_sum_func|=item->with_sum_func;
+      with_field|= item->with_field;
     }
   }
   list.empty();					// Fields are used
@@ -153,6 +155,7 @@ Item_func::Item_func(THD *thd, Item_func *item)
     Sets as a side effect the following class variables:
       maybe_null	Set if any argument may return NULL
       with_sum_func	Set if any of the arguments contains a sum function
+      with_field        Set if any of the arguments contains or is a field
       used_tables_cache Set to union of the tables used by arguments
 
       str_value.charset If this is a string function, set this to the
@@ -174,7 +177,9 @@ Item_func::fix_fields(THD *thd, Item **ref)
 {
   DBUG_ASSERT(fixed == 0);
   Item **arg,**arg_end;
+  TABLE_LIST *save_emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
   uchar buff[STACK_BUFF_ALLOC];			// Max argument in function
+  thd->thd_marker.emb_on_expr_nest= NULL;
 
   used_tables_cache= not_null_tables_cache= 0;
   const_item_cache=1;
@@ -218,8 +223,8 @@ Item_func::fix_fields(THD *thd, Item **ref)
 	maybe_null=1;
 
       with_sum_func= with_sum_func || item->with_sum_func;
+      with_field= with_field || item->with_field;
       used_tables_cache|=     item->used_tables();
-      not_null_tables_cache|= item->not_null_tables();
       const_item_cache&=      item->const_item();
       with_subselect|=        item->with_subselect;
     }
@@ -228,9 +233,63 @@ Item_func::fix_fields(THD *thd, Item **ref)
   if (thd->is_error()) // An error inside fix_length_and_dec occured
     return TRUE;
   fixed= 1;
+  thd->thd_marker.emb_on_expr_nest= save_emb_on_expr_nest;
   return FALSE;
 }
 
+/* Call quick_fix_field() on every unfixed argument, then set fixed. */
+void Item_func::quick_fix_field()
+{
+  /* With arg_count == 0 the loop body never runs, as in a guarded loop. */
+  for (uint idx= 0; idx < arg_count; idx++)
+  {
+    Item *argument= args[idx];
+    if (argument->fixed)
+      continue;                                 // nothing to do for it
+    argument->quick_fix_field();
+  }
+
+  fixed= 1;                                     // this item itself
+}
+
+/* Recompute not_null_tables_cache as the OR over all arguments. */
+bool Item_func::eval_not_null_tables(uchar *opt_arg)
+{
+  /* 'opt_arg' is not used here; the function always returns FALSE. */
+  uint idx;
+
+  not_null_tables_cache= 0;
+  for (idx= 0; idx < arg_count; idx++)
+  {
+    Item *argument= args[idx];
+    not_null_tables_cache|= argument->not_null_tables();
+  }
+
+  return FALSE;
+}
+
+/* Propagate fix_after_pullout() to every argument and rebuild the caches. */
+void Item_func::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  /* Reset the caches; they are re-accumulated from the arguments below. */
+  not_null_tables_cache= 0;
+  used_tables_cache= 0;
+  const_item_cache= 1;
+
+  for (uint idx= 0; idx < arg_count; idx++)
+  {
+    Item **slot= args + idx;
+
+    (*slot)->fix_after_pullout(new_parent, slot);
+    /* Re-read the slot: the call receives it and may have replaced it. */
+    Item *item= *slot;
+
+    used_tables_cache|=     item->used_tables();
+    not_null_tables_cache|= item->not_null_tables();
+    const_item_cache&=      item->const_item();
+  }
+}
+
 
 bool Item_func::walk(Item_processor processor, bool walk_subquery,
                      uchar *argument)
@@ -331,6 +390,8 @@ Item *Item_func::transform(Item_transformer transformer, uchar *argument)
     the old item is substituted for a new one.
     After this the transformer is applied to the root node
     of the Item_func object. 
+    The compile function is not called if the analyzer returns NULL
+    in the parameter arg_p. 
 
   @param analyzer      the analyzer callback function to be applied to the
                        nodes of the tree of the object
@@ -348,7 +409,7 @@ Item *Item_func::compile(Item_analyzer analyzer, uchar **arg_p,
 {
   if (!(this->*analyzer)(arg_p))
     return 0;
-  if (arg_count)
+  if (*arg_p && arg_count)
   {
     Item **arg,**arg_end;
     for (arg= args, arg_end= args+arg_count; arg != arg_end; arg++)
@@ -356,7 +417,7 @@ Item *Item_func::compile(Item_analyzer analyzer, uchar **arg_p,
       /* 
         The same parameter value of arg_p must be passed
         to analyze any argument of the condition formula.
-      */   
+      */
       uchar *arg_v= *arg_p;
       Item *new_item= (*arg)->compile(analyzer, &arg_v, transformer, arg_t);
       if (new_item && *arg != new_item)
@@ -480,12 +541,12 @@ Field *Item_func::tmp_table_field(TABLE *table)
     break;
   case STRING_RESULT:
     return make_string_field(table);
-    break;
   case DECIMAL_RESULT:
     field= Field_new_decimal::create_from_item(this);
     break;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     // This case should never be chosen
     DBUG_ASSERT(0);
     field= 0;
@@ -496,12 +557,12 @@ Field *Item_func::tmp_table_field(TABLE *table)
   return field;
 }
 
-
+/*
 bool Item_func::is_expensive_processor(uchar *arg)
 {
   return is_expensive();
 }
-
+*/
 
 my_decimal *Item_func::val_decimal(my_decimal *decimal_value)
 {
@@ -693,8 +754,8 @@ void Item_num_op::find_num_type(void)
   DBUG_ENTER("Item_num_op::find_num_type");
   DBUG_PRINT("info", ("name %s", func_name()));
   DBUG_ASSERT(arg_count == 2);
-  Item_result r0= args[0]->result_type();
-  Item_result r1= args[1]->result_type();
+  Item_result r0= args[0]->cast_to_int_type();
+  Item_result r1= args[1]->cast_to_int_type();
 
   if (r0 == REAL_RESULT || r1 == REAL_RESULT ||
       r0 == STRING_RESULT || r1 ==STRING_RESULT)
@@ -703,7 +764,8 @@ void Item_num_op::find_num_type(void)
     max_length= float_length(decimals);
     hybrid_type= REAL_RESULT;
   }
-  else if (r0 == DECIMAL_RESULT || r1 == DECIMAL_RESULT)
+  else if (r0 == DECIMAL_RESULT || r1 == DECIMAL_RESULT ||
+           r0 == TIME_RESULT || r1 == TIME_RESULT)
   {
     hybrid_type= DECIMAL_RESULT;
     result_precision();
@@ -734,7 +796,7 @@ void Item_func_num1::find_num_type()
 {
   DBUG_ENTER("Item_func_num1::find_num_type");
   DBUG_PRINT("info", ("name %s", func_name()));
-  switch (hybrid_type= args[0]->result_type()) {
+  switch (hybrid_type= args[0]->cast_to_int_type()) {
   case INT_RESULT:
     unsigned_flag= args[0]->unsigned_flag;
     break;
@@ -743,9 +805,12 @@ void Item_func_num1::find_num_type()
     hybrid_type= REAL_RESULT;
     max_length= float_length(decimals);
     break;
+  case TIME_RESULT:
+    hybrid_type= DECIMAL_RESULT;
   case DECIMAL_RESULT:
     break;
-  default:
+  case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   DBUG_PRINT("info", ("Type: %s",
@@ -803,7 +868,9 @@ String *Item_func_numhybrid::val_str(String *str)
   }
   case STRING_RESULT:
     return str_op(&str_value);
-  default:
+  case TIME_RESULT:
+  case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   return str;
@@ -838,7 +905,9 @@ double Item_func_numhybrid::val_real()
     return (res ? my_strntod(res->charset(), (char*) res->ptr(), res->length(),
 			     &end_not_used, &err_not_used) : 0.0);
   }
-  default:
+  case TIME_RESULT:
+  case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   return 0.0;
@@ -873,7 +942,9 @@ longlong Item_func_numhybrid::val_int()
     CHARSET_INFO *cs= res->charset();
     return (*(cs->cset->strtoll10))(cs, res->ptr(), &end, &err_not_used);
   }
-  default:
+  case TIME_RESULT:
+  case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   return 0;
@@ -911,7 +982,8 @@ my_decimal *Item_func_numhybrid::val_decimal(my_decimal *decimal_value)
     break;
   }  
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   return val;
@@ -972,21 +1044,32 @@ longlong Item_func_signed::val_int()
   longlong value;
   int error;
 
-  if (args[0]->cast_to_int_type() != STRING_RESULT ||
-      args[0]->result_as_longlong())
+  if (args[0]->cast_to_int_type() != STRING_RESULT)
+  {
+    value= args[0]->val_int();
+    null_value= args[0]->null_value; 
+    return value;
+  }
+  else if (args[0]->dynamic_result())
   {
+    /* We come here when argument has an unknown type */
+    args[0]->unsigned_flag= 0;   // Mark that we want to have a signed value
     value= args[0]->val_int();
     null_value= args[0]->null_value; 
+    if (!null_value && args[0]->unsigned_flag && value < 0)
+      goto err;                                 // Warn about overflow
     return value;
   }
 
   value= val_int_from_str(&error);
   if (value < 0 && error == 0)
-  {
-    push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
-                 "Cast to signed converted positive out-of-range integer to "
-                 "it's negative complement");
-  }
+    goto err;
+  return value;
+
+err:
+  push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
+               "Cast to signed converted positive out-of-range integer to "
+               "it's negative complement");
   return value;
 }
 
@@ -1014,19 +1097,35 @@ longlong Item_func_unsigned::val_int()
       value= 0;
     return value;
   }
-  else if (args[0]->cast_to_int_type() != STRING_RESULT ||
-           args[0]->result_as_longlong())
+  else if (args[0]->dynamic_result())
   {
+    /* We come here when argument has an unknown type */
+    args[0]->unsigned_flag= 1;   // Mark that we want to have an unsigned value
     value= args[0]->val_int();
     null_value= args[0]->null_value; 
+    if (!null_value && args[0]->unsigned_flag == 0 && value < 0)
+      goto err;                                 // Warn about overflow
+    return value;
+  }
+  else if (args[0]->cast_to_int_type() != STRING_RESULT)
+  {
+    value= args[0]->val_int();
+    null_value= args[0]->null_value; 
+    if (!null_value && args[0]->unsigned_flag == 0 && value < 0)
+      goto err;                                 // Warn about overflow
     return value;
   }
 
   value= val_int_from_str(&error);
   if (error < 0)
-    push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
-                 "Cast to unsigned converted negative integer to it's "
-                 "positive complement");
+    goto err;
+
+  return value;
+
+err:
+  push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
+               "Cast to unsigned converted negative integer to it's "
+               "positive complement");
   return value;
 }
 
@@ -1124,6 +1223,51 @@ void Item_decimal_typecast::print(String *str, enum_query_type query_type)
 }
 
 
+double Item_double_typecast::val_real()
+{
+  /* Value of args[0] truncated via truncate_double(); NULL stays NULL. */
+  int error;
+  double tmp= args[0]->val_real();
+  if ((null_value= args[0]->null_value))
+    return 0.0;
+  if ((error= truncate_double(&tmp, max_length, decimals, 0, DBL_MAX)))
+  {
+    /* Row number passed as long: presumably printed with %ld (confirm). */
+    push_warning_printf(current_thd,
+                        MYSQL_ERROR::WARN_LEVEL_WARN,
+                        ER_WARN_DATA_OUT_OF_RANGE,
+                        ER(ER_WARN_DATA_OUT_OF_RANGE), name, 1L);
+    if (error < 0)
+    {
+      null_value= 1;                            // Illegal value
+      tmp= 0.0;
+    }
+  }
+  return tmp;
+}
+
+/* Print as "cast(<arg> as double[(max_length,decimals)])". */
+void Item_double_typecast::print(String *str, enum_query_type query_type)
+{
+  str->append(STRING_WITH_LEN("cast("));
+  args[0]->print(str, query_type);
+  str->append(STRING_WITH_LEN(" as double"));
+
+  if (decimals != NOT_FIXED_DEC)                // explicit (len,dec) given
+  {
+    char num_buf[20*3 + 1];
+    char *num_end= int10_to_str(max_length, num_buf, 10);
+
+    str->append('(');
+    str->append(num_buf, (uint32) (num_end - num_buf));
+    str->append(',');
+    num_end= int10_to_str(decimals, num_buf, 10);
+    str->append(num_buf, (uint32) (num_end - num_buf));
+    str->append(')');
+  }
+  str->append(')');
+}
+
 double Item_func_plus::real_op()
 {
   double value= args[0]->val_real() + args[1]->val_real();
@@ -1542,7 +1686,7 @@ void Item_func_div::fix_length_and_dec()
   DBUG_ENTER("Item_func_div::fix_length_and_dec");
   prec_increment= current_thd->variables.div_precincrement;
   Item_num_op::fix_length_and_dec();
-  switch(hybrid_type) {
+  switch (hybrid_type) {
   case REAL_RESULT:
   {
     decimals=max(args[0]->decimals,args[1]->decimals)+prec_increment;
@@ -1565,7 +1709,10 @@ void Item_func_div::fix_length_and_dec()
   case DECIMAL_RESULT:
     result_precision();
     break;
-  default:
+  case STRING_RESULT:
+  case ROW_RESULT:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   maybe_null= 1; // devision by zero
@@ -2111,7 +2258,7 @@ void Item_func_int_val::find_num_type()
 {
   DBUG_ENTER("Item_func_int_val::find_num_type");
   DBUG_PRINT("info", ("name %s", func_name()));
-  switch(hybrid_type= args[0]->result_type())
+  switch (hybrid_type= args[0]->cast_to_int_type())
   {
   case STRING_RESULT:
   case REAL_RESULT:
@@ -2119,6 +2266,7 @@ void Item_func_int_val::find_num_type()
     max_length= float_length(decimals);
     break;
   case INT_RESULT:
+  case TIME_RESULT:
   case DECIMAL_RESULT:
     /*
       -2 because in most high position can't be used any digit for longlong
@@ -2135,7 +2283,8 @@ void Item_func_int_val::find_num_type()
       hybrid_type= INT_RESULT;
     }
     break;
-  default:
+  case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   DBUG_PRINT("info", ("Type: %s",
@@ -2264,7 +2413,7 @@ void Item_func_round::fix_length_and_dec()
   }
 
   val1= args[1]->val_int();
-  if ((null_value= args[1]->is_null()))
+  if ((null_value= args[1]->null_value))
     return;
 
   val1_unsigned= args[1]->unsigned_flag;
@@ -2314,7 +2463,9 @@ void Item_func_round::fix_length_and_dec()
                                                              unsigned_flag);
     break;
   }
-  default:
+  case ROW_RESULT:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0); /* This result type isn't handled */
   }
 }
@@ -2355,10 +2506,12 @@ double Item_func_round::real_op()
 {
   double value= args[0]->val_real();
 
-  if (!(null_value= args[0]->null_value || args[1]->null_value))
-    return my_double_round(value, args[1]->val_int(), args[1]->unsigned_flag,
-                           truncate);
-
+  if (!(null_value= args[0]->null_value))
+  {
+    longlong dec= args[1]->val_int();
+    if (!(null_value= args[1]->null_value))
+      return my_double_round(value, dec, args[1]->unsigned_flag, truncate);
+  }
   return 0.0;
 }
 
@@ -2428,7 +2581,7 @@ void Item_func_rand::seed_random(Item *arg)
     args[0] is a constant.
   */
   uint32 tmp= (uint32) arg->val_int();
-  randominit(rand, (uint32) (tmp*0x10001L+55555555L),
+  my_rnd_init(rand, (uint32) (tmp*0x10001L+55555555L),
              (uint32) (tmp*0x10000001L));
 }
 
@@ -2448,7 +2601,7 @@ bool Item_func_rand::fix_fields(THD *thd,Item **ref)
       No need to send a Rand log event if seed was given eg: RAND(seed),
       as it will be replicated in the query as such.
     */
-    if (!rand && !(rand= (struct rand_struct*)
+    if (!rand && !(rand= (struct my_rnd_struct*)
                    thd->stmt_arena->alloc(sizeof(*rand))))
       return TRUE;
   }
@@ -2520,10 +2673,10 @@ double Item_func_units::val_real()
 void Item_func_min_max::fix_length_and_dec()
 {
   int max_int_part=0;
-  bool datetime_found= FALSE;
   decimals=0;
   max_length=0;
   maybe_null=0;
+  thd= current_thd;
   cmp_type=args[0]->result_type();
 
   for (uint i=0 ; i < arg_count ; i++)
@@ -2532,25 +2685,12 @@ void Item_func_min_max::fix_length_and_dec()
     set_if_bigger(decimals, args[i]->decimals);
     set_if_bigger(max_int_part, args[i]->decimal_int_part());
     if (args[i]->maybe_null)
-      maybe_null=1;
-    cmp_type=item_cmp_type(cmp_type,args[i]->result_type());
-    if (args[i]->result_type() != ROW_RESULT && args[i]->is_datetime())
-    {
-      datetime_found= TRUE;
-      if (!datetime_item || args[i]->field_type() == MYSQL_TYPE_DATETIME)
-        datetime_item= args[i];
-    }
+      maybe_null= 1;
+    cmp_type= item_cmp_type(cmp_type,args[i]->result_type());
   }
   if (cmp_type == STRING_RESULT)
-  {
     agg_arg_charsets_for_string_result_with_comparison(collation,
                                                        args, arg_count);
-    if (datetime_found)
-    {
-      thd= current_thd;
-      compare_as_dates= TRUE;
-    }
-  }
   else if ((cmp_type == DECIMAL_RESULT) || (cmp_type == INT_RESULT))
   {
     collation.set_numeric();
@@ -2561,61 +2701,73 @@ void Item_func_min_max::fix_length_and_dec()
   }
   else if (cmp_type == REAL_RESULT)
     fix_char_length(float_length(decimals));
-  cached_field_type= agg_field_type(args, arg_count);
+
+  compare_as_dates= find_date_time_item(args, arg_count, 0);
+  if (compare_as_dates)
+  {
+    cached_field_type= compare_as_dates->field_type();
+    if (mysql_type_to_time_type(cached_field_type) == MYSQL_TIMESTAMP_DATE)
+      decimals= 0;
+    else
+      set_if_smaller(decimals, TIME_SECOND_PART_DIGITS);
+  }
+  else
+    cached_field_type= agg_field_type(args, arg_count);
 }
 
 
 /*
   Compare item arguments in the DATETIME context.
 
-  SYNOPSIS
-    cmp_datetimes()
-    value [out]   found least/greatest DATE/DATETIME value
-
   DESCRIPTION
     Compare item arguments as DATETIME values and return the index of the
     least/greatest argument in the arguments array.
-    The correct integer DATE/DATETIME value of the found argument is
+    The correct DATE/DATETIME value of the found argument is
     stored to the value pointer, if latter is provided.
 
   RETURN
-   0	If one of arguments is NULL or there was a execution error
-   #	index of the least/greatest argument
+   1	If one of the arguments is NULL or there was an execution error
+   0    Otherwise
 */
 
-uint Item_func_min_max::cmp_datetimes(ulonglong *value)
+bool Item_func_min_max::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   longlong UNINIT_VAR(min_max);
-  uint min_max_idx= 0;
+  DBUG_ASSERT(fixed == 1);
+
+  /*
+    just like ::val_int() method of a string item can be called,
+    for example, SELECT CONCAT("10", "12") + 1,
+    ::get_date() can be called for non-temporal values,
+    for example, SELECT MONTH(GREATEST("2011-11-21", "2010-10-09"))
+
+  */
+  if (!compare_as_dates)
+    return Item_func::get_date(ltime, fuzzy_date);
 
   for (uint i=0; i < arg_count ; i++)
   {
     Item **arg= args + i;
     bool is_null;
-    longlong res= get_datetime_value(thd, &arg, 0, datetime_item, &is_null);
+    longlong res= get_datetime_value(thd, &arg, 0, compare_as_dates, &is_null);
 
-    /* Check if we need to stop (because of error or KILL)  and stop the loop */
-    if (thd->is_error())
+    /* Check if we need to stop (because of error or KILL) and stop the loop */
+    if (thd->is_error() || args[i]->null_value)
     {
-      null_value= 1;
-      return 0;
+      return (null_value= 1);
     }
 
-    if ((null_value= args[i]->null_value))
-      return 0;
     if (i == 0 || (res < min_max ? cmp_sign : -cmp_sign) > 0)
-    {
       min_max= res;
-      min_max_idx= i;
-    }
   }
-  if (value)
+  unpack_time(min_max, ltime);
+  if (compare_as_dates->field_type() == MYSQL_TYPE_DATE)
   {
-    *value= min_max;
-    if (datetime_item->field_type() == MYSQL_TYPE_DATE)
-      *value/= 1000000L;
+    ltime->time_type= MYSQL_TIMESTAMP_DATE;
+    ltime->hour= ltime->minute= ltime->second= ltime->second_part= 0;
   }
-  return min_max_idx;
+
+  return (null_value= 0);
 }
 
 
@@ -2623,46 +2775,14 @@ String *Item_func_min_max::val_str(String *str)
 {
   DBUG_ASSERT(fixed == 1);
   if (compare_as_dates)
-  {
-    String *str_res;
-    uint min_max_idx= cmp_datetimes(NULL);
-    if (null_value)
-      return 0;
-    str_res= args[min_max_idx]->val_str(str);
-    if (args[min_max_idx]->null_value)
-    {
-      // check if the call to val_str() above returns a NULL value
-      null_value= 1;
-      return NULL;
-    }
-    str_res->set_charset(collation.collation);
-    return str_res;
-  }
+    return val_string_from_date(str);
   switch (cmp_type) {
   case INT_RESULT:
-  {
-    longlong nr=val_int();
-    if (null_value)
-      return 0;
-    str->set_int(nr, unsigned_flag, collation.collation);
-    return str;
-  }
+    return val_string_from_int(str);
   case DECIMAL_RESULT:
-  {
-    my_decimal dec_buf, *dec_val= val_decimal(&dec_buf);
-    if (null_value)
-      return 0;
-    my_decimal2string(E_DEC_FATAL_ERROR, dec_val, 0, 0, 0, str);
-    return str;
-  }
+    return val_string_from_decimal(str);
   case REAL_RESULT:
-  {
-    double nr= val_real();
-    if (null_value)
-      return 0; /* purecov: inspected */
-    str->set_real(nr, decimals, collation.collation);
-    return str;
-  }
+    return val_string_from_real(str);
   case STRING_RESULT:
   {
     String *UNINIT_VAR(res);
@@ -2688,9 +2808,9 @@ String *Item_func_min_max::val_str(String *str)
     return res;
   }
   case ROW_RESULT:
-  default:
-    // This case should never be chosen
-    DBUG_ASSERT(0);
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);                // This case should never be chosen
     return 0;
   }
   return 0;					// Keep compiler happy
@@ -2703,9 +2823,11 @@ double Item_func_min_max::val_real()
   double value=0.0;
   if (compare_as_dates)
   {
-    ulonglong result= 0;
-    (void)cmp_datetimes(&result);
-    return (double)result;
+    MYSQL_TIME ltime;
+    if (get_date(&ltime, TIME_FUZZY_DATE))
+      return 0;
+
+    return TIME_to_double(&ltime);
   }
   for (uint i=0; i < arg_count ; i++)
   {
@@ -2730,9 +2852,11 @@ longlong Item_func_min_max::val_int()
   longlong value=0;
   if (compare_as_dates)
   {
-    ulonglong result= 0;
-    (void)cmp_datetimes(&result);
-    return (longlong)result;
+    MYSQL_TIME ltime;
+    if (get_date(&ltime, TIME_FUZZY_DATE))
+      return 0;
+
+    return TIME_to_ulonglong(&ltime);
   }
   for (uint i=0; i < arg_count ; i++)
   {
@@ -2758,10 +2882,11 @@ my_decimal *Item_func_min_max::val_decimal(my_decimal *dec)
 
   if (compare_as_dates)
   {
-    ulonglong value= 0;
-    (void)cmp_datetimes(&value);
-    ulonglong2decimal(value, dec);
-    return dec;
+    MYSQL_TIME ltime;
+    if (get_date(&ltime, TIME_FUZZY_DATE))
+      return 0;
+
+    return date2my_decimal(&ltime, dec);
   }
   for (uint i=0; i < arg_count ; i++)
   {
@@ -3063,8 +3188,7 @@ longlong Item_func_find_in_set::val_int()
   }
   null_value=0;
 
-  int diff;
-  if ((diff=buffer->length() - find->length()) >= 0)
+  if ((int) (buffer->length() - find->length()) >= 0)
   {
     my_wc_t wc= 0;
     CHARSET_INFO *cs= cmp_collation.collation;
@@ -3210,6 +3334,7 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func,
       if (item->maybe_null)
 	func->maybe_null=1;
       func->with_sum_func= func->with_sum_func || item->with_sum_func;
+      func->with_field= func->with_field || item->with_field;
       used_tables_cache|=item->used_tables();
       const_item_cache&=item->const_item();
       f_args.arg_type[i]=item->result_type();
@@ -3255,8 +3380,7 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func,
 
       if (arguments[i]->const_item())
       {
-        switch (arguments[i]->result_type()) 
-        {
+        switch (arguments[i]->result_type()) {
         case STRING_RESULT:
         case DECIMAL_RESULT:
         {
@@ -3282,9 +3406,9 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func,
           to+= ALIGN_SIZE(sizeof(double));
           break;
         case ROW_RESULT:
-        default:
-          // This case should never be chosen
-          DBUG_ASSERT(0);
+        case TIME_RESULT:
+        case IMPOSSIBLE_RESULT:
+          DBUG_ASSERT(0);          // This case should never be chosen
           break;
         }
       }
@@ -3357,9 +3481,9 @@ bool udf_handler::get_arguments()
       }
       break;
     case ROW_RESULT:
-    default:
-      // This case should never be chosen
-      DBUG_ASSERT(0);
+    case TIME_RESULT:
+    case IMPOSSIBLE_RESULT:
+      DBUG_ASSERT(0);              // This case should never be chosen
       break;
     }
   }
@@ -3463,11 +3587,15 @@ void Item_udf_func::print(String *str, enum_query_type query_type)
 
 double Item_func_udf_float::val_real()
 {
+  double res;
+  my_bool tmp_null_value;
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_func_udf_float::val");
   DBUG_PRINT("info",("result_type: %d  arg_count: %d",
 		     args[0]->result_type(), arg_count));
-  DBUG_RETURN(udf.val(&null_value));
+  res= udf.val(&tmp_null_value);
+  null_value= tmp_null_value;
+  DBUG_RETURN(res);
 }
 
 
@@ -3484,9 +3612,13 @@ String *Item_func_udf_float::val_str(String *str)
 
 longlong Item_func_udf_int::val_int()
 {
+  longlong res;
+  my_bool tmp_null_value;
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_func_udf_int::val_int");
-  DBUG_RETURN(udf.val_int(&null_value));
+  res= udf.val_int(&tmp_null_value);
+  null_value= tmp_null_value;
+  DBUG_RETURN(res);
 }
 
 
@@ -3503,8 +3635,10 @@ String *Item_func_udf_int::val_str(String *str)
 
 longlong Item_func_udf_decimal::val_int()
 {
-  my_decimal dec_buf, *dec= udf.val_decimal(&null_value, &dec_buf);
+  my_bool tmp_null_value;
   longlong result;
+  my_decimal dec_buf, *dec= udf.val_decimal(&tmp_null_value, &dec_buf);
+  null_value= tmp_null_value;
   if (null_value)
     return 0;
   my_decimal2int(E_DEC_FATAL_ERROR, dec, unsigned_flag, &result);
@@ -3514,8 +3648,10 @@ longlong Item_func_udf_decimal::val_int()
 
 double Item_func_udf_decimal::val_real()
 {
-  my_decimal dec_buf, *dec= udf.val_decimal(&null_value, &dec_buf);
+  my_bool tmp_null_value;
   double result;
+  my_decimal dec_buf, *dec= udf.val_decimal(&tmp_null_value, &dec_buf);
+  null_value= tmp_null_value;
   if (null_value)
     return 0.0;
   my_decimal2double(E_DEC_FATAL_ERROR, dec, &result);
@@ -3525,18 +3661,24 @@ double Item_func_udf_decimal::val_real()
 
 my_decimal *Item_func_udf_decimal::val_decimal(my_decimal *dec_buf)
 {
+  my_decimal *res;
+  my_bool tmp_null_value;
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_func_udf_decimal::val_decimal");
   DBUG_PRINT("info",("result_type: %d  arg_count: %d",
                      args[0]->result_type(), arg_count));
 
-  DBUG_RETURN(udf.val_decimal(&null_value, dec_buf));
+  res= udf.val_decimal(&tmp_null_value, dec_buf);
+  null_value= tmp_null_value;
+  DBUG_RETURN(res);
 }
 
 
 String *Item_func_udf_decimal::val_str(String *str)
 {
-  my_decimal dec_buf, *dec= udf.val_decimal(&null_value, &dec_buf);
+  my_bool tmp_null_value;
+  my_decimal dec_buf, *dec= udf.val_decimal(&tmp_null_value, &dec_buf);
+  null_value= tmp_null_value;
   if (null_value)
     return 0;
   if (str->length() < DECIMAL_MAX_STR_LENGTH)
@@ -4074,9 +4216,9 @@ longlong Item_func_benchmark::val_int()
       (void) args[1]->val_decimal(&tmp_decimal);
       break;
     case ROW_RESULT:
-    default:
-      // This case should never be chosen
-      DBUG_ASSERT(0);
+    case TIME_RESULT:
+    case IMPOSSIBLE_RESULT:
+      DBUG_ASSERT(0);              // This case should never be chosen
       return 0;
     }
   }
@@ -4259,6 +4401,21 @@ bool Item_func_set_user_var::fix_fields(THD *thd, Item **ref)
                          DERIVATION_IMPLICIT);
   collation.set(entry->collation.collation, DERIVATION_IMPLICIT);
   cached_result_type= args[0]->result_type();
+  if (thd->lex->current_select)
+  {
+    /*
+      When this function is used in a derived table/view force the derived
+      table to be materialized to preserve possible side-effect of setting a
+      user variable.
+    */
+    SELECT_LEX_UNIT *unit= thd->lex->current_select->master_unit();
+    TABLE_LIST *derived;
+    for (derived= unit->derived;
+         derived;
+         derived= derived->select_lex->master_unit()->derived)
+      derived->set_materialized_derived();
+  }
+
   return FALSE;
 }
 
@@ -4295,10 +4452,30 @@ bool Item_func_set_user_var::register_field_in_read_map(uchar *arg)
     TABLE *table= (TABLE *) arg;
     if (result_field->table == table || !table)
       bitmap_set_bit(result_field->table->read_set, result_field->field_index);
+    if (result_field->vcol_info)
+      return result_field->vcol_info->
+               expr_item->walk(&Item::register_field_in_read_map, 1, arg);
   }
   return 0;
 }
 
+/*
+  Mark result_field in the bitmap supplied as *arg (a MY_BITMAP).
+  Returns 1 if there is a result_field but no bitmap, 0 otherwise.
+*/
+
+bool Item_func_set_user_var::register_field_in_bitmap(uchar *arg)
+{
+  MY_BITMAP *field_map= (MY_BITMAP *) arg;
+  DBUG_ASSERT(field_map);
+
+  if (!result_field)
+    return 0;                          // no field to register
+  if (!field_map)
+    return 1;                          // reached only if the assert is a no-op
+  bitmap_set_bit(field_map, result_field->field_index);
+  return 0;
+}
 
 /**
   Set value to user variable.
@@ -4404,7 +4581,7 @@ Item_func_set_user_var::update_hash(void *ptr, uint length,
 
 /** Get the value of a variable as a double. */
 
-double user_var_entry::val_real(my_bool *null_value)
+double user_var_entry::val_real(bool *null_value)
 {
   if ((*null_value= (value == 0)))
     return 0.0;
@@ -4423,7 +4600,9 @@ double user_var_entry::val_real(my_bool *null_value)
   case STRING_RESULT:
     return my_atof(value);                      // This is null terminated
   case ROW_RESULT:
-    DBUG_ASSERT(1);				// Impossible
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);				// Impossible
     break;
   }
   return 0.0;					// Impossible
@@ -4432,7 +4611,7 @@ double user_var_entry::val_real(my_bool *null_value)
 
 /** Get the value of a variable as an integer. */
 
-longlong user_var_entry::val_int(my_bool *null_value) const
+longlong user_var_entry::val_int(bool *null_value) const
 {
   if ((*null_value= (value == 0)))
     return LL(0);
@@ -4454,7 +4633,9 @@ longlong user_var_entry::val_int(my_bool *null_value) const
     return my_strtoll10(value, (char**) 0, &error);// String is null terminated
   }
   case ROW_RESULT:
-    DBUG_ASSERT(1);				// Impossible
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);				// Impossible
     break;
   }
   return LL(0);					// Impossible
@@ -4463,7 +4644,7 @@ longlong user_var_entry::val_int(my_bool *null_value) const
 
 /** Get the value of a variable as a string. */
 
-String *user_var_entry::val_str(my_bool *null_value, String *str,
+String *user_var_entry::val_str(bool *null_value, String *str,
 				uint decimals)
 {
   if ((*null_value= (value == 0)))
@@ -4485,8 +4666,11 @@ String *user_var_entry::val_str(my_bool *null_value, String *str,
   case STRING_RESULT:
     if (str->copy(value, length, collation.collation))
       str= 0;					// EOM error
+    break;
   case ROW_RESULT:
-    DBUG_ASSERT(1);				// Impossible
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);				// Impossible
     break;
   }
   return(str);
@@ -4494,7 +4678,7 @@ String *user_var_entry::val_str(my_bool *null_value, String *str,
 
 /** Get the value of a variable as a decimal. */
 
-my_decimal *user_var_entry::val_decimal(my_bool *null_value, my_decimal *val)
+my_decimal *user_var_entry::val_decimal(bool *null_value, my_decimal *val)
 {
   if ((*null_value= (value == 0)))
     return 0;
@@ -4513,7 +4697,9 @@ my_decimal *user_var_entry::val_decimal(my_bool *null_value, my_decimal *val)
     str2my_decimal(E_DEC_FATAL_ERROR, value, length, collation.collation, val);
     break;
   case ROW_RESULT:
-    DBUG_ASSERT(1);				// Impossible
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);				// Impossible
     break;
   }
   return(val);
@@ -4551,8 +4737,9 @@ Item_func_set_user_var::check(bool use_result_field)
   {
     save_result.vint= use_result_field ? result_field->val_int() :
                        args[0]->val_int();
-    unsigned_flag= use_result_field ? ((Field_num*)result_field)->unsigned_flag:
-                    args[0]->unsigned_flag;
+    unsigned_flag= (use_result_field ?
+                    ((Field_num*)result_field)->unsigned_flag:
+                    args[0]->unsigned_flag);
     break;
   }
   case STRING_RESULT:
@@ -4569,9 +4756,9 @@ Item_func_set_user_var::check(bool use_result_field)
     break;
   }
   case ROW_RESULT:
-  default:
-    // This case should never be chosen
-    DBUG_ASSERT(0);
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);                // This case should never be chosen
     break;
   }
   DBUG_RETURN(FALSE);
@@ -4604,9 +4791,9 @@ void Item_func_set_user_var::save_item_result(Item *item)
     save_result.vdec= item->val_decimal_result(&decimal_buff);
     break;
   case ROW_RESULT:
-  default:
-    // Should never happen
-    DBUG_ASSERT(0);
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);                // This case should never be chosen
     break;
   }
   DBUG_VOID_RETURN;
@@ -4672,9 +4859,9 @@ Item_func_set_user_var::update()
     break;
   }
   case ROW_RESULT:
-  default:
-    // This case should never be chosen
-    DBUG_ASSERT(0);
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);                // This case should never be chosen
     break;
   }
   DBUG_RETURN(res);
@@ -4862,7 +5049,7 @@ int Item_func_set_user_var::save_in_field(Field *field, bool no_conversions,
 
   if (result_type() == STRING_RESULT ||
       (result_type() == REAL_RESULT &&
-      field->result_type() == STRING_RESULT))
+       field->result_type() == STRING_RESULT))
   {
     String *result;
     CHARSET_INFO *cs= collation.collation;
@@ -5109,7 +5296,7 @@ void Item_func_get_user_var::fix_length_and_dec()
     max_length= var_entry->length;
 
     collation.set(var_entry->collation);
-    switch(m_cached_result_type) {
+    switch (m_cached_result_type) {
     case REAL_RESULT:
       fix_char_length(DBL_DIG + 8);
       break;
@@ -5125,8 +5312,9 @@ void Item_func_get_user_var::fix_length_and_dec()
       decimals= DECIMAL_MAX_SCALE;
       break;
     case ROW_RESULT:                            // Keep compiler happy
-    default:
-      DBUG_ASSERT(0);
+    case TIME_RESULT:
+    case IMPOSSIBLE_RESULT:
+      DBUG_ASSERT(0);                // This case should never be chosen
       break;
     }
   }
@@ -6203,7 +6391,7 @@ Item_func_sp::cleanup()
     sp_result_field= NULL;
   }
   m_sp= NULL;
-  dummy_table->alias= NULL;
+  dummy_table->alias.free();
   Item_func::cleanup();
 }
 
@@ -6229,7 +6417,7 @@ Item_func_sp::func_name() const
     qname.append('.');
   }
   append_identifier(thd, &qname, m_name->m_name.str, m_name->m_name.length);
-  return qname.ptr();
+  return qname.c_ptr_safe();
 }
 
 
@@ -6285,7 +6473,7 @@ Item_func_sp::init_result_field(THD *thd)
    */
   
   share= dummy_table->s;
-  dummy_table->alias = "";
+  dummy_table->alias.set("", 0, table_alias_charset);
   dummy_table->maybe_null = maybe_null;
   dummy_table->in_use= thd;
   dummy_table->copy_blobs= TRUE;
@@ -6607,8 +6795,8 @@ void uuid_short_init()
 longlong Item_func_uuid_short::val_int()
 {
   ulonglong val;
-  mysql_mutex_lock(&LOCK_uuid_generator);
+  mysql_mutex_lock(&LOCK_short_uuid_generator);
   val= uuid_value++;
-  mysql_mutex_unlock(&LOCK_uuid_generator);
+  mysql_mutex_unlock(&LOCK_short_uuid_generator);
   return (longlong) val;
 }
diff --git a/sql/item_func.h b/sql/item_func.h
index 9198b092539..5477c27b13d 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -1,7 +1,7 @@
 #ifndef ITEM_FUNC_INCLUDED
 #define ITEM_FUNC_INCLUDED
-
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -46,7 +46,7 @@ public:
   enum Functype { UNKNOWN_FUNC,EQ_FUNC,EQUAL_FUNC,NE_FUNC,LT_FUNC,LE_FUNC,
 		  GE_FUNC,GT_FUNC,FT_FUNC,
 		  LIKE_FUNC,ISNULL_FUNC,ISNOTNULL_FUNC,
-		  COND_AND_FUNC, COND_OR_FUNC, COND_XOR_FUNC,
+		  COND_AND_FUNC, COND_OR_FUNC, XOR_FUNC,
                   BETWEEN, IN_FUNC, MULT_EQUAL_FUNC,
 		  INTERVAL_FUNC, ISNOTNULLTEST_FUNC,
 		  SP_EQUALS_FUNC, SP_DISJOINT_FUNC,SP_INTERSECTS_FUNC,
@@ -67,6 +67,7 @@ public:
     allowed_arg_cols(1), arg_count(0)
   {
     with_sum_func= 0;
+    with_field= 0;
   }
   Item_func(Item *a):
     allowed_arg_cols(1), arg_count(1)
@@ -74,6 +75,7 @@ public:
     args= tmp_arg;
     args[0]= a;
     with_sum_func= a->with_sum_func;
+    with_field= a->with_field;
   }
   Item_func(Item *a,Item *b):
     allowed_arg_cols(1), arg_count(2)
@@ -81,6 +83,7 @@ public:
     args= tmp_arg;
     args[0]= a; args[1]= b;
     with_sum_func= a->with_sum_func || b->with_sum_func;
+    with_field= a->with_field || b->with_field;
   }
   Item_func(Item *a,Item *b,Item *c):
     allowed_arg_cols(1)
@@ -91,6 +94,7 @@ public:
       arg_count= 3;
       args[0]= a; args[1]= b; args[2]= c;
       with_sum_func= a->with_sum_func || b->with_sum_func || c->with_sum_func;
+      with_field= a->with_field || b->with_field || c->with_field;
     }
   }
   Item_func(Item *a,Item *b,Item *c,Item *d):
@@ -103,6 +107,8 @@ public:
       args[0]= a; args[1]= b; args[2]= c; args[3]= d;
       with_sum_func= a->with_sum_func || b->with_sum_func ||
 	c->with_sum_func || d->with_sum_func;
+      with_field= a->with_field || b->with_field ||
+        c->with_field || d->with_field;
     }
   }
   Item_func(Item *a,Item *b,Item *c,Item *d,Item* e):
@@ -114,12 +120,16 @@ public:
       args[0]= a; args[1]= b; args[2]= c; args[3]= d; args[4]= e;
       with_sum_func= a->with_sum_func || b->with_sum_func ||
 	c->with_sum_func || d->with_sum_func || e->with_sum_func ;
+      with_field= a->with_field || b->with_field ||
+        c->with_field || d->with_field || e->with_field;
     }
   }
   Item_func(List<Item> &list);
   // Constructor used for Item_cond_and/or (see Item comment)
   Item_func(THD *thd, Item_func *item);
   bool fix_fields(THD *, Item **ref);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
+  void quick_fix_field();
   table_map used_tables() const;
   table_map not_null_tables() const;
   void update_used_tables();
@@ -210,8 +220,9 @@ public:
                 Item_transformer transformer, uchar *arg_t);
   void traverse_cond(Cond_traverser traverser,
                      void * arg, traverse_order order);
-  bool is_expensive_processor(uchar *arg);
-  virtual bool is_expensive() { return 0; }
+  bool eval_not_null_tables(uchar *opt_arg);
+ // bool is_expensive_processor(uchar *arg);
+ // virtual bool is_expensive() { return 0; }
   inline void raise_numeric_overflow(const char *type_name)
   {
     char buf[256];
@@ -314,6 +325,21 @@ public:
   }
 
   /*
+    By default only substitution for a field whose two different values
+    are never equal is allowed in the arguments of a function.
+    This is overruled for the direct arguments of comparison functions.
+  */ 
+  bool subst_argument_checker(uchar **arg)
+  {
+    /* Empty request: leave *arg untouched. */
+    if (!*arg)
+      return FALSE;
+    /* A non-empty request is replaced by Item::IDENTITY_SUBST. */
+    *arg= (uchar *) Item::IDENTITY_SUBST;
+    return TRUE;
+  }
+
+  /*
     We assume the result of any function that has a TIMESTAMP argument to be
     timezone-dependent, since a TIMESTAMP value in both numeric and string
     contexts is interpreted according to the current timezone.
@@ -330,6 +356,21 @@ public:
   {
     return functype() == *(Functype *) arg;
   }
+
+  void no_rows_in_result()
+  {
+    bool_func_call_args call_args;
+    call_args.bool_function= &Item::no_rows_in_result;
+    call_args.original_func_item= this;     // the item the walk starts from
+    walk(&Item::call_bool_func_processor, FALSE, (uchar*) &call_args);
+  }
+  void restore_to_before_no_rows_in_result()
+  {
+    bool_func_call_args call_args;
+    call_args.bool_function= &Item::restore_to_before_no_rows_in_result;
+    call_args.original_func_item= this;     // the item the walk starts from
+    walk(&Item::call_bool_func_processor, FALSE, (uchar*) &call_args);
+  }
 };
 
 
@@ -355,6 +396,8 @@ class Item_func_numhybrid: public Item_func
 protected:
   Item_result hybrid_type;
 public:
+  Item_func_numhybrid() :Item_func(), hybrid_type(REAL_RESULT)
+  {}
   Item_func_numhybrid(Item *a) :Item_func(a), hybrid_type(REAL_RESULT)
   { collation.set_numeric(); }
   Item_func_numhybrid(Item *a,Item *b)
@@ -474,6 +517,7 @@ public:
   void fix_length_and_dec();
   bool fix_fields(THD *thd, Item **ref);
   longlong val_int() { DBUG_ASSERT(fixed == 1); return value; }
+  bool check_vcol_func_processor(uchar *int_arg) { return TRUE;}
 };
 
 
@@ -513,7 +557,7 @@ class Item_decimal_typecast :public Item_func
 public:
   Item_decimal_typecast(Item *a, int len, int dec) :Item_func(a)
   {
-    decimals= dec;
+    decimals= (uint8) dec;
     collation.set_numeric();
     fix_char_length(my_decimal_precision_to_length_no_truncation(len, dec,
                                                                  unsigned_flag));
@@ -524,18 +568,36 @@ public:
   my_decimal *val_decimal(my_decimal*);
   enum Item_result result_type () const { return DECIMAL_RESULT; }
   enum_field_types field_type() const { return MYSQL_TYPE_NEWDECIMAL; }
-  void fix_length_and_dec() {};
+  void fix_length_and_dec() {}
   const char *func_name() const { return "decimal_typecast"; }
   virtual void print(String *str, enum_query_type query_type);
 };
 
 
+/* Typecast of an item to DOUBLE; len and dec set max_length and decimals. */
+class Item_double_typecast :public Item_real_func
+{
+public:
+  Item_double_typecast(Item *a, int len, int dec) :Item_real_func(a)
+  {
+    decimals=   (uint8)  dec;     // narrowed explicitly to the member width
+    max_length= (uint32) len;
+  }
+  double val_real();
+  enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
+  void fix_length_and_dec() { maybe_null= 1; }  // result always flagged nullable
+  const char *func_name() const { return "double_typecast"; }
+  virtual void print(String *str, enum_query_type query_type);
+};
+
+
 class Item_func_additive_op :public Item_num_op
 {
 public:
   Item_func_additive_op(Item *a,Item *b) :Item_num_op(a,b) {}
   void result_precision();
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -571,6 +633,7 @@ public:
   my_decimal *decimal_op(my_decimal *);
   void result_precision();
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -603,6 +666,7 @@ public:
   }
 
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -617,6 +681,7 @@ public:
   void result_precision();
   void fix_length_and_dec();
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -633,6 +698,7 @@ public:
   void fix_num_length_and_dec();
   uint decimal_precision() const { return args[0]->decimal_precision(); }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -646,6 +712,7 @@ public:
   const char *func_name() const { return "abs"; }
   void fix_length_and_dec();
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 // A class to handle logarithmic and trigonometric functions
@@ -809,6 +876,7 @@ public:
   double real_op();
   my_decimal *decimal_op(my_decimal *);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -821,6 +889,7 @@ public:
   double real_op();
   my_decimal *decimal_op(my_decimal *);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 /* This handles round and truncate */
@@ -841,7 +910,7 @@ public:
 
 class Item_func_rand :public Item_real_func
 {
-  struct rand_struct *rand;
+  struct my_rnd_struct *rand;
   bool first_eval; // TRUE if val_real() is called 1st time
 public:
   Item_func_rand(Item *a) :Item_real_func(a), rand(0), first_eval(TRUE) {}
@@ -852,6 +921,10 @@ public:
   void update_used_tables();
   bool fix_fields(THD *thd, Item **ref);
   void cleanup() { first_eval= TRUE; Item_real_func::cleanup(); }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 private:
   void seed_random (Item * val);  
 };
@@ -885,25 +958,21 @@ class Item_func_min_max :public Item_func
   Item_result cmp_type;
   String tmp_value;
   int cmp_sign;
-  /* TRUE <=> arguments should be compared in the DATETIME context. */
-  bool compare_as_dates;
   /* An item used for issuing warnings while string to DATETIME conversion. */
-  Item *datetime_item;
+  Item *compare_as_dates;
   THD *thd;
 protected:
   enum_field_types cached_field_type;
 public:
   Item_func_min_max(List<Item> &list,int cmp_sign_arg) :Item_func(list),
-    cmp_type(INT_RESULT), cmp_sign(cmp_sign_arg), compare_as_dates(FALSE),
-    datetime_item(0) {}
+    cmp_type(INT_RESULT), cmp_sign(cmp_sign_arg), compare_as_dates(0) {}
   double val_real();
   longlong val_int();
   String *val_str(String *);
   my_decimal *val_decimal(my_decimal *);
+  bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   void fix_length_and_dec();
   enum Item_result result_type () const { return cmp_type; }
-  bool result_as_longlong() { return compare_as_dates; };
-  uint cmp_datetimes(ulonglong *value);
   enum_field_types field_type() const { return cached_field_type; }
 };
 
@@ -1134,6 +1203,10 @@ public:
       max_length= args[0]->max_length;
   }
   bool fix_fields(THD *thd, Item **ref);
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1147,6 +1220,10 @@ public:
   const char *func_name() const { return "benchmark"; }
   void fix_length_and_dec() { max_length=1; maybe_null=0; }
   virtual void print(String *str, enum_query_type query_type);
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1162,6 +1239,10 @@ public:
     used_tables_cache|= RAND_TABLE_BIT;
   }
   longlong val_int();
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1172,6 +1253,7 @@ class Item_udf_func :public Item_func
 {
 protected:
   udf_handler udf;
+  bool is_expensive_processor(uchar *arg) { return TRUE; }
 
 public:
   Item_udf_func(udf_func *udf_arg)
@@ -1411,6 +1493,10 @@ class Item_func_get_lock :public Item_int_func
   longlong val_int();
   const char *func_name() const { return "get_lock"; }
   void fix_length_and_dec() { max_length=1; maybe_null=1;}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 class Item_func_release_lock :public Item_int_func
@@ -1421,6 +1507,10 @@ public:
   longlong val_int();
   const char *func_name() const { return "release_lock"; }
   void fix_length_and_dec() { max_length=1; maybe_null=1;}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 /* replication functions */
@@ -1434,6 +1524,10 @@ public:
   longlong val_int();
   const char *func_name() const { return "master_pos_wait"; }
   void fix_length_and_dec() { max_length=21; maybe_null=1;}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1506,6 +1600,7 @@ public:
   }
   void save_org_in_field(Field *field) { (void)save_in_field(field, 1, 0); }
   bool register_field_in_read_map(uchar *arg);
+  bool register_field_in_bitmap(uchar *arg);
   bool set_entry(THD *thd, bool create_if_not_exists);
   void cleanup();
 };
@@ -1594,7 +1689,7 @@ class Item_func_get_system_var :public Item_func
   longlong cached_llval;
   double cached_dval;
   String cached_strval;
-  my_bool cached_null_value;
+  bool cached_null_value;
   query_id_t used_query_id;
   uchar cache_present;
 
@@ -1641,7 +1736,6 @@ public:
 
 
 /* for fulltext search */
-#include <ft_global.h>
 
 class Item_func_match :public Item_real_func
 {
@@ -1682,6 +1776,11 @@ public:
 
   bool fix_index();
   void init_search(bool no_order);
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    /* TODO: consider adding in support for the MATCH-based virtual columns */
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1701,6 +1800,10 @@ public:
   longlong val_int();
   const char *func_name() const { return "is_free_lock"; }
   void fix_length_and_dec() { decimals=0; max_length=1; maybe_null=1;}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 class Item_func_is_used_lock :public Item_int_func
@@ -1711,6 +1814,10 @@ public:
   longlong val_int();
   const char *func_name() const { return "is_used_lock"; }
   void fix_length_and_dec() { decimals=0; max_length=10; maybe_null=1;}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 /* For type casts */
@@ -1719,7 +1826,7 @@ enum Cast_target
 {
   ITEM_CAST_BINARY, ITEM_CAST_SIGNED_INT, ITEM_CAST_UNSIGNED_INT,
   ITEM_CAST_DATE, ITEM_CAST_TIME, ITEM_CAST_DATETIME, ITEM_CAST_CHAR,
-  ITEM_CAST_DECIMAL
+  ITEM_CAST_DECIMAL, ITEM_CAST_DOUBLE
 };
 
 
@@ -1730,6 +1837,11 @@ public:
   longlong val_int();
   const char *func_name() const { return "row_count"; }
   void fix_length_and_dec() { decimals= 0; maybe_null=0; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1759,6 +1871,9 @@ private:
   bool execute();
   bool execute_impl(THD *thd);
   bool init_result_field(THD *thd);
+
+protected:
+  bool is_expensive_processor(uchar *arg) { return TRUE; }
   
 public:
 
@@ -1838,6 +1953,10 @@ public:
   {
     return sp_result_field;
   }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1848,6 +1967,10 @@ public:
   longlong val_int();
   const char *func_name() const { return "found_rows"; }
   void fix_length_and_dec() { decimals= 0; maybe_null=0; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1862,12 +1985,17 @@ public:
   void fix_length_and_dec()
   { max_length= 21; unsigned_flag=1; }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 Item *get_system_var(THD *thd, enum_var_type var_type, LEX_STRING name,
                      LEX_STRING component);
 extern bool check_reserved_words(LEX_STRING *name);
 extern enum_field_types agg_field_type(Item **items, uint nitems);
+Item *find_date_time_item(Item **args, uint nargs, uint col);
 double my_double_round(double value, longlong dec, bool dec_unsigned,
                        bool truncate);
 bool eval_const_cond(COND *cond);
diff --git a/sql/item_row.cc b/sql/item_row.cc
index 360708fe5f6..df03b0e0ebb 100644
--- a/sql/item_row.cc
+++ b/sql/item_row.cc
@@ -93,12 +93,29 @@ bool Item_row::fix_fields(THD *thd, Item **ref)
     }
     maybe_null|= item->maybe_null;
     with_sum_func= with_sum_func || item->with_sum_func;
+    with_field= with_field || item->with_field;
   }
   fixed= 1;
   return FALSE;
 }
 
 
+/*
+  Rebuild not_null_tables_cache as the bitwise OR of not_null_tables()
+  over every element of the row.
+
+  opt_arg is not used. Always returns FALSE.
+*/
+bool Item_row::eval_not_null_tables(uchar *opt_arg)
+{
+  not_null_tables_cache= 0;
+  for (uint idx= 0; idx < arg_count; idx++)
+    not_null_tables_cache|= items[idx]->not_null_tables();
+
+  return FALSE;
+}
+
+
 void Item_row::cleanup()
 {
   DBUG_ENTER("Item_row::cleanup");
@@ -134,6 +151,20 @@ void Item_row::update_used_tables()
   }
 }
 
+
+void Item_row::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  used_tables_cache= 0;                         // both caches are rebuilt
+  const_item_cache= 1;                          // from the elements below
+  for (Item **elem= items, **last= items + arg_count; elem != last; elem++)
+  {
+    (*elem)->fix_after_pullout(new_parent, elem); // element's own slot is ref
+    used_tables_cache|= (*elem)->used_tables();
+    const_item_cache&= (*elem)->const_item();
+  }
+}
+
+
 bool Item_row::check_cols(uint c)
 {
   if (c != arg_count)
diff --git a/sql/item_row.h b/sql/item_row.h
index 7e3dbda5f75..1d34ecfc310 100644
--- a/sql/item_row.h
+++ b/sql/item_row.h
@@ -63,17 +63,20 @@ public:
     return 0;
   };
   bool fix_fields(THD *thd, Item **ref);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   void cleanup();
   void split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields);
   table_map used_tables() const { return used_tables_cache; };
   bool const_item() const { return const_item_cache; };
   enum Item_result result_type() const { return ROW_RESULT; }
+  Item_result cmp_type() const { return ROW_RESULT; }
   void update_used_tables();
   table_map not_null_tables() const { return not_null_tables_cache; }
   virtual void print(String *str, enum_query_type query_type);
 
   bool walk(Item_processor processor, bool walk_subquery, uchar *arg);
   Item *transform(Item_transformer transformer, uchar *arg);
+  bool eval_not_null_tables(uchar *opt_arg);
 
   uint cols() { return arg_count; }
   Item* element_index(uint i) { return items[i]; }
@@ -81,6 +84,7 @@ public:
   bool check_cols(uint c);
   bool null_inside() { return with_null; };
   void bring_value();
+  bool check_vcol_func_processor(uchar *int_arg) {return FALSE; } 
 };
 
 #endif /* ITEM_ROW_INCLUDED */
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 55087879b98..5ca66801796 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -43,7 +43,8 @@
 */
 #include "sql_class.h"                          // set_var.h: THD
 #include "set_var.h"
-#include "mysqld.h"                             // LOCK_uuid_generator
+#include "sql_base.h"
+#include "sql_time.h"
 #include "sql_acl.h"                            // SUPER_ACL
 #include "des_key_file.h"       // st_des_keyschedule, st_des_keyblock
 #include "password.h"           // my_make_scrambled_password,
@@ -174,7 +175,6 @@ String *Item_func_md5::val_str_ascii(String *str)
 {
   DBUG_ASSERT(fixed == 1);
   String * sptr= args[0]->val_str(str);
-  str->set_charset(&my_charset_bin);
   if (sptr)
   {
     uchar digest[16];
@@ -187,6 +187,7 @@ String *Item_func_md5::val_str_ascii(String *str)
       return 0;
     }
     array_to_hex((char *) str->ptr(), digest, 16);
+    str->set_charset(&my_charset_numeric);
     str->length((uint) 32);
     return str;
   }
@@ -213,7 +214,6 @@ String *Item_func_sha::val_str_ascii(String *str)
 {
   DBUG_ASSERT(fixed == 1);
   String * sptr= args[0]->val_str(str);
-  str->set_charset(&my_charset_bin);
   if (sptr)  /* If we got value different from NULL */
   {
     SHA1_CONTEXT context;  /* Context used to generate SHA1 hash */
@@ -223,11 +223,13 @@ String *Item_func_sha::val_str_ascii(String *str)
     /* No need to check error as the only case would be too long message */
     mysql_sha1_input(&context,
                      (const uchar *) sptr->ptr(), sptr->length());
+
     /* Ensure that memory is free and we got result */
     if (!( str->alloc(SHA1_HASH_SIZE*2) ||
            (mysql_sha1_result(&context,digest))))
     {
       array_to_hex((char *) str->ptr(), digest, SHA1_HASH_SIZE);
+      str->set_charset(&my_charset_numeric);
       str->length((uint)  SHA1_HASH_SIZE*2);
       null_value=0;
       return str;
@@ -691,7 +693,8 @@ String *Item_func_des_encrypt::val_str(String *str)
 
   tail= 8 - (res_length % 8);                   // 1..8 marking extra length
   res_length+=tail;
-  tmp_arg.realloc(res_length);
+  if (tmp_arg.realloc(res_length))
+    goto error;
   tmp_arg.length(0);
   tmp_arg.append(res->ptr(), res->length());
   code= ER_OUT_OF_RESOURCES;
@@ -826,7 +829,7 @@ String *Item_func_concat_ws::val_str(String *str)
 
   use_as_buff= &tmp_value;
   str->length(0);				// QQ; Should be removed
-  res=str;
+  res=str;                                      // If 0 arg_count
 
   // Skip until non-null argument is found.
   // If not, return the empty string
@@ -2471,6 +2474,7 @@ void Item_func_make_set::fix_length_and_dec()
   not_null_tables_cache&= item->not_null_tables();
   const_item_cache&=	  item->const_item();
   with_sum_func= with_sum_func || item->with_sum_func;
+  with_field= with_field || item->with_field;
 }
 
 
@@ -3260,7 +3264,7 @@ String *Item_load_file::val_str(String *str)
 			func_name(), current_thd->variables.max_allowed_packet);
     goto err;
   }
-  if (tmp_value.alloc(stat_info.st_size))
+  if (tmp_value.alloc((size_t)stat_info.st_size))
     goto err;
   if ((file= mysql_file_open(key_file_loadfile,
                              file_name->ptr(), O_RDONLY, MYF(0))) < 0)
@@ -3271,7 +3275,7 @@ String *Item_load_file::val_str(String *str)
     mysql_file_close(file, MYF(0));
     goto err;
   }
-  tmp_value.length(stat_info.st_size);
+  tmp_value.length((uint32)stat_info.st_size);
   mysql_file_close(file, MYF(0));
   null_value = 0;
   DBUG_RETURN(&tmp_value);
@@ -3613,7 +3617,7 @@ longlong Item_func_crc32::val_int()
 String *Item_func_compress::val_str(String *str)
 {
   int err= Z_OK, code;
-  ulong new_size;
+  size_t new_size;
   String *res;
   Byte *body;
   char *tmp, *last_char;
@@ -3649,8 +3653,8 @@ String *Item_func_compress::val_str(String *str)
   body= ((Byte*)buffer.ptr()) + 4;
 
   // As far as we have checked res->is_empty() we can use ptr()
-  if ((err= compress(body, &new_size,
-		     (const Bytef*)res->ptr(), res->length())) != Z_OK)
+  if ((err= my_compress_buffer(body, &new_size, (const uchar *)res->ptr(),
+                               res->length())) != Z_OK)
   {
     code= err==Z_MEM_ERROR ? ER_ZLIB_Z_MEM_ERROR : ER_ZLIB_Z_BUF_ERROR;
     push_warning(current_thd,MYSQL_ERROR::WARN_LEVEL_WARN,code,ER(code));
@@ -3728,156 +3732,783 @@ err:
 }
 #endif
 
-/*
-  UUID, as in
-    DCE 1.1: Remote Procedure Call,
-    Open Group Technical Standard Document Number C706, October 1997,
-    (supersedes C309 DCE: Remote Procedure Call 8/1994,
-    which was basis for ISO/IEC 11578:1996 specification)
-*/
 
-static struct rand_struct uuid_rand;
-static uint nanoseq;
-static ulonglong uuid_time=0;
-static char clock_seq_and_node_str[]="-0000-000000000000";
+String *Item_func_uuid::val_str(String *str)
+{
+  DBUG_ASSERT(fixed == 1);
+  uchar guid[MY_UUID_SIZE];
+
+  str->realloc(MY_UUID_STRING_LENGTH+1);
+  str->length(MY_UUID_STRING_LENGTH);
+  str->set_charset(system_charset_info);
+  my_uuid(guid);
+  my_uuid2str(guid, (char *)str->ptr());
 
-/**
-  number of 100-nanosecond intervals between
-  1582-10-15 00:00:00.00 and 1970-01-01 00:00:00.00.
-*/
-#define UUID_TIME_OFFSET ((ulonglong) 141427 * 24 * 60 * 60 * \
-                          1000 * 1000 * 10)
+  return str;
+}
 
-#define UUID_VERSION      0x1000
-#define UUID_VARIANT      0x8000
 
-static void tohex(char *to, uint from, uint len)
+Item_func_dyncol_create::Item_func_dyncol_create(List<Item> &args,
+                                                 DYNCALL_CREATE_DEF *dfs)
+  : Item_str_func(args), defs(dfs), vals(0), nums(0)
 {
-  to+= len;
-  while (len--)
-  {
-    *--to= _dig_vec_lower[from & 15];
-    from >>= 4;
-  }
+  DBUG_ASSERT((args.elements & 0x1) == 0); // even number of arguments
 }
 
-static void set_clock_seq_str()
+
+bool Item_func_dyncol_create::fix_fields(THD *thd, Item **ref)
 {
-  uint16 clock_seq= ((uint)(my_rnd(&uuid_rand)*16383)) | UUID_VARIANT;
-  tohex(clock_seq_and_node_str+1, clock_seq, 4);
-  nanoseq= 0;
+  bool res= Item_func::fix_fields(thd, ref); // no need Item_str_func here
+  vals= (DYNAMIC_COLUMN_VALUE *) alloc_root(thd->mem_root,
+                                            sizeof(DYNAMIC_COLUMN_VALUE) *
+                                            (arg_count / 2));
+  nums= (uint *) alloc_root(thd->mem_root,
+                            sizeof(uint) * (arg_count / 2));
+  return res || vals == 0 || nums == 0;
 }
 
-String *Item_func_uuid::val_str(String *str)
+
+void Item_func_dyncol_create::fix_length_and_dec()
 {
-  DBUG_ASSERT(fixed == 1);
-  char *s;
-  THD *thd= current_thd;
+  maybe_null= TRUE;
+  collation.set(&my_charset_bin);
+  decimals= 0;
+}
 
-  mysql_mutex_lock(&LOCK_uuid_generator);
-  if (! uuid_time) /* first UUID() call. initializing data */
+void Item_func_dyncol_create::prepare_arguments()
+{
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String *res, tmp(buff, sizeof(buff), &my_charset_bin);
+  uint column_count= (arg_count / 2);
+  uint i;
+  my_decimal dtmp, *dres;
+
+  /* get values */
+  for (i= 0; i < column_count; i++)
   {
-    ulong tmp=sql_rnd_with_mutex();
-    uchar mac[6];
-    int i;
-    if (my_gethwaddr(mac))
+    uint valpos= i * 2 + 1;
+    DYNAMIC_COLUMN_TYPE type= defs[i].type;
+    if (type == DYN_COL_NULL) // auto detect
     {
-      /* purecov: begin inspected */
       /*
-        generating random "hardware addr"
-        and because specs explicitly specify that it should NOT correlate
-        with a clock_seq value (initialized random below), we use a separate
-        randominit() here
+        We don't have a default here to ensure we get a warning if
+        one adds a new not handled MYSQL_TYPE_...
       */
-      randominit(&uuid_rand, tmp + (ulong) thd, tmp + (ulong)global_query_id);
-      for (i=0; i < (int)sizeof(mac); i++)
-        mac[i]=(uchar)(my_rnd(&uuid_rand)*255);
-      /* purecov: end */    
+      switch (args[valpos]->field_type()) {
+      case MYSQL_TYPE_DECIMAL:
+      case MYSQL_TYPE_NEWDECIMAL:
+        type= DYN_COL_DECIMAL;
+        break;
+      case MYSQL_TYPE_TINY:
+      case MYSQL_TYPE_SHORT:
+      case MYSQL_TYPE_LONG:
+      case MYSQL_TYPE_LONGLONG:
+      case MYSQL_TYPE_INT24:
+      case MYSQL_TYPE_YEAR:
+      case MYSQL_TYPE_BIT:
+        type= args[valpos]->unsigned_flag ? DYN_COL_UINT : DYN_COL_INT;
+        break;
+      case MYSQL_TYPE_FLOAT:
+      case MYSQL_TYPE_DOUBLE:
+        type= DYN_COL_DOUBLE;
+        break;
+      case MYSQL_TYPE_NULL:
+        type= DYN_COL_NULL;
+        break;
+      case MYSQL_TYPE_TIMESTAMP:
+      case MYSQL_TYPE_DATETIME:
+        type= DYN_COL_DATETIME;
+	break;
+      case MYSQL_TYPE_DATE:
+      case MYSQL_TYPE_NEWDATE:
+        type= DYN_COL_DATE;
+        break;
+      case MYSQL_TYPE_TIME:
+        type= DYN_COL_TIME;
+        break;
+      case MYSQL_TYPE_VARCHAR:
+      case MYSQL_TYPE_ENUM:
+      case MYSQL_TYPE_SET:
+      case MYSQL_TYPE_TINY_BLOB:
+      case MYSQL_TYPE_MEDIUM_BLOB:
+      case MYSQL_TYPE_LONG_BLOB:
+      case MYSQL_TYPE_BLOB:
+      case MYSQL_TYPE_VAR_STRING:
+      case MYSQL_TYPE_STRING:
+      case MYSQL_TYPE_GEOMETRY:
+        type= DYN_COL_STRING;
+        break;
+      }
     }
-    s=clock_seq_and_node_str+sizeof(clock_seq_and_node_str)-1;
-    for (i=sizeof(mac)-1 ; i>=0 ; i--)
+    nums[i]= (uint) args[i * 2]->val_int();
+    vals[i].type= type;
+    switch (type) {
+    case DYN_COL_NULL:
+      DBUG_ASSERT(args[valpos]->field_type() == MYSQL_TYPE_NULL);
+      break;
+    case DYN_COL_INT:
+      vals[i].long_value= args[valpos]->val_int();
+      break;
+    case DYN_COL_UINT:
+      vals[i].ulong_value= args[valpos]->val_int();
+      break;
+    case DYN_COL_DOUBLE:
+      vals[i].double_value= args[valpos]->val_real();
+      break;
+    case DYN_COL_STRING:
+      res= args[valpos]->val_str(&tmp);
+      if (res &&
+          (vals[i].string_value.str= my_strndup(res->ptr(), res->length(),
+                                                MYF(MY_WME))))
+      {
+	vals[i].string_value.length= res->length();
+	vals[i].charset= res->charset();
+      }
+      else
+      {
+        args[valpos]->null_value= 1;            // In case of out of memory
+        vals[i].string_value.str= NULL;
+        vals[i].string_value.length= 0;         // just to be safe
+      }
+      break;
+    case DYN_COL_DECIMAL:
+      if ((dres= args[valpos]->val_decimal(&dtmp)))
+      {
+	dynamic_column_prepare_decimal(&vals[i]);
+        DBUG_ASSERT(vals[i].decimal_value.len == dres->len);
+        vals[i].decimal_value.intg= dres->intg;
+        vals[i].decimal_value.frac= dres->frac;
+        vals[i].decimal_value.sign= dres->sign();
+        memcpy(vals[i].decimal_buffer, dres->buf,
+               sizeof(vals[i].decimal_buffer));
+      }
+      else
+      {
+	dynamic_column_prepare_decimal(&vals[i]); // just to be safe
+        DBUG_ASSERT(args[valpos]->null_value);
+      }
+      break;
+    case DYN_COL_DATETIME:
+      args[valpos]->get_date(&vals[i].time_value, TIME_FUZZY_DATE);
+      break;
+    case DYN_COL_DATE:
+      args[valpos]->get_date(&vals[i].time_value, TIME_FUZZY_DATE);
+      break;
+    case DYN_COL_TIME:
+      args[valpos]->get_time(&vals[i].time_value);
+      break;
+    default:
+      DBUG_ASSERT(0);
+      vals[i].type= DYN_COL_NULL;
+    }
+    if (vals[i].type != DYN_COL_NULL && args[valpos]->null_value)
     {
-      *--s=_dig_vec_lower[mac[i] & 15];
-      *--s=_dig_vec_lower[mac[i] >> 4];
+      if (vals[i].type == DYN_COL_STRING)
+        my_free(vals[i].string_value.str);
+      vals[i].type= DYN_COL_NULL;
     }
-    randominit(&uuid_rand, tmp + (ulong) server_start_time,
-	       tmp + (ulong) thd->status_var.bytes_sent);
-    set_clock_seq_str();
   }
+}
 
-  ulonglong tv= my_getsystime() + UUID_TIME_OFFSET + nanoseq;
+void Item_func_dyncol_create::cleanup_arguments()
+{
+  uint column_count= (arg_count / 2);
+  uint i;
 
-  if (likely(tv > uuid_time))
+  for (i= 0; i < column_count; i++)
   {
-    /*
-      Current time is ahead of last timestamp, as it should be.
-      If we "borrowed time", give it back, just as long as we
-      stay ahead of the previous timestamp.
-    */
-    if (nanoseq)
+    if (vals[i].type == DYN_COL_STRING)
+      my_free(vals[i].string_value.str);
+  }
+}
+
+/*
+  Pack the prepared (number, value) pairs into a dynamic column blob.
+  On success the blob is moved into str_value; 'str' is not used.
+  If dynamic_column_create_many() fails, returns NULL with null_value set.
+*/
+String *Item_func_dyncol_create::val_str(String *str)
+{
+  DYNAMIC_COLUMN packed;
+  String *result= &str_value;
+  enum enum_dyncol_func_result rc;
+  DBUG_ASSERT((arg_count & 0x1) == 0); // even number of arguments
+
+  prepare_arguments();
+  rc= dynamic_column_create_many(&packed, arg_count / 2, nums, vals);
+  if (!rc)
+  {
+    /* Move result from DYNAMIC_COLUMN 'packed' to str_value */
+    char *buf;
+    size_t used, allocated;
+    dynamic_column_reassociate(&packed, &buf, &used, &allocated);
+    str_value.reassociate(buf, (uint32) used, (uint32) allocated,
+                          &my_charset_bin);
+    null_value= FALSE;
+  }
+  else
+  {
+    dynamic_column_error_message(rc);
+    dynamic_column_column_free(&packed);
+    result= NULL;
+    null_value= TRUE;
+  }
+  cleanup_arguments();                  // frees strings from prepare_arguments()
+  return result;
+}
+
+/* Print the "number,value[ AS type]" pairs with ',' between the pairs. */
+void Item_func_dyncol_create::print_arguments(String *str,
+                                              enum_query_type query_type)
+{
+  const uint pair_count= arg_count / 2;
+  for (uint col= 0; col < pair_count; col++)
+  {
+    if (col)
+      str->append(',');                         // separates consecutive pairs
+    args[col * 2]->print(str, query_type);
+    str->append(',');
+    args[col * 2 + 1]->print(str, query_type);
+    switch (defs[col].type) {
+    case DYN_COL_NULL: // automatic type => write nothing
+      break;
+    case DYN_COL_INT:
+      str->append(STRING_WITH_LEN(" AS int"));
+      break;
+    case DYN_COL_UINT:
+      str->append(STRING_WITH_LEN(" AS unsigned int"));
+      break;
+    case DYN_COL_DOUBLE:
+      str->append(STRING_WITH_LEN(" AS double"));
+      break;
+    case DYN_COL_STRING:
+      str->append(STRING_WITH_LEN(" AS char"));
+      if (defs[col].cs)
+      {
+        str->append(STRING_WITH_LEN(" charset "));
+        str->append(defs[col].cs->csname);
+        str->append(' ');
+      }
+      break;
+    case DYN_COL_DECIMAL:
+      str->append(STRING_WITH_LEN(" AS decimal"));
+      break;
+    case DYN_COL_DATETIME:
+      str->append(STRING_WITH_LEN(" AS datetime"));
+      break;
+    case DYN_COL_DATE:
+      str->append(STRING_WITH_LEN(" AS date"));
+      break;
+    case DYN_COL_TIME:
+      str->append(STRING_WITH_LEN(" AS time"));
+      break;
+    }
+  }
+}
+
+
+/* Print this item as column_create(<number,value pairs>). */
+void Item_func_dyncol_create::print(String *str, enum_query_type query_type)
+{
+  DBUG_ASSERT((arg_count & 0x1) == 0); // even number of arguments
+  str->append(STRING_WITH_LEN("column_create("));
+  print_arguments(str, query_type);
+  str->append(')');
+}
+
+
+/*
+  Apply the prepared (number, value) pairs to the packed dynamic column
+  passed as the last argument; NULL result if it is NULL or a step fails.
+*/
+String *Item_func_dyncol_add::val_str(String *str)
+{
+  DYNAMIC_COLUMN col;
+  String *res;
+  uint column_count= (arg_count / 2);
+  enum enum_dyncol_func_result rc;
+  DBUG_ASSERT((arg_count & 0x1) == 1); // odd number of arguments
+
+  /* The packed data is the last argument; copy it into a work buffer */
+  res= args[arg_count - 1]->val_str(str);
+  if (args[arg_count - 1]->null_value ||
+      init_dynamic_string(&col, NULL, res->length() + STRING_BUFFER_USUAL_SIZE,
+                          STRING_BUFFER_USUAL_SIZE))
+    goto null;                                  // also on allocation failure
+  col.length= res->length();
+  memcpy(col.str, res->ptr(), col.length);
+
+  prepare_arguments();
+  if ((rc= dynamic_column_update_many(&col, column_count, nums, vals)))
+  {
+    dynamic_column_error_message(rc);
+    dynamic_column_column_free(&col);
+    cleanup_arguments();
+    goto null;
+  }
+
+  {
+    /* Move result from DYNAMIC_COLUMN to str */
+    char *ptr;
+    size_t length, alloc_length;
+    dynamic_column_reassociate(&col, &ptr, &length, &alloc_length);
+    str->reassociate(ptr, (uint32) length, (uint32) alloc_length,
+                     &my_charset_bin);
+    null_value= FALSE;
+  }
+
+  dynamic_column_column_free(&col);
+  cleanup_arguments();
+  return str;
+
+null:
+  null_value= TRUE;
+  return NULL;
+}
+
+
+/* Print as column_add(<packed data>,<number,value pairs>). */
+void Item_func_dyncol_add::print(String *str, enum_query_type query_type)
+{
+  DBUG_ASSERT((arg_count & 0x1) == 1); // odd number of arguments
+  str->append(STRING_WITH_LEN("column_add("));
+  args[arg_count - 1]->print(str, query_type);
+  str->append(',');
+  print_arguments(str, query_type);
+  str->append(')');
+}
+
+
+/**
+  Fetch column number args[1] from the packed dynamic column in args[0].
+
+  @note
+  null_value is set on every path: 1 whenever 1 is returned, 0 on success.
+*/
+
+bool Item_dyncol_get::get_dyn_value(DYNAMIC_COLUMN_VALUE *val, String *tmp)
+{
+  DYNAMIC_COLUMN dyn_str;
+  String *res;
+  longlong num;
+  enum enum_dyncol_func_result rc;
+
+  num= args[1]->val_int();
+  if (args[1]->null_value || num < 0 || num > INT_MAX)
+  {
+    null_value= 1;
+    return 1;
+  }
+
+  res= args[0]->val_str(tmp);                   // tmp is offered as buffer
+  if (args[0]->null_value)
+  {
+    null_value= 1;
+    return 1;
+  }
+
+  dyn_str.str=   (char*) res->ptr();            // points at res, no copy
+  dyn_str.length= res->length();
+  if ((rc= dynamic_column_get(&dyn_str, (uint) num, val)))
+  {
+    dynamic_column_error_message(rc);
+    null_value= 1;
+    return 1;
+  }
+
+  null_value= 0;
+  return 0;                                     // ok
+}
+
+
+String *Item_dyncol_get::val_str(String *str_result)
+{
+  DYNAMIC_COLUMN_VALUE val;
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String tmp(buff, sizeof(buff), &my_charset_bin);
+
+  if (get_dyn_value(&val, &tmp))
+    return NULL;
+
+  switch (val.type) {
+  case DYN_COL_NULL:
+    goto null;
+  case DYN_COL_INT:
+  case DYN_COL_UINT:
+    str_result->set_int(val.long_value, test(val.type == DYN_COL_UINT),
+                       &my_charset_latin1);
+    break;
+  case DYN_COL_DOUBLE:
+    str_result->set_real(val.double_value, NOT_FIXED_DEC, &my_charset_latin1);
+    break;
+  case DYN_COL_STRING:
+    if ((char*) tmp.ptr() <= val.string_value.str &&
+        (char*) tmp.ptr() + tmp.length() >= val.string_value.str)
+    {
+      /* value is allocated in tmp buffer; We have to make a copy */
+      str_result->copy(val.string_value.str, val.string_value.length,
+                      val.charset);
+    }
+    else
     {
-      DBUG_ASSERT((tv > uuid_time) && (nanoseq > 0));
       /*
-        -1 so we won't make tv= uuid_time for nanoseq >= (tv - uuid_time)
+        It's safe to use the current value because it's either pointing
+        into a field or in a buffer for another item and this buffer
+        is not going to be deleted during expression evaluation
       */
-      ulong delta= min(nanoseq, (ulong) (tv - uuid_time -1));
-      tv-= delta;
-      nanoseq-= delta;
+      str_result->set(val.string_value.str, val.string_value.length,
+                      val.charset);
+    }
+    break;
+  case DYN_COL_DECIMAL:
+  {
+    int res;
+    int length=
+      my_decimal_string_length((const my_decimal*)&val.decimal_value);
+    if (str_result->alloc(length))
+      goto null;
+    if ((res= decimal2string(&val.decimal_value, (char*) str_result->ptr(),
+                             &length, 0, 0, ' ')) != E_DEC_OK)
+    {
+      char buff[40];
+      int len= sizeof(buff);
+      DBUG_ASSERT(length < (int)sizeof(buff));
+      decimal2string(&val.decimal_value, buff, &len, 0, 0, ' ');
+      decimal_operation_results(res, buff, "CHAR");
     }
+    str_result->set_charset(&my_charset_latin1);
+    str_result->length(length);
+    break;
   }
-  else
+  case DYN_COL_DATETIME:
+  case DYN_COL_DATE:
+  case DYN_COL_TIME:
   {
-    if (unlikely(tv == uuid_time))
+    int length;
+    /*
+      We use AUTO_SEC_PART_DIGITS here to ensure that we do not loose
+      any microseconds from the data. This is safe to do as we are
+      asked to return the time argument as a string.
+    */
+    if (str_result->alloc(MAX_DATE_STRING_REP_LENGTH) ||
+        !(length= my_TIME_to_str(&val.time_value, (char*) str_result->ptr(),
+                                 AUTO_SEC_PART_DIGITS)))
+      goto null;
+    str_result->set_charset(&my_charset_latin1);
+    str_result->length(length);
+    break;
+  }
+  }
+  return str_result;
+
+null:
+  null_value= TRUE;
+  return 0;
+}
+
+
+longlong Item_dyncol_get::val_int()
+{
+  DYNAMIC_COLUMN_VALUE val;
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String tmp(buff, sizeof(buff), &my_charset_bin);
+
+  if (get_dyn_value(&val, &tmp))
+    return 0;
+
+  switch (val.type) {
+  case DYN_COL_NULL:
+    goto null;
+  case DYN_COL_UINT:
+    unsigned_flag= 1;            // Make it possible for caller to detect sign
+    return val.long_value;
+  case DYN_COL_INT:
+    unsigned_flag= 0;            // Make it possible for caller to detect sign
+    return val.long_value;
+  case DYN_COL_DOUBLE:
+  {
+    bool error;
+    longlong num;
+
+    num= double_to_longlong(val.double_value, unsigned_flag, &error);
+    if (error)
     {
-      /*
-        For low-res system clocks. If several requests for UUIDs
-        end up on the same tick, we add a nano-second to make them
-        different.
-        ( current_timestamp + nanoseq * calls_in_this_period )
-        may end up > next_timestamp; this is OK. Nonetheless, we'll
-        try to unwind nanoseq when we get a chance to.
-        If nanoseq overflows, we'll start over with a new numberspace
-        (so the if() below is needed so we can avoid the ++tv and thus
-        match the follow-up if() if nanoseq overflows!).
-      */
-      if (likely(++nanoseq))
-        ++tv;
+      char buff[30];
+      sprintf(buff, "%lg", val.double_value);
+      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_DATA_OVERFLOW,
+                          ER(ER_DATA_OVERFLOW),
+                          buff,
+                          unsigned_flag ? "UNSIGNED INT" : "INT");
     }
+    return num;
+  }
+  case DYN_COL_STRING:
+  {
+    int error;
+    longlong num;
+    char *end= val.string_value.str + val.string_value.length, *org_end= end;
 
-    if (unlikely(tv <= uuid_time))
+    num= my_strtoll10(val.string_value.str, &end, &error);
+    if (end != org_end || error > 0)
     {
-      /*
-        If the admin changes the system clock (or due to Daylight
-        Saving Time), the system clock may be turned *back* so we
-        go through a period once more for which we already gave out
-        UUIDs.  To avoid duplicate UUIDs despite potentially identical
-        times, we make a new random component.
-        We also come here if the nanoseq "borrowing" overflows.
-        In either case, we throw away any nanoseq borrowing since it's
-        irrelevant in the new numberspace.
-      */
-      set_clock_seq_str();
-      tv= my_getsystime() + UUID_TIME_OFFSET;
-      nanoseq= 0;
-      DBUG_PRINT("uuid",("making new numberspace"));
+      char buff[80];
+      strmake(buff, val.string_value.str, min(sizeof(buff)-1,
+                                              val.string_value.length));
+      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_BAD_DATA,
+                          ER(ER_BAD_DATA),
+                          buff,
+                          unsigned_flag ? "UNSIGNED INT" : "INT");
     }
+    unsigned_flag= error >= 0;
+    return num;
+  }
+  case DYN_COL_DECIMAL:
+  {
+    longlong num;
+    my_decimal2int(E_DEC_FATAL_ERROR, &val.decimal_value, unsigned_flag,
+                   &num);
+    return num;
   }
+  case DYN_COL_DATETIME:
+  case DYN_COL_DATE:
+  case DYN_COL_TIME:
+    unsigned_flag= !val.time_value.neg;
+    if (unsigned_flag)
+      return TIME_to_ulonglong(&val.time_value);
+    else
+      return -(longlong)TIME_to_ulonglong(&val.time_value);
+  }
+
+null:
+  null_value= TRUE;
+  return 0;
+}
 
-  uuid_time=tv;
-  mysql_mutex_unlock(&LOCK_uuid_generator);
 
-  uint32 time_low=            (uint32) (tv & 0xFFFFFFFF);
-  uint16 time_mid=            (uint16) ((tv >> 32) & 0xFFFF);
-  uint16 time_hi_and_version= (uint16) ((tv >> 48) | UUID_VERSION);
+double Item_dyncol_get::val_real()
+{
+  DYNAMIC_COLUMN_VALUE val;
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String tmp(buff, sizeof(buff), &my_charset_bin);
 
-  str->realloc(UUID_LENGTH+1);
-  str->length(UUID_LENGTH);
-  str->set_charset(system_charset_info);
-  s=(char *) str->ptr();
-  s[8]=s[13]='-';
-  tohex(s, time_low, 8);
-  tohex(s+9, time_mid, 4);
-  tohex(s+14, time_hi_and_version, 4);
-  strmov(s+18, clock_seq_and_node_str);
+  if (get_dyn_value(&val, &tmp))
+    return 0.0;
+
+  switch (val.type) {
+  case DYN_COL_NULL:
+    goto null;
+  case DYN_COL_UINT:
+    return ulonglong2double(val.ulong_value);
+  case DYN_COL_INT:
+    return (double) val.long_value;
+  case DYN_COL_DOUBLE:
+    return (double) val.double_value;
+  case DYN_COL_STRING:
+  {
+    int error;
+    char *end;
+    double res= my_strntod(val.charset, (char*) val.string_value.str,
+                           val.string_value.length, &end, &error);
+
+    if (end != (char*) val.string_value.str + val.string_value.length ||
+        error)
+    {
+      char buff[80];
+      strmake(buff, val.string_value.str, min(sizeof(buff)-1,
+                                              val.string_value.length));
+      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_BAD_DATA,
+                          ER(ER_BAD_DATA),
+                          buff, "DOUBLE");
+    }
+    return res;
+  }
+  case DYN_COL_DECIMAL:
+  {
+    double res;
+    /* This will always succeed */
+    decimal2double(&val.decimal_value, &res);
+    return res;
+  }
+  case DYN_COL_DATETIME:
+  case DYN_COL_DATE:
+  case DYN_COL_TIME:
+    return TIME_to_double(&val.time_value);
+  }
+
+null:
+  null_value= TRUE;
+  return 0.0;
+}
+
+
+/* Return the fetched column value as my_decimal. NULL (null_value set) is
+   returned when get_dyn_value() fails or the stored type is DYN_COL_NULL. */
+my_decimal *Item_dyncol_get::val_decimal(my_decimal *decimal_value)
+{
+  DYNAMIC_COLUMN_VALUE val;
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String tmp(buff, sizeof(buff), &my_charset_bin);
+
+  if (get_dyn_value(&val, &tmp))
+    return NULL;
+
+  switch (val.type) {
+  case DYN_COL_NULL:
+    goto null;
+  case DYN_COL_UINT:
+    int2my_decimal(E_DEC_FATAL_ERROR, val.long_value, TRUE, decimal_value);
+    break;
+  case DYN_COL_INT:
+    int2my_decimal(E_DEC_FATAL_ERROR, val.long_value, FALSE, decimal_value);
+    break;
+  case DYN_COL_DOUBLE:
+    double2my_decimal(E_DEC_FATAL_ERROR, val.double_value, decimal_value);
+    break;
+  case DYN_COL_STRING:
+  {
+    int rc= str2my_decimal(0, val.string_value.str, val.string_value.length,
+                           val.charset, decimal_value);
+    if (rc != E_DEC_OK)
+    {
+      char buff[80];                            // only for the warning text
+      strmake(buff, val.string_value.str, min(sizeof(buff)-1,
+                                              val.string_value.length));
+      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_BAD_DATA, ER(ER_BAD_DATA),
+                          buff, "DECIMAL");
+    }
+    break;
+  }
+  case DYN_COL_DECIMAL:
+    decimal2my_decimal(&val.decimal_value, decimal_value);
+    break;
+  case DYN_COL_DATETIME:
+  case DYN_COL_DATE:
+  case DYN_COL_TIME:
+    decimal_value= seconds2my_decimal(val.time_value.neg,
+                                      TIME_to_ulonglong(&val.time_value),
+                                      val.time_value.second_part,
+                                      decimal_value);
+    break;
+  }
+  return decimal_value;
+
+null:
+  null_value= TRUE;
+  return 0;
+}
+
+
+bool Item_dyncol_get::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
+{
+  DYNAMIC_COLUMN_VALUE val;
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String tmp(buff, sizeof(buff), &my_charset_bin);
+  bool signed_value= 0;                         // becomes 1 for DYN_COL_INT
+
+  if (get_dyn_value(&val, &tmp))
+    return 1;                                   // Error; null_value is set
+
+  switch (val.type) {
+  case DYN_COL_NULL:
+    goto null;
+  case DYN_COL_INT:
+    signed_value= 1;                            // forces the integer branch
+    /* fall through */
+  case DYN_COL_UINT:
+    if (signed_value || val.ulong_value <= LONGLONG_MAX)
+    {
+      if (int_to_datetime_with_warn(val.ulong_value, ltime, fuzzy_date,
+                                   0 /* TODO */))
+        goto null;
+      return 0;
+    }
+    /* let double_to_datetime_with_warn() issue the warning message */
+    val.double_value= static_cast<double>(ULONGLONG_MAX);
+    /* fall through */
+  case DYN_COL_DOUBLE:
+    if (double_to_datetime_with_warn(val.double_value, ltime, fuzzy_date,
+                                     0 /* TODO */))
+      goto null;
+    return 0;
+  case DYN_COL_DECIMAL:
+    if (decimal_to_datetime_with_warn((my_decimal*)&val.decimal_value, ltime,
+                                      fuzzy_date, 0 /* TODO */))
+      goto null;
+    return 0;
+  case DYN_COL_STRING:
+    if (str_to_datetime_with_warn(&my_charset_numeric,
+                                  val.string_value.str,
+                                  val.string_value.length,
+                                  ltime, fuzzy_date) <= MYSQL_TIMESTAMP_ERROR)
+      goto null;
+    return 0;
+  case DYN_COL_DATETIME:
+  case DYN_COL_DATE:
+  case DYN_COL_TIME:
+    *ltime= val.time_value;                     // temporal value copied as is
+    return 0;
+  }
+
+null:
+  null_value= TRUE;
+  return 1;
+}
+
+
+void Item_dyncol_get::print(String *str, enum_query_type query_type)
+{
+  str->append(STRING_WITH_LEN("column_get("));
+  args[0]->print(str, query_type);              // the packed dynamic column
+  str->append(',');
+  args[1]->print(str, query_type);              // the column number
+  str->append(')');
+}
+
+
+/* Return the column numbers stored in args[0] as a comma separated list. */
+String *Item_func_dyncol_list::val_str(String *str)
+{
+  enum enum_dyncol_func_result rc;
+  DYNAMIC_ARRAY arr;
+  DYNAMIC_COLUMN col;
+  String *res= args[0]->val_str(str);
+
+  if (args[0]->null_value)
+    goto null;
+  col.length= res->length();
+  /* We do not change the string, so could do this trick */
+  col.str= (char *)res->ptr();
+  if ((rc= dynamic_column_list(&col, &arr)))
+  {
+    dynamic_column_error_message(rc);
+    delete_dynamic(&arr);
+    goto null;
+  }
+
+  /* Numbers are 0 - 65536: at most 5 digits plus ',' = 6 bytes each */
+  if (str->alloc(arr.elements * 6))
+  {
+    delete_dynamic(&arr);                       // don't leak the list on OOM
+    goto null;
+  }
+
+  str->length(0);
+  for (uint i= 0; i < arr.elements; i++)
+  {
+    str->qs_append(*dynamic_element(&arr, i, uint*));
+    if (i < arr.elements - 1)
+      str->qs_append(',');
+  }
+
+  null_value= FALSE;
+  delete_dynamic(&arr);
   return str;
+
+null:
+  null_value= TRUE;
+  return NULL;
 }
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index 00a0b35ef58..b15179e641b 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -33,8 +33,15 @@ protected:
      character set. No memory is allocated.
      @retval A pointer to the str_value member.
    */
-  String *make_empty_result() {
-    str_value.set("", 0, collation.collation);
+  String *make_empty_result()
+  {
+    /*
+      Reset string length to an empty string. We don't use str_value.set() as
+      we don't want to free and potentially have to reallocate the buffer
+      for each call.
+    */
+    str_value.length(0);
+    str_value.set_charset(collation.collation);
     return &str_value; 
   }
 public:
@@ -392,6 +399,10 @@ public:
   String *val_str(String *);
   void fix_length_and_dec() { maybe_null=1; max_length = 13; }
   const char *func_name() const { return "encrypt"; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 #include "sql_crypt.h"
@@ -440,6 +451,11 @@ public:
     call
   */
   virtual const char *fully_qualified_func_name() const = 0;
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(
+                                           fully_qualified_func_name());
+  }
 };
 
 
@@ -748,6 +764,10 @@ public:
     maybe_null=1;
     max_length=MAX_BLOB_WIDTH;
   }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -931,7 +951,7 @@ public:
   String *val_str(String *) ZLIB_DEPENDED_FUNCTION
 };
 
-#define UUID_LENGTH (8+1+4+1+4+1+4+1+12)
+
 class Item_func_uuid: public Item_str_func
 {
 public:
@@ -940,10 +960,83 @@ public:
   {
     collation.set(system_charset_info,
                   DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII);
-    fix_char_length(UUID_LENGTH);
+    fix_char_length(MY_UUID_STRING_LENGTH);
   }
   const char *func_name() const{ return "uuid"; }
   String *val_str(String *);
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
+};
+
+
+class Item_func_dyncol_create: public Item_str_func
+{
+protected:
+  DYNCALL_CREATE_DEF *defs;
+  DYNAMIC_COLUMN_VALUE *vals;
+  uint *nums;
+  void prepare_arguments();
+  void cleanup_arguments();
+  void print_arguments(String *str, enum_query_type query_type);
+public:
+  Item_func_dyncol_create(List<Item> &args, DYNCALL_CREATE_DEF *dfs);
+  bool fix_fields(THD *thd, Item **ref);
+  void fix_length_and_dec();
+  const char *func_name() const{ return "column_create"; }
+  String *val_str(String *);
+  virtual void print(String *str, enum_query_type query_type);
+};
+
+
+class Item_func_dyncol_add: public Item_func_dyncol_create
+{
+public:
+  Item_func_dyncol_add(List<Item> &args, DYNCALL_CREATE_DEF *dfs)
+    :Item_func_dyncol_create(args, dfs)
+  {}
+  const char *func_name() const{ return "column_add"; }
+  String *val_str(String *);
+  virtual void print(String *str, enum_query_type query_type);
+};
+
+
+/*
+  The following function is always called from an Item_cast function
+*/
+
+class Item_dyncol_get: public Item_str_func
+{
+public:
+  Item_dyncol_get(Item *str, Item *num)
+    :Item_str_func(str, num)
+  {
+    max_length= MAX_DYNAMIC_COLUMN_LENGTH;
+  }
+  void fix_length_and_dec()
+  { maybe_null= 1; }
+  /* Mark that collation can change between calls */
+  bool dynamic_result() { return 1; }
+
+  const char *func_name() const { return "column_get"; }
+  String *val_str(String *);
+  longlong val_int();
+  double val_real();
+  my_decimal *val_decimal(my_decimal *);
+  bool get_dyn_value(DYNAMIC_COLUMN_VALUE *val, String *tmp);
+  bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
+  void print(String *str, enum_query_type query_type);
+};
+
+
+class Item_func_dyncol_list: public Item_str_func
+{
+public:
+  Item_func_dyncol_list(Item *str) :Item_str_func(str) {};
+  void fix_length_and_dec() { maybe_null= 1; max_length= MAX_BLOB_WIDTH; };
+  const char *func_name() const{ return "column_list"; }
+  String *val_str(String *);
 };
 
 extern String my_empty_string;
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 0c4b96f7be0..40f1e753d60 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2002, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2010, 2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -40,28 +41,34 @@
 #include "sql_parse.h"                          // check_stack_overrun
 #include "sql_test.h"
 
-inline Item * and_items(Item* cond, Item *item)
-{
-  return (cond? (new Item_cond_and(cond, item)) : item);
-}
+double get_post_group_estimate(JOIN* join, double join_op_rows);
+
 
 Item_subselect::Item_subselect():
-  Item_result_field(), value_assigned(0), thd(0), substitution(0),
-  engine(0), old_engine(0), used_tables_cache(0), have_to_be_excluded(0),
-  const_item_cache(1), engine_changed(0), changed(0), is_correlated(FALSE)
+  Item_result_field(), value_assigned(0), own_engine(0), thd(0), old_engine(0), 
+  used_tables_cache(0), have_to_be_excluded(0), const_item_cache(1),
+  inside_first_fix_fields(0), done_first_fix_fields(FALSE), 
+  expr_cache(0), forced_const(FALSE), substitution(0), engine(0), eliminated(FALSE),
+  engine_changed(0), changed(0), is_correlated(FALSE)
 {
+  DBUG_ENTER("Item_subselect::Item_subselect");
+  DBUG_PRINT("enter", ("this: 0x%lx", (ulong) this));
+#ifndef DBUG_OFF
+  exec_counter= 0;
+#endif
   with_subselect= 1;
   reset();
   /*
-    item value is NULL if select_subselect not changed this value
+    Item value is NULL if select_result_interceptor didn't change this value
     (i.e. some rows will be found returned)
   */
   null_value= TRUE;
+  DBUG_VOID_RETURN;
 }
 
 
 void Item_subselect::init(st_select_lex *select_lex,
-			  select_subselect *result)
+			  select_result_interceptor *result)
 {
   /*
     Please see Item_singlerow_subselect::invalidate_and_restore_select_lex(),
@@ -69,8 +76,10 @@ void Item_subselect::init(st_select_lex *select_lex,
   */
 
   DBUG_ENTER("Item_subselect::init");
-  DBUG_PRINT("enter", ("select_lex: 0x%lx", (long) select_lex));
+  DBUG_PRINT("enter", ("select_lex: 0x%lx  this: 0x%lx",
+                       (ulong) select_lex, (ulong) this));
   unit= select_lex->master_unit();
+  thd= unit->thd;
 
   if (unit->item)
   {
@@ -79,10 +88,10 @@ void Item_subselect::init(st_select_lex *select_lex,
       => we do not copy old_engine here
     */
     engine= unit->item->engine;
+    own_engine= FALSE;
     parsing_place= unit->item->parsing_place;
-    unit->item->engine= 0;
-    unit->item= this;
-    engine->change_result(this, result);
+    thd->change_item_tree((Item**)&unit->item, this);
+    engine->change_result(this, result, TRUE);
   }
   else
   {
@@ -95,15 +104,18 @@ void Item_subselect::init(st_select_lex *select_lex,
                     NO_MATTER :
                     outer_select->parsing_place);
     if (unit->is_union())
-      engine= new subselect_union_engine(unit, result, this);
+      engine= new subselect_union_engine(thd, unit, result, this);
     else
-      engine= new subselect_single_select_engine(select_lex, result, this);
+      engine= new subselect_single_select_engine(thd, select_lex, result, this);
   }
   {
     SELECT_LEX *upper= unit->outer_select();
     if (upper->parsing_place == IN_HAVING)
       upper->subquery_in_having= 1;
+    /* The subquery is an expression cache candidate */
+    upper->expr_cache_may_be_used[upper->parsing_place]= TRUE;
   }
+  DBUG_PRINT("info", ("engine: 0x%lx", (ulong)engine));
   DBUG_VOID_RETURN;
 }
 
@@ -128,6 +140,12 @@ void Item_subselect::cleanup()
     engine->cleanup();
   reset();
   value_assigned= 0;
+  expr_cache= 0;
+  forced_const= FALSE;
+  DBUG_PRINT("info", ("exec_counter: %d", exec_counter));
+#ifndef DBUG_OFF
+  exec_counter= 0;
+#endif
   DBUG_VOID_RETURN;
 }
 
@@ -140,16 +158,58 @@ void Item_singlerow_subselect::cleanup()
   DBUG_VOID_RETURN;
 }
 
+
+void Item_in_subselect::cleanup()
+{
+  DBUG_ENTER("Item_in_subselect::cleanup");
+  if (left_expr_cache)
+  {
+    left_expr_cache->delete_elements();
+    delete left_expr_cache;
+    left_expr_cache= NULL;
+  }
+  first_execution= TRUE;
+  if (in_strategy & SUBS_MATERIALIZATION)
+    in_strategy= 0;
+  pushed_cond_guards= NULL;
+  Item_subselect::cleanup();
+  DBUG_VOID_RETURN;
+}
+
+
+void Item_allany_subselect::cleanup()
+{
+  /*
+    The MAX/MIN transformation through injection is reverted through the
+    change_item_tree() mechanism. Revert the select_lex object of the
+    query to its initial state.
+  */
+  for (SELECT_LEX *sl= unit->first_select();
+       sl; sl= sl->next_select())
+    if (in_strategy & SUBS_MAXMIN_INJECTED)
+      sl->with_sum_func= false;
+  Item_in_subselect::cleanup();
+
+}
+
+
 Item_subselect::~Item_subselect()
 {
-  delete engine;
+  DBUG_ENTER("Item_subselect::~Item_subselect");
+  DBUG_PRINT("enter", ("this: 0x%lx", (ulong) this));
+  if (own_engine)
+    delete engine;
+  else
+    engine->cleanup();
+  engine= NULL;
+  DBUG_VOID_RETURN;
 }
 
-Item_subselect::trans_res
+bool
 Item_subselect::select_transformer(JOIN *join)
 {
   DBUG_ENTER("Item_subselect::select_transformer");
-  DBUG_RETURN(RES_OK);
+  DBUG_RETURN(false);
 }
 
 
@@ -160,25 +220,48 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref)
   bool res;
 
   DBUG_ASSERT(fixed == 0);
-  engine->set_thd((thd= thd_param));
+  /* There is no reason to get a different THD. */
+  DBUG_ASSERT(thd == thd_param);
+  if (!done_first_fix_fields)
+  {
+    done_first_fix_fields= TRUE;
+    inside_first_fix_fields= TRUE;
+    upper_refs.empty();
+    /*
+      psergey-todo: remove _first_fix_fields calls, we need changes on every
+      execution
+    */
+  }
+
+  eliminated= FALSE;
+  parent_select= thd_param->lex->current_select;
 
   if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*)&res))
     return TRUE;
+  
 
   if (!(res= engine->prepare()))
   {
     // all transformation is done (used by prepared statements)
     changed= 1;
+    inside_first_fix_fields= FALSE;
 
+    /*
+      Substitute the current item with an Item_in_optimizer that was
+      created by Item_in_subselect::select_in_like_transformer and
+      call fix_fields for the substituted item which in turn calls
+      engine->prepare for the subquery predicate.
+    */
     if (substitution)
     {
-      int ret= 0;
-
-      // did we changed top item of WHERE condition
+      /*
+        If the top item of the WHERE/HAVING condition changed,
+        set correct WHERE/HAVING for PS.
+      */
       if (unit->outer_select()->where == (*ref))
-	unit->outer_select()->where= substitution; // correct WHERE for PS
+        unit->outer_select()->where= substitution;
       else if (unit->outer_select()->having == (*ref))
-	unit->outer_select()->having= substitution; // correct HAVING for PS
+        unit->outer_select()->having= substitution;
 
       (*ref)= substitution;
       substitution->name= name;
@@ -187,20 +270,21 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref)
       substitution= 0;
       thd->where= "checking transformed subquery";
       if (!(*ref)->fixed)
-	ret= (*ref)->fix_fields(thd, ref);
-      thd->where= save_where;
-      return ret;
+	res= (*ref)->fix_fields(thd, ref);
+      goto end;
+
     }
     // Is it one field subselect?
     if (engine->cols() > max_columns)
     {
       my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
-      return TRUE;
+      res= TRUE;  /* prepare() succeeded, so res is still 0; report the error */
+      goto end;
     }
     fix_length_and_dec();
   }
   else
-    goto err;
+    goto end;
   
   if ((uncacheable= engine->uncacheable()))
   {
@@ -210,12 +294,233 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref)
   }
   fixed= 1;
 
-err:
+end:
+  done_first_fix_fields= FALSE;
+  inside_first_fix_fields= FALSE;
   thd->where= save_where;
   return res;
 }
 
 
+bool Item_subselect::enumerate_field_refs_processor(uchar *arg)
+{
+  List_iterator<Ref_to_outside> it(upper_refs);
+  Ref_to_outside *upper;
+  while ((upper= it++))          /* forward the processor to every outer ref */
+  {
+    if (upper->item &&           /* NULL for references to grouping functions */
+        upper->item->walk(&Item::enumerate_field_refs_processor, FALSE, arg))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+bool Item_subselect::mark_as_eliminated_processor(uchar *arg)
+{
+  eliminated= TRUE;
+  return FALSE;
+}
+
+
+/**
+  Remove a subselect item from its unit so that the unit no longer
+  represents a subquery.
+
+  @param arg  unused parameter
+
+  @return
+    FALSE to force the evaluation of the processor for the subsequent items.
+*/
+
+bool Item_subselect::eliminate_subselect_processor(uchar *arg)
+{
+  unit->item= NULL;
+  unit->exclude_from_tree();
+  eliminated= TRUE;
+  return FALSE;
+}
+
+
+/**
+  Adjust the master select of the subquery to be the fake_select which
+  represents the whole UNION right above the subquery, instead of the
+  last query of the UNION.
+
+  @param arg  pointer to the fake select
+
+  @return
+    FALSE to force the evaluation of the processor for the subsequent items.
+*/
+
+bool Item_subselect::set_fake_select_as_master_processor(uchar *arg)
+{
+  SELECT_LEX *fake_select= (SELECT_LEX*) arg;
+  /*
+    Move the st_select_lex_unit of a subquery from a global ORDER BY clause to
+    become a direct child of the fake_select of a UNION. In this way the
+    ORDER BY is applied to the temporary table that contains the result of
+    the whole UNION, and all columns in the subquery are resolved against this
+    table. The transformation is applied only for immediate child subqueries of
+    a UNION query.
+  */
+  if (unit->outer_select()->master_unit()->fake_select_lex == fake_select)
+  {
+    /*
+      Set the master of the subquery to be the fake select (i.e. the whole
+      UNION), instead of the last query in the UNION.
+    */
+    fake_select->add_slave(unit);
+    DBUG_ASSERT(unit->outer_select() == fake_select);
+    /* Adjust the name resolution context hierarchy accordingly. */
+    for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select())
+      sl->context.outer_context= &(fake_select->context);
+    /*
+      Undo Item_subselect::eliminate_subselect_processor because at that phase
+      we don't know yet that the ORDER clause will be moved to the fake select.
+    */
+    unit->item= this;
+    eliminated= FALSE;
+  }
+  return FALSE;
+}
+
+
+bool Item_subselect::mark_as_dependent(THD *thd, st_select_lex *select, 
+                                       Item *item)
+{
+  if (inside_first_fix_fields)     /* raised by fix_fields() on its first pass */
+  {
+    is_correlated= TRUE;
+    Ref_to_outside *upper;
+    if (!(upper= new (thd->stmt_arena->mem_root) Ref_to_outside()))
+      return TRUE;                 /* allocation failed */
+    upper->select= select;
+    upper->item= item;             /* recalc_used_tables() accepts item == NULL */
+    if (upper_refs.push_back(upper, thd->stmt_arena->mem_root))
+      return TRUE;                 /* could not append to upper_refs */
+  }
+  return FALSE;                    /* also returned when nothing was recorded */
+}
+
+
+/*
+  Adjust attributes after our parent select has been merged into grandparent
+
+  DESCRIPTION
+    Subquery is a composite object which may be correlated, that is, it may
+    have
+    1. references to tables of the parent select (i.e. one that has the clause
+      with the subquery predicate)
+    2. references to tables of the grandparent select
+    3. references to tables of further ancestors.
+    
+    Before the pullout, this item indicates:
+    - #1 with table bits in used_tables()
+    - #2 and #3 with OUTER_REF_TABLE_BIT.
+
+    After parent has been merged with grandparent:
+    - references to parent and grandparent tables should be indicated with 
+      table bits.
+    - references to greatgrandparent and further ancestors - with
+      OUTER_REF_TABLE_BIT.
+*/
+
+void Item_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  recalc_used_tables(new_parent, TRUE);
+  parent_select= new_parent;
+}
+
+
+class Field_fixer: public Field_enumerator
+{
+public:
+  table_map used_tables; /* Collect used_tables here */
+  st_select_lex *new_parent; /* Select we're in */
+  virtual void visit_field(Item_field *item)
+  {
+    //for (TABLE_LIST *tbl= new_parent->leaf_tables; tbl; tbl= tbl->next_local)
+    //{
+    //  if (tbl->table == field->table)
+    //  {
+        used_tables|= item->field->table->map;
+    //    return;
+    //  }
+    //}
+    //used_tables |= OUTER_REF_TABLE_BIT;
+  }
+};
+
+
+/*
+  Recalculate used_tables_cache 
+*/
+
+void Item_subselect::recalc_used_tables(st_select_lex *new_parent, 
+                                        bool after_pullout)
+{
+  List_iterator<Ref_to_outside> it(upper_refs);
+  Ref_to_outside *upper;
+  
+  used_tables_cache= 0;
+  while ((upper= it++))
+  {
+    bool found= FALSE;
+    /*
+      Check if
+        1. the upper reference refers to the new immediate parent select, or
+        2. one of the further ancestors.
+
+      We rely on the fact that the tree of selects is modified by some kind of
+      'flattening', i.e. a process where child selects are merged into their
+      parents.
+      The merged selects are removed from the select tree but keep pointers to
+      their parents.
+    */
+    for (st_select_lex *sel= upper->select; sel; sel= sel->outer_select())
+    {
+      /* 
+        If we've reached the new parent select by walking upwards from
+        reference's original select, this means that the reference is now 
+        referring to the direct parent:
+      */
+      if (sel == new_parent)
+      {
+        found= TRUE;
+        /* 
+          upper->item may be NULL when we've referred to a grouping function,
+          in which case we don't care about what it's table_map really is,
+          because item->with_sum_func==1 will ensure correct placement of the
+          item.
+        */
+        if (upper->item)
+        {
+          // Now, iterate over fields and collect used_tables() attribute:
+          Field_fixer fixer;
+          fixer.used_tables= 0;
+          fixer.new_parent= new_parent;
+          upper->item->walk(&Item::enumerate_field_refs_processor, FALSE,
+                            (uchar*)&fixer);
+          used_tables_cache |= fixer.used_tables;
+/*
+          if (after_pullout)
+            upper->item->fix_after_pullout(new_parent, &(upper->item));
+          upper->item->update_used_tables();
+*/          
+        }
+      }
+    }
+    if (!found)
+      used_tables_cache|= OUTER_REF_TABLE_BIT;
+  }
+  /* 
+    Don't update const_tables_cache yet as we don't yet know which of the
+    parent's tables are constant. Parent will call update_used_tables() after
+    he has done const table detection, and that will be our chance to update
+    const_tables_cache.
+  */
+}
+
 bool Item_subselect::walk(Item_processor processor, bool walk_subquery,
                           uchar *argument)
 {
@@ -233,6 +538,7 @@ bool Item_subselect::walk(Item_processor processor, bool walk_subquery,
       if (lex->having && (lex->having)->walk(processor, walk_subquery,
                                              argument))
         return 1;
+      /* TODO: why does this walk WHERE/HAVING but not ON expressions of outer joins? */
 
       while ((item=li++))
       {
@@ -275,6 +581,9 @@ bool Item_subselect::exec()
 
   bool res= engine->exec();
 
+#ifndef DBUG_OFF
+  ++exec_counter;
+#endif
   if (engine_changed)
   {
     engine_changed= 0;
@@ -284,6 +593,151 @@ bool Item_subselect::exec()
   DBUG_RETURN(res);
 }
 
+
+void Item_subselect::get_cache_parameters(List<Item> &parameters)
+{
+  Collect_deps_prm prm= { unit->first_select()->nest_level, &parameters };
+  walk(&Item::collect_outer_ref_processor, TRUE, (uchar*)&prm);
+}
+
+int Item_in_subselect::optimize(double *out_rows, double *cost)
+{
+  int res;
+  DBUG_ENTER("Item_in_subselect::optimize");
+  SELECT_LEX *save_select= thd->lex->current_select;
+  JOIN *join= unit->first_select()->join;
+
+  thd->lex->current_select= join->select_lex;
+  if ((res= join->optimize()))
+  {
+    thd->lex->current_select= save_select;  /* restore on the error path too */
+    DBUG_RETURN(res);
+  }
+
+  /* Calculate #rows and cost of join execution */
+  join->get_partial_cost_and_fanout(join->table_count - join->const_tables,
+                                    table_map(-1), cost, out_rows);
+
+  /*
+    Adjust join output cardinality. There can be these cases:
+    - Have no GROUP BY and no aggregate funcs: we won't get into this
+      function because such join will be processed as a merged semi-join
+      (TODO: does it really mean we don't need to handle such cases here at
+       all? put ASSERT)
+    - Have no GROUP BY but have aggregate funcs: output is 1 record.
+    - Have GROUP BY (with or without aggregates): adjust output cardinality.
+  */
+  thd->lex->current_select= save_select;
+  if (!join->group_list && !join->group_optimized_away &&
+      join->tmp_table_param.sum_func_count)
+  {
+    DBUG_PRINT("info",("Materialized join will have only 1 row (it has "
+                       "aggregates but no GROUP BY"));
+    *out_rows= 1;
+  }
+
+  /* Now with grouping */
+  if (join->group_list)
+  {
+    DBUG_PRINT("info",("Materialized join has grouping, trying to estimate it"));
+    double output_rows= get_post_group_estimate(join, *out_rows);
+    DBUG_PRINT("info",("Got value of %g", output_rows));
+    *out_rows= output_rows;
+  }
+  /* res is 0 here: a non-zero optimize() result already returned above */
+  DBUG_RETURN(res);
+}
+
+
+/**
+  Check if an expression cache is needed for this subquery
+
+  @param thd             Thread handle
+
+  @details
+  The function checks whether a cache is needed for a subquery and whether
+  the result of the subquery can be put in cache.
+
+  @retval TRUE  cache is needed
+  @retval FALSE otherwise
+*/
+
+bool Item_subselect::expr_cache_is_needed(THD *thd)
+{
+  return ((engine->uncacheable() & UNCACHEABLE_DEPENDENT) &&
+          engine->cols() == 1 &&
+          optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) &&
+          !(engine->uncacheable() & (UNCACHEABLE_RAND |
+                                     UNCACHEABLE_SIDEEFFECT)));
+}
+
+
+/**
+  Check if an expression cache is needed for this subquery
+
+  @param thd             Thread handle
+
+  @details
+  The function checks whether a cache is needed for a subquery and whether
+  the result of the subquery can be put in cache.
+
+  @note
+  This method allows many columns in the subquery because it is supported by
+  Item_in optimizer and result of the IN subquery will be scalar in this
+  case.
+
+  @retval TRUE  cache is needed
+  @retval FALSE otherwise
+*/
+
+bool Item_in_subselect::expr_cache_is_needed(THD *thd)
+{
+  return (optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) &&
+          !(engine->uncacheable() & (UNCACHEABLE_RAND |
+                                     UNCACHEABLE_SIDEEFFECT)));
+}
+
+
+/*
+  Compute the IN predicate if the left operand's cache changed.
+*/
+
+bool Item_in_subselect::exec()
+{
+  DBUG_ENTER("Item_in_subselect::exec");
+  /*
+    Initialize the cache of the left predicate operand. This has to be done as
+    late as now, because Cached_item directly contains a resolved field (not
+    an item), and in some cases (when temp tables are created), these fields
+    end up pointing to the wrong field. One solution is to change Cached_item
+    to not resolve its field upon creation, but to resolve it dynamically
+    from a given Item_ref object.
+    TODO: the cache should be applied conditionally based on:
+    - rules - e.g. only if the left operand is known to be ordered, and/or
+    - on a cost-based basis, that takes into account the cost of a cache
+      lookup, the cache hit rate, and the savings per cache hit.
+  */
+  if (!left_expr_cache && (in_strategy & SUBS_MATERIALIZATION))
+    init_left_expr_cache();
+
+  /*
+    If the new left operand is already in the cache, reuse the old result.
+    Use the cached result only if this is not the first execution of IN
+    because the cache is not valid for the first execution.
+  */
+  if (!first_execution && left_expr_cache &&
+      test_if_item_cache_changed(*left_expr_cache) < 0)
+    DBUG_RETURN(FALSE);
+
+  /*
+    The exec() method below updates item::value, and item::null_value, thus if
+    we don't call it, the next call to item::val_int() will return whatever
+    result was computed by its previous call.
+  */
+  DBUG_RETURN(Item_subselect::exec());
+}
+
+
 Item::Type Item_subselect::type() const
 {
   return SUBSELECT_ITEM;
@@ -316,11 +770,15 @@ Item *Item_subselect::get_tmp_table_item(THD *thd_arg)
 
 void Item_subselect::update_used_tables()
 {
-  if (!engine->uncacheable())
+  if (!forced_const)
   {
-    // did all used tables become static?
-    if (!(used_tables_cache & ~engine->upper_select_const_tables()))
-      const_item_cache= 1;
+    recalc_used_tables(parent_select, FALSE);
+    if (!engine->uncacheable())
+    {
+      // did all used tables become static?
+      if (!(used_tables_cache & ~engine->upper_select_const_tables()))
+        const_item_cache= 1;
+    }
   }
 }
 
@@ -386,7 +844,7 @@ Item_maxmin_subselect::Item_maxmin_subselect(THD *thd_param,
     of Items belonged to subquery, which will be not repeated
   */
   used_tables_cache= parent->get_used_tables_cache();
-  const_item_cache= parent->get_const_item_cache();
+  const_item_cache= parent->const_item();
 
   /*
     this subquery always creates during preparation, so we can assign
@@ -424,7 +882,7 @@ void Item_maxmin_subselect::print(String *str, enum_query_type query_type)
 
 void Item_singlerow_subselect::reset()
 {
-  null_value= TRUE;
+  Item_subselect::reset();
   if (value)
     value->null_value= TRUE;
 }
@@ -439,12 +897,17 @@ void Item_singlerow_subselect::reset()
   - switch off this optimization for prepare statement,
   because we do not rollback this changes.
   Make rollback for it, or special name resolving mode in 5.0.
+
+  @param join  Join object of the subquery (i.e. 'child' join).
+
+  @retval false  The subquery was transformed
 */
-Item_subselect::trans_res
+bool
 Item_singlerow_subselect::select_transformer(JOIN *join)
 {
+  DBUG_ENTER("Item_singlerow_subselect::select_transformer");
   if (changed)
-    return RES_OK;
+    DBUG_RETURN(false);
 
   SELECT_LEX *select_lex= join->select_lex;
   Query_arena *arena= thd->stmt_arena;
@@ -471,7 +934,6 @@ Item_singlerow_subselect::select_transformer(JOIN *join)
       !arena->is_stmt_prepare_or_first_sp_execute()
       )
   {
-
     have_to_be_excluded= 1;
     if (thd->lex->describe)
     {
@@ -487,9 +949,8 @@ Item_singlerow_subselect::select_transformer(JOIN *join)
     */
     substitution->walk(&Item::remove_dependence_processor, 0,
 		       (uchar *) select_lex->outer_select());
-    return RES_REDUCE;
   }
-  return RES_OK;
+  DBUG_RETURN(false);
 }
 
 
@@ -536,6 +997,40 @@ void Item_singlerow_subselect::fix_length_and_dec()
     maybe_null= engine->may_be_null();
 }
 
+
+/**
+  Add an expression cache for this subquery if it is needed
+
+  @param thd_arg         Thread handle
+
+  @details
+  The function checks whether an expression cache is needed for this item
+  and if so wraps the item into an item of the class
+  Item_exp_cache_wrapper with an appropriate expression cache set up there.
+
+  @note
+  used from Item::transform()
+
+  @return
+  new wrapper item if an expression cache is needed,
+  this item - otherwise
+*/
+
+Item* Item_singlerow_subselect::expr_cache_insert_transformer(uchar *thd_arg)
+{
+  THD *thd= (THD*) thd_arg;
+  DBUG_ENTER("Item_singlerow_subselect::expr_cache_insert_transformer");
+
+  if (expr_cache)
+    DBUG_RETURN(expr_cache);
+
+  if (expr_cache_is_needed(thd) &&
+      (expr_cache= set_expr_cache(thd)))
+    DBUG_RETURN(expr_cache);
+  DBUG_RETURN(this);
+}
+
+
 uint Item_singlerow_subselect::cols()
 {
   return engine->cols();
@@ -682,11 +1177,15 @@ bool Item_in_subselect::test_limit(st_select_lex_unit *unit_arg)
 
 Item_in_subselect::Item_in_subselect(Item * left_exp,
 				     st_select_lex *select_lex):
-  Item_exists_subselect(), optimizer(0), transformed(0),
-  pushed_cond_guards(NULL), upper_item(0)
+  Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
+  optimizer(0), pushed_cond_guards(NULL), in_strategy(0),
+  is_jtbm_merged(FALSE), is_flattenable_semijoin(FALSE),
+  is_registered_semijoin(FALSE), 
+  upper_item(0)
 {
   DBUG_ENTER("Item_in_subselect::Item_in_subselect");
   left_expr= left_exp;
+  func= &eq_creator;
   init(select_lex, new select_exists_subselect(this));
   max_columns= UINT_MAX;
   maybe_null= 1;
@@ -697,13 +1196,18 @@ Item_in_subselect::Item_in_subselect(Item * left_exp,
   DBUG_VOID_RETURN;
 }
 
+int Item_in_subselect::get_identifier()
+{
+  return engine->get_identifier();
+}
+
 Item_allany_subselect::Item_allany_subselect(Item * left_exp,
                                              chooser_compare_func_creator fc,
 					     st_select_lex *select_lex,
 					     bool all_arg)
   :Item_in_subselect(), func_creator(fc), all(all_arg)
 {
-  DBUG_ENTER("Item_in_subselect::Item_in_subselect");
+  DBUG_ENTER("Item_allany_subselect::Item_allany_subselect");
   left_expr= left_exp;
   func= func_creator(all_arg);
   init(select_lex, new select_exists_subselect(this));
@@ -716,15 +1220,78 @@ Item_allany_subselect::Item_allany_subselect(Item * left_exp,
 }
 
 
+/**
+  Initialize length and decimals for EXISTS  and inherited (IN/ALL/ANY)
+  subqueries
+*/
+
+void Item_exists_subselect::init_length_and_dec()
+{
+  decimals= 0;
+  max_length= 1;
+  max_columns= engine->cols();
+}
+
+
 void Item_exists_subselect::fix_length_and_dec()
 {
-   decimals= 0;
-   max_length= 1;
-   max_columns= engine->cols();
-  /* We need only 1 row to determine existence */
+  DBUG_ENTER("Item_exists_subselect::fix_length_and_dec");
+  init_length_and_dec();
+  /*
+    We need only 1 row to determine existence (i.e. any EXISTS that is not
+    an IN always requires LIMIT 1)
+  */
   unit->global_parameters->select_limit= new Item_int((int32) 1);
+  DBUG_PRINT("info", ("Set limit to 1"));
+  DBUG_VOID_RETURN;
+}
+
+
+void Item_in_subselect::fix_length_and_dec()
+{
+  DBUG_ENTER("Item_in_subselect::fix_length_and_dec");
+  init_length_and_dec();
+  /*
+    Unlike Item_exists_subselect, LIMIT 1 is set later for
+    Item_in_subselect, depending on the chosen strategy.
+  */
+  DBUG_VOID_RETURN;
 }
 
+
+/**
+  Add an expression cache for this subquery if it is needed
+
+  @param thd_arg         Thread handle
+
+  @details
+  The function checks whether an expression cache is needed for this item
+  and if so wraps the item into an item of the class
+  Item_exp_cache_wrapper with an appropriate expression cache set up there.
+
+  @note
+  used from Item::transform()
+
+  @return
+  new wrapper item if an expression cache is needed,
+  this item - otherwise
+*/
+
+Item* Item_exists_subselect::expr_cache_insert_transformer(uchar *thd_arg)
+{
+  THD *thd= (THD*) thd_arg;
+  DBUG_ENTER("Item_exists_subselect::expr_cache_insert_transformer");
+
+  if (expr_cache)
+    DBUG_RETURN(expr_cache);
+
+  if (substype() == EXISTS_SUBS && expr_cache_is_needed(thd) &&
+      (expr_cache= set_expr_cache(thd)))
+    DBUG_RETURN(expr_cache);
+  DBUG_RETURN(this);
+}
+
+
 double Item_exists_subselect::val_real()
 {
   DBUG_ASSERT(fixed == 1);
@@ -873,6 +1440,8 @@ String *Item_in_subselect::val_str(String *str)
 bool Item_in_subselect::val_bool()
 {
   DBUG_ASSERT(fixed == 1);
+  if (forced_const)
+    return value;
   null_value= was_null= FALSE;
   if (exec())
   {
@@ -905,59 +1474,27 @@ my_decimal *Item_in_subselect::val_decimal(my_decimal *decimal_value)
 }
 
 
-/* 
-  Rewrite a single-column IN/ALL/ANY subselect
-
-  SYNOPSIS
-    Item_in_subselect::single_value_transformer()
-      join  Join object of the subquery (i.e. 'child' join).
-      func  Subquery comparison creator
-
-  DESCRIPTION
-    Rewrite a single-column subquery using rule-based approach. The subquery
-    
-       oe $cmp$ (SELECT ie FROM ... WHERE subq_where ... HAVING subq_having)
-    
-    First, try to convert the subquery to scalar-result subquery in one of
-    the forms:
-    
-       - oe $cmp$ (SELECT MAX(...) )  // handled by Item_singlerow_subselect
-       - oe $cmp$ <max>(SELECT ...)   // handled by Item_maxmin_subselect
-   
-    If that fails, the subquery will be handled with class Item_in_optimizer, 
-    Inject the predicates into subquery, i.e. convert it to:
-
-    - If the subquery has aggregates, GROUP BY, or HAVING, convert to
+/**
+  Prepare a single-column IN/ALL/ANY subselect for rewriting.
 
-       SELECT ie FROM ...  HAVING subq_having AND 
-                                   trigcond(oe $cmp$ ref_or_null_helper<ie>)
-                                   
-      the addition is wrapped into trigger only when we want to distinguish
-      between NULL and FALSE results.
+  @param join  Join object of the subquery (i.e. 'child' join).
 
-    - Otherwise (no aggregates/GROUP BY/HAVING) convert it to one of the
-      following:
+  @details
 
-      = If we don't need to distinguish between NULL and FALSE subquery:
-        
-        SELECT 1 FROM ... WHERE (oe $cmp$ ie) AND subq_where
+  Prepare the given single-column subquery to be rewritten.
 
-      = If we need to distinguish between those:
+  If the subquery has no tables it will be turned into an expression between
+  the left part and the SELECT list.
 
-        SELECT 1 FROM ...
-          WHERE  subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL))
-          HAVING trigcond(<is_not_null_test>(ie))
+  In other cases the subquery will be wrapped in an Item_in_optimizer, which
+  later allows it to be turned into EXISTS or MAX/MIN.
 
-  RETURN
-    RES_OK     - OK, either subquery was transformed, or appopriate
-                 predicates where injected into it.
-    RES_REDUCE - The subquery was reduced to non-subquery
-    RES_ERROR  - Error
+  @retval false  The subquery was transformed
+  @retval true   Error
 */
 
-Item_subselect::trans_res
-Item_in_subselect::single_value_transformer(JOIN *join,
-					    Comp_creator *func)
+bool
+Item_in_subselect::single_value_transformer(JOIN *join)
 {
   SELECT_LEX *select_lex= join->select_lex;
   DBUG_ENTER("Item_in_subselect::single_value_transformer");
@@ -966,119 +1503,62 @@ Item_in_subselect::single_value_transformer(JOIN *join,
     Check that the right part of the subselect contains no more than one
     column. E.g. in SELECT 1 IN (SELECT * ..) the right part is (SELECT * ...)
   */
+  // psergey: duplicated_subselect_card_check
   if (select_lex->item_list.elements > 1)
   {
     my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
-    DBUG_RETURN(RES_ERROR);
+    DBUG_RETURN(true);
   }
 
-  /*
-    If this is an ALL/ANY single-value subselect, try to rewrite it with
-    a MIN/MAX subselect. We can do that if a possible NULL result of the
-    subselect can be ignored.
-    E.g. SELECT * FROM t1 WHERE b > ANY (SELECT a FROM t2) can be rewritten
-    with SELECT * FROM t1 WHERE b > (SELECT MAX(a) FROM t2).
-    We can't check that this optimization is safe if it's not a top-level
-    item of the WHERE clause (e.g. because the WHERE clause can contain IS
-    NULL/IS NOT NULL functions). If so, we rewrite ALL/ANY with NOT EXISTS
-    later in this method.
-  */
-  if ((abort_on_null || (upper_item && upper_item->top_level())) &&
-      !select_lex->master_unit()->uncacheable && !func->eqne_op())
+  Item* join_having= join->having ? join->having : join->tmp_having;
+  if (!(join_having || select_lex->with_sum_func ||
+        select_lex->group_list.elements) &&
+      select_lex->table_list.elements == 0 &&
+      !select_lex->master_unit()->is_union())
   {
-    if (substitution)
-    {
-      // It is second (third, ...) SELECT of UNION => All is done
-      DBUG_RETURN(RES_OK);
-    }
-
-    Item *subs;
-    if (!select_lex->group_list.elements &&
-        !select_lex->having &&
-	!select_lex->with_sum_func &&
-	!(select_lex->next_select()) &&
-        select_lex->table_list.elements)
-    {
-      Item_sum_hybrid *item;
-      nesting_map save_allow_sum_func;
-      if (func->l_op())
-      {
-	/*
-	  (ALL && (> || =>)) || (ANY && (< || =<))
-	  for ALL condition is inverted
-	*/
-	item= new Item_sum_max(*select_lex->ref_pointer_array);
-      }
-      else
-      {
-	/*
-	  (ALL && (< || =<)) || (ANY && (> || =>))
-	  for ALL condition is inverted
-	*/
-	item= new Item_sum_min(*select_lex->ref_pointer_array);
-      }
-      if (upper_item)
-        upper_item->set_sum_test(item);
-      *select_lex->ref_pointer_array= item;
-      {
-	List_iterator<Item> it(select_lex->item_list);
-	it++;
-	it.replace(item);
-      }
-
-      DBUG_EXECUTE("where",
-                   print_where(item, "rewrite with MIN/MAX", QT_ORDINARY););
-      if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY)
-      {
-        DBUG_ASSERT(select_lex->non_agg_field_used());
-        select_lex->set_non_agg_field_used(false);
-      }
-
-      save_allow_sum_func= thd->lex->allow_sum_func;
-      thd->lex->allow_sum_func|= 1 << thd->lex->current_select->nest_level;
-      /*
-	Item_sum_(max|min) can't substitute other item => we can use 0 as
-        reference, also Item_sum_(max|min) can't be fixed after creation, so
-        we do not check item->fixed
-      */
-      if (item->fix_fields(thd, 0))
-	DBUG_RETURN(RES_ERROR);
-      thd->lex->allow_sum_func= save_allow_sum_func; 
-      /* we added aggregate function => we have to change statistic */
-      count_field_types(select_lex, &join->tmp_table_param, join->all_fields, 
-                        0);
-
-      subs= new Item_singlerow_subselect(select_lex);
-    }
-    else
+    Item *where_item= (Item*) select_lex->item_list.head();
+    /*
+      This is a single select without tables => an optimization is possible.
+      Remove the dependence mark since the item is moved to the upper
+      select and is not outer anymore.
+    */
+    where_item->walk(&Item::remove_dependence_processor, 0,
+                     (uchar *) select_lex->outer_select());
+    substitution= func->create(left_expr, where_item);
+    have_to_be_excluded= 1;
+    if (thd->lex->describe)
     {
-      Item_maxmin_subselect *item;
-      subs= item= new Item_maxmin_subselect(thd, this, select_lex, func->l_op());
-      if (upper_item)
-        upper_item->set_sub_test(item);
+      char warn_buff[MYSQL_ERRMSG_SIZE];
+      sprintf(warn_buff, ER(ER_SELECT_REDUCED), select_lex->select_number);
+      push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
+                   ER_SELECT_REDUCED, warn_buff);
     }
-    /* fix fields is already called for  left expression */
-    substitution= func->create(left_expr, subs);
-    DBUG_RETURN(RES_OK);
+    DBUG_RETURN(false);
   }
 
+  /*
+    Wrap the current IN predicate in an Item_in_optimizer. The actual
+    substitution in the Item tree takes place in Item_subselect::fix_fields.
+  */
   if (!substitution)
   {
     /* We're invoked for the 1st (or the only) SELECT in the subquery UNION */
-    SELECT_LEX_UNIT *master_unit= select_lex->master_unit();
     substitution= optimizer;
 
-    SELECT_LEX *current= thd->lex->current_select, *up;
+    SELECT_LEX *current= thd->lex->current_select;
 
-    thd->lex->current_select= up= current->return_after_parsing();
+    thd->lex->current_select= current->return_after_parsing();
     //optimizer never use Item **ref => we can pass 0 as parameter
     if (!optimizer || optimizer->fix_left(thd, 0))
     {
       thd->lex->current_select= current;
-      DBUG_RETURN(RES_ERROR);
+      DBUG_RETURN(true);
     }
     thd->lex->current_select= current;
 
+    /* We will refer to upper level cache array => we have to save it for SP */
+    optimizer->keep_top_level_cache();
+
     /*
       As far as  Item_ref_in_optimizer do not substitute itself on fix_fields
       we can use same item for all selects.
@@ -1088,20 +1568,212 @@ Item_in_subselect::single_value_transformer(JOIN *join,
 			      (char *)"<no matter>",
 			      (char *)in_left_expr_name);
 
-    master_unit->uncacheable|= UNCACHEABLE_DEPENDENT;
   }
-  if (!abort_on_null && left_expr->maybe_null && !pushed_cond_guards)
+
+  DBUG_RETURN(false);
+}
+
+
+/**
+  Apply the max/min transformation to an ALL/ANY subquery if it is
+  possible.
+
+  @param join  Join object of the subquery (i.e. 'child' join).
+
+  @details
+
+  If this is an ALL/ANY single-value subselect, try to rewrite it with
+  a MIN/MAX subselect. We can do that if a possible NULL result of the
+  subselect can be ignored.
+  E.g. SELECT * FROM t1 WHERE b > ANY (SELECT a FROM t2) can be rewritten
+  with SELECT * FROM t1 WHERE b > (SELECT MAX(a) FROM t2).
+  We can't check that this optimization is safe if it's not a top-level
+  item of the WHERE clause (e.g. because the WHERE clause can contain IS
+  NULL/IS NOT NULL functions). If so, ALL/ANY is rewritten with NOT EXISTS
+  elsewhere (this method only performs the MIN/MAX rewrite).
+
+  @retval false  The subquery was transformed, or no MAX/MIN strategy is set
+  @retval true   Error
+*/
+
+bool Item_allany_subselect::transform_into_max_min(JOIN *join)
+{
+  DBUG_ENTER("Item_allany_subselect::transform_into_max_min");
+  if (!(in_strategy & (SUBS_MAXMIN_INJECTED | SUBS_MAXMIN_ENGINE)))
+    DBUG_RETURN(false);
+  Item **place= optimizer->arguments() + 1;
+  THD *thd= join->thd;
+  SELECT_LEX *select_lex= join->select_lex;
+  Item *subs;
+
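+  /* 'substitution' must not be set here: the new item is installed
+     directly at 'place' below rather than through a substitution. */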
+  DBUG_ASSERT(!substitution);
+
+  if (!select_lex->group_list.elements &&
+      !select_lex->having &&
+      !select_lex->with_sum_func &&
+      !(select_lex->next_select()) &&
+      select_lex->table_list.elements)
   {
-    if (!(pushed_cond_guards= (bool*)join->thd->alloc(sizeof(bool))))
-      DBUG_RETURN(RES_ERROR);
-    pushed_cond_guards[0]= TRUE;
+    Item_sum_hybrid *item;
+    nesting_map save_allow_sum_func;
+    if (func->l_op())
+    {
+      /*
+        (ALL && (> || =>)) || (ANY && (< || =<))
+        for ALL condition is inverted
+      */
+      item= new Item_sum_max(*select_lex->ref_pointer_array);
+    }
+    else
+    {
+      /*
+        (ALL && (< || =<)) || (ANY && (> || =>))
+        for ALL condition is inverted
+      */
+      item= new Item_sum_min(*select_lex->ref_pointer_array);
+    }
+    if (upper_item)
+      upper_item->set_sum_test(item);
+    thd->change_item_tree(select_lex->ref_pointer_array, item);
+    {
+      List_iterator<Item> it(select_lex->item_list);
+      it++;
+      thd->change_item_tree(it.ref(), item);
+    }
+
+    save_allow_sum_func= thd->lex->allow_sum_func;
+    thd->lex->allow_sum_func|= 1 << thd->lex->current_select->nest_level;
+    /*
+      Item_sum_(max|min) can't substitute other item => we can use 0 as
+      reference, also Item_sum_(max|min) can't be fixed after creation, so
+      we do not check item->fixed
+    */
+    if (item->fix_fields(thd, 0))
+      DBUG_RETURN(true);
+    thd->lex->allow_sum_func= save_allow_sum_func; 
+    /* we added aggregate function => we have to change statistic */
+    count_field_types(select_lex, &join->tmp_table_param, join->all_fields, 
+                      0);
+    if (join->prepare_stage2())
+      DBUG_RETURN(true);
+    subs= new Item_singlerow_subselect(select_lex);
+
+    /*
+      Remove other strategies if any (we already changed the query and
+      can't apply other strategy).
+    */
+    in_strategy= SUBS_MAXMIN_INJECTED;
   }
+  else
+  {
+    Item_maxmin_subselect *item;
+    subs= item= new Item_maxmin_subselect(thd, this, select_lex, func->l_op());
+    if (upper_item)
+      upper_item->set_sub_test(item);
+    /*
+      Remove other strategies if any (we already changed the query and
+      can't apply other strategy).
+    */
+    in_strategy= SUBS_MAXMIN_ENGINE;
+  }
+  /*
+    The swap is needed for expressions of type 'f1 < ALL ( SELECT ....)'
+    where we want to evaluate the sub query even if f1 would be null.
+  */
+  subs= func->create_swap(left_expr, subs);
+  thd->change_item_tree(place, subs);
+  if (subs->fix_fields(thd, &subs))
+    DBUG_RETURN(true);
+  DBUG_ASSERT(subs == (*place)); // There was no substitutions
+
+  select_lex->master_unit()->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
+  select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
+
+  DBUG_RETURN(false);
+}
+
+
+bool Item_in_subselect::fix_having(Item *having, SELECT_LEX *select_lex)
+{
+  bool fix_res= 0;
+  if (!having->fixed)
+  {
+    select_lex->having_fix_field= 1;
+    fix_res= having->fix_fields(thd, 0);
+    select_lex->having_fix_field= 0;
+  }
+  return fix_res;
+}
+
+bool Item_allany_subselect::is_maxmin_applicable(JOIN *join)
+{
+  /*
+    Check if the max/min optimization is applicable: this must be a top
+    item of the WHERE condition, with a cacheable unit and a non-eqne_op func.
+  */
+  return (abort_on_null || (upper_item && upper_item->is_top_level_item())) &&
+      !join->select_lex->master_unit()->uncacheable && !func->eqne_op();
+}
+
+
+/**
+  Create the predicates needed to transform a single-column IN/ALL/ANY
+  subselect into a correlated EXISTS via predicate injection.
+
+  @param[in]  join         Join object of the subquery (i.e. 'child' join).
+  @param[out] where_item   the in-to-exists addition to the where clause
+  @param[out] having_item  the in-to-exists addition to the having clause
+
+  @details
+  The correlated predicates are created as follows:
+
+  - If the subquery has aggregates, GROUP BY, or HAVING, convert to
+
+    SELECT ie FROM ...  HAVING subq_having AND 
+                               trigcond(oe $cmp$ ref_or_null_helper<ie>)
+                                   
+    the addition is wrapped into trigger only when we want to distinguish
+    between NULL and FALSE results.
+
+  - Otherwise (no aggregates/GROUP BY/HAVING) convert it to one of the
+    following:
+
+    = If we don't need to distinguish between NULL and FALSE subquery:
+        
+      SELECT ie FROM ... WHERE subq_where AND (oe $cmp$ ie)
+
+    = If we need to distinguish between those:
+
+      SELECT ie FROM ...
+        WHERE  subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL))
+        HAVING trigcond(<is_not_null_test>(ie))
+
+  @retval false If the new conditions were created successfully
+  @retval true  Error
+*/
 
-  select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
-  if (join->having || select_lex->with_sum_func ||
+bool
+Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
+                                                   Item **where_item,
+                                                   Item **having_item)
+{
+  SELECT_LEX *select_lex= join->select_lex;
+  /*
+    The non-transformed HAVING clause of 'join' may be stored in two ways
+    during JOIN::optimize: this->tmp_having= this->having; this->having= 0;
+  */
+  Item* join_having= join->having ? join->having : join->tmp_having;
+
+  DBUG_ENTER("Item_in_subselect::create_single_in_to_exists_cond");
+
+  *where_item= NULL;
+  *having_item= NULL;
+
+  if (join_having || select_lex->with_sum_func ||
       select_lex->group_list.elements)
   {
-    bool tmp;
     Item *item= func->create(expr,
                              new Item_ref_null_helper(&select_lex->context,
                                                       this,
@@ -1117,25 +1789,12 @@ Item_in_subselect::single_value_transformer(JOIN *join,
       */
       item= new Item_func_trig_cond(item, get_cond_guard(0));
     }
-    
-    /*
-      AND and comparison functions can't be changed during fix_fields()
-      we can assign select_lex->having here, and pass 0 as last
-      argument (reference) to fix_fields()
-    */
-    select_lex->having= join->having= and_items(join->having, item);
-    if (join->having == item)
-      item->name= (char*)in_having_cond;
-    select_lex->having->top_level_item();
-    select_lex->having_fix_field= 1;
-    /*
-      we do not check join->having->fixed, because Item_and (from and_items)
-      or comparison function (from func->create) can't be fixed after creation
-    */
-    tmp= join->having->fix_fields(thd, 0);
-    select_lex->having_fix_field= 0;
-    if (tmp)
-      DBUG_RETURN(RES_ERROR);
+
+    if (!join_having)
+      item->name= (char*) in_having_cond;
+    if (fix_having(item, select_lex))
+      DBUG_RETURN(true);
+    *having_item= item;
   }
   else
   {
@@ -1143,13 +1802,8 @@ Item_in_subselect::single_value_transformer(JOIN *join,
 
     if (select_lex->table_list.elements)
     {
-      bool tmp;
-      Item *having= item, *orig_item= item;
-      select_lex->item_list.empty();
-      select_lex->item_list.push_back(new Item_int("Not_used",
-                                                   (longlong) 1,
-                                                   MY_INT64_NUM_DECIMAL_DIGITS));
-      select_lex->ref_pointer_array[0]= select_lex->item_list.head();
+      Item *having= item;
+      Item *orig_item= item;
        
       item= func->create(expr, item);
       if (!abort_on_null && orig_item->maybe_null)
@@ -1159,25 +1813,13 @@ Item_in_subselect::single_value_transformer(JOIN *join,
         {
           if (!(having= new Item_func_trig_cond(having,
                                                 get_cond_guard(0))))
-            DBUG_RETURN(RES_ERROR);
+            DBUG_RETURN(true);
         }
-	/*
-	  Item_is_not_null_test can't be changed during fix_fields()
-	  we can assign select_lex->having here, and pass 0 as last
-	  argument (reference) to fix_fields()
-	*/
-        having->name= (char*)in_having_cond;
-	select_lex->having= join->having= having;
-	select_lex->having_fix_field= 1;
-        /*
-          we do not check join->having->fixed, because Item_and (from
-          and_items) or comparison function (from func->create) can't be
-          fixed after creation
-        */
-	tmp= join->having->fix_fields(thd, 0);
-        select_lex->having_fix_field= 0;
-        if (tmp)
-	  DBUG_RETURN(RES_ERROR);
+        having->name= (char*) in_having_cond;
+        if (fix_having(having, select_lex))
+          DBUG_RETURN(true);
+        *having_item= having;
+
 	item= new Item_cond_or(item,
 			       new Item_func_isnull(orig_item));
       }
@@ -1188,39 +1830,23 @@ Item_in_subselect::single_value_transformer(JOIN *join,
       if (!abort_on_null && left_expr->maybe_null)
       {
         if (!(item= new Item_func_trig_cond(item, get_cond_guard(0))))
-          DBUG_RETURN(RES_ERROR);
+          DBUG_RETURN(true);
       }
+
       /*
         TODO: figure out why the following is done here in 
         single_value_transformer but there is no corresponding action in
         row_value_transformer?
       */
-      item->name= (char *)in_additional_cond;
-
-      /*
-	AND can't be changed during fix_fields()
-	we can assign select_lex->having here, and pass 0 as last
-	argument (reference) to fix_fields()
-      */
-      select_lex->where= join->conds= and_items(join->conds, item);
-      select_lex->where->top_level_item();
-      /*
-        we do not check join->conds->fixed, because Item_and can't be fixed
-        after creation
-      */
-      if (join->conds->fix_fields(thd, 0))
-	DBUG_RETURN(RES_ERROR);
+      item->name= (char *) in_additional_cond;
+      if (!item->fixed && item->fix_fields(thd, 0))
+        DBUG_RETURN(true);
+      *where_item= item;
     }
     else
     {
-      bool tmp;
       if (select_lex->master_unit()->is_union())
       {
-	/*
-	  comparison functions can't be changed during fix_fields()
-	  we can assign select_lex->having here, and pass 0 as last
-	  argument (reference) to fix_fields()
-	*/
         Item *new_having=
           func->create(expr,
                        new Item_ref_null_helper(&select_lex->context, this,
@@ -1231,123 +1857,177 @@ Item_in_subselect::single_value_transformer(JOIN *join,
         {
           if (!(new_having= new Item_func_trig_cond(new_having,
                                                     get_cond_guard(0))))
-            DBUG_RETURN(RES_ERROR);
+            DBUG_RETURN(true);
         }
-        new_having->name= (char*)in_having_cond;
-	select_lex->having= join->having= new_having;
-	select_lex->having_fix_field= 1;
-        
-        /*
-          we do not check join->having->fixed, because comparison function
-          (from func->create) can't be fixed after creation
-        */
-	tmp= join->having->fix_fields(thd, 0);
-        select_lex->having_fix_field= 0;
-        if (tmp)
-	  DBUG_RETURN(RES_ERROR);
+
+        new_having->name= (char*) in_having_cond;
+        if (fix_having(new_having, select_lex))
+          DBUG_RETURN(true);
+        *having_item= new_having;
       }
       else
-      {
-	// it is single select without tables => possible optimization
-        // remove the dependence mark since the item is moved to upper
-        // select and is not outer anymore.
-        item->walk(&Item::remove_dependence_processor, 0,
-                           (uchar *) select_lex->outer_select());
-	item= func->create(left_expr, item);
-	// fix_field of item will be done in time of substituting
-	substitution= item;
-	have_to_be_excluded= 1;
-	if (thd->lex->describe)
-	{
-	  char warn_buff[MYSQL_ERRMSG_SIZE];
-	  sprintf(warn_buff, ER(ER_SELECT_REDUCED), select_lex->select_number);
-	  push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
-		       ER_SELECT_REDUCED, warn_buff);
-	}
-	DBUG_RETURN(RES_REDUCE);
-      }
+        DBUG_ASSERT(false);
     }
   }
 
-  DBUG_RETURN(RES_OK);
+  DBUG_RETURN(false);
 }
 
 
-Item_subselect::trans_res
+/**
+  Wrap a multi-column IN/ALL/ANY subselect into an Item_in_optimizer.
+
+  @param join  Join object of the subquery (i.e. 'child' join).
+
+  @details
+  The subquery predicate is wrapped into an Item_in_optimizer. Later the query
+  optimization phase chooses whether the subquery under the Item_in_optimizer
+  will be further transformed into an equivalent correlated EXISTS by injecting
+  additional predicates, or will be executed via subquery materialization in its
+  unmodified form.
+
+  @retval false  The subquery was transformed
+  @retval true   Error
+*/
+
+bool
 Item_in_subselect::row_value_transformer(JOIN *join)
 {
   SELECT_LEX *select_lex= join->select_lex;
-  Item *having_item= 0;
   uint cols_num= left_expr->cols();
-  bool is_having_used= (join->having || select_lex->with_sum_func ||
-                        select_lex->group_list.first ||
-                        !select_lex->table_list.elements);
+
   DBUG_ENTER("Item_in_subselect::row_value_transformer");
 
-  if (select_lex->item_list.elements != left_expr->cols())
+  // psergey: duplicated_subselect_card_check
+  if (select_lex->item_list.elements != cols_num)
   {
-    my_error(ER_OPERAND_COLUMNS, MYF(0), left_expr->cols());
-    DBUG_RETURN(RES_ERROR);
+    my_error(ER_OPERAND_COLUMNS, MYF(0), cols_num);
+    DBUG_RETURN(true);
   }
 
+  /*
+    Wrap the current IN predicate in an Item_in_optimizer. The actual
+    substitution in the Item tree takes place in Item_subselect::fix_fields.
+  */
   if (!substitution)
   {
     //first call for this unit
     SELECT_LEX_UNIT *master_unit= select_lex->master_unit();
     substitution= optimizer;
 
-    SELECT_LEX *current= thd->lex->current_select, *up;
-    thd->lex->current_select= up= current->return_after_parsing();
+    SELECT_LEX *current= thd->lex->current_select;
+    thd->lex->current_select= current->return_after_parsing();
     //optimizer never use Item **ref => we can pass 0 as parameter
     if (!optimizer || optimizer->fix_left(thd, 0))
     {
       thd->lex->current_select= current;
-      DBUG_RETURN(RES_ERROR);
+      DBUG_RETURN(true);
     }
 
     // we will refer to upper level cache array => we have to save it in PS
     optimizer->keep_top_level_cache();
 
     thd->lex->current_select= current;
-    master_unit->uncacheable|= UNCACHEABLE_DEPENDENT;
-
-    if (!abort_on_null && left_expr->maybe_null && !pushed_cond_guards)
-    {
-      if (!(pushed_cond_guards= (bool*)join->thd->alloc(sizeof(bool) *
-                                                        left_expr->cols())))
-        DBUG_RETURN(RES_ERROR);
-      for (uint i= 0; i < cols_num; i++)
-        pushed_cond_guards[i]= TRUE;
-    }
+    /*
+      The uncacheable property controls a number of actions, e.g. whether to
+      save/restore (via init_save_join_tab/restore_tmp) the original JOIN for
+      plans with a temp table where the original JOIN was overridden by
+      make_simple_join. The UNCACHEABLE_EXPLAIN is ignored by EXPLAIN, thus
+      non-correlated subqueries will not appear as such to EXPLAIN.
+    */
+    master_unit->uncacheable|= UNCACHEABLE_EXPLAIN;
+    select_lex->uncacheable|= UNCACHEABLE_EXPLAIN;
   }
 
-  select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
+  DBUG_RETURN(false);
+}
+
+
+/**
+  Create the predicates needed to transform a multi-column IN/ALL/ANY
+  subselect into a correlated EXISTS via predicate injection.
+
+  @details
+  The correlated predicates are created as follows:
+
+  - If the subquery has aggregates, GROUP BY, or HAVING, convert to
+
+    (l1, l2, l3) IN (SELECT v1, v2, v3 ... HAVING having)
+    =>
+    EXISTS (SELECT ... HAVING having and
+                              (l1 = v1 or is null v1) and
+                              (l2 = v2 or is null v2) and
+                              (l3 = v3 or is null v3) and
+                              is_not_null_test(v1) and
+                              is_not_null_test(v2) and
+                              is_not_null_test(v3))
+
+    where is_not_null_test is used to register NULLs in case no match is
+    found, so that the correct NULL value can be returned.
+
+  - Otherwise (no aggregates/GROUP BY/HAVING) convert the subquery as follows:
+
+    (l1, l2, l3) IN (SELECT v1, v2, v3 ... WHERE where)
+    =>
+    EXISTS (SELECT ... WHERE where and
+                             (l1 = v1 or is null v1) and
+                             (l2 = v2 or is null v2) and
+                             (l3 = v3 or is null v3)
+                       HAVING is_not_null_test(v1) and
+                              is_not_null_test(v2) and
+                              is_not_null_test(v3))
+    where is_not_null_test registers NULL values but rejects rows.
+
+    When a correct NULL result is not needed, a simpler construction is used:
+    EXISTS (SELECT ... WHERE where and
+                             (l1 = v1) and
+                             (l2 = v2) and
+                             (l3 = v3))
+
+  @param[in]  join         Join object of the subquery (i.e. 'child' join).
+  @param[out] where_item   the in-to-exists addition to the where clause
+  @param[out] having_item  the in-to-exists addition to the having clause
+
+  @retval false  If the new conditions were created successfully
+  @retval true   Error
+*/
+
+bool
+Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
+                                                Item **where_item,
+                                                Item **having_item)
+{
+  SELECT_LEX *select_lex= join->select_lex;
+  uint cols_num= left_expr->cols();
+  /*
+    The non-transformed HAVING clause of 'join' may be stored in two ways
+    during JOIN::optimize: this->tmp_having= this->having; this->having= 0;
+  */
+  Item* join_having= join->having ? join->having : join->tmp_having;
+  bool is_having_used= (join_having || select_lex->with_sum_func ||
+                        select_lex->group_list.first ||
+                        !select_lex->table_list.elements);
+
+  DBUG_ENTER("Item_in_subselect::create_row_in_to_exists_cond");
+
+  *where_item= NULL;
+  *having_item= NULL;
+
   if (is_having_used)
   {
-    /*
-      (l1, l2, l3) IN (SELECT v1, v2, v3 ... HAVING having) =>
-      EXISTS (SELECT ... HAVING having and
-                                (l1 = v1 or is null v1) and
-                                (l2 = v2 or is null v2) and
-                                (l3 = v3 or is null v3) and
-                                is_not_null_test(v1) and
-                                is_not_null_test(v2) and
-                                is_not_null_test(v3))
-      where is_not_null_test used to register nulls in case if we have
-      not found matching to return correct NULL value
-      TODO: say here explicitly if the order of AND parts matters or not.
-    */
+    /* TODO: say here explicitly if the order of AND parts matters or not. */
     Item *item_having_part2= 0;
     for (uint i= 0; i < cols_num; i++)
     {
       DBUG_ASSERT((left_expr->fixed &&
+
                   select_lex->ref_pointer_array[i]->fixed) ||
                   (select_lex->ref_pointer_array[i]->type() == REF_ITEM &&
                    ((Item_ref*)(select_lex->ref_pointer_array[i]))->ref_type() ==
                     Item_ref::OUTER_REF));
       if (select_lex->ref_pointer_array[i]->
           check_cols(left_expr->element_index(i)->cols()))
-        DBUG_RETURN(RES_ERROR);
+        DBUG_RETURN(true);
       Item *item_eq=
         new Item_func_eq(new
                          Item_ref(&select_lex->context,
@@ -1359,23 +2039,21 @@ Item_in_subselect::row_value_transformer(JOIN *join)
                          Item_ref(&select_lex->context,
                                   select_lex->ref_pointer_array + i,
                                   (char *)"<no matter>",
-                                  (char *)"<list ref>")
-                        );
+                                  (char *)"<list ref>"));
       Item *item_isnull=
         new Item_func_isnull(new
                              Item_ref(&select_lex->context,
                                       select_lex->ref_pointer_array+i,
                                       (char *)"<no matter>",
-                                      (char *)"<list ref>")
-                            );
+                                      (char *)"<list ref>"));
       Item *col_item= new Item_cond_or(item_eq, item_isnull);
       if (!abort_on_null && left_expr->element_index(i)->maybe_null)
       {
         if (!(col_item= new Item_func_trig_cond(col_item, get_cond_guard(i))))
-          DBUG_RETURN(RES_ERROR);
+          DBUG_RETURN(true);
       }
-      having_item= and_items(having_item, col_item);
-      
+      *having_item= and_items(*having_item, col_item);
+
       Item *item_nnull_test= 
          new Item_is_not_null_test(this,
                                    new Item_ref(&select_lex->context,
@@ -1387,34 +2065,15 @@ Item_in_subselect::row_value_transformer(JOIN *join)
       {
         if (!(item_nnull_test= 
               new Item_func_trig_cond(item_nnull_test, get_cond_guard(i))))
-          DBUG_RETURN(RES_ERROR);
+          DBUG_RETURN(true);
       }
       item_having_part2= and_items(item_having_part2, item_nnull_test);
       item_having_part2->top_level_item();
     }
-    having_item= and_items(having_item, item_having_part2);
-    having_item->top_level_item();
+    *having_item= and_items(*having_item, item_having_part2);
   }
   else
   {
-    /*
-      (l1, l2, l3) IN (SELECT v1, v2, v3 ... WHERE where) =>
-      EXISTS (SELECT ... WHERE where and
-                               (l1 = v1 or is null v1) and
-                               (l2 = v2 or is null v2) and
-                               (l3 = v3 or is null v3)
-                         HAVING is_not_null_test(v1) and
-                                is_not_null_test(v2) and
-                                is_not_null_test(v3))
-      where is_not_null_test register NULLs values but reject rows
-
-      in case when we do not need correct NULL, we have simplier construction:
-      EXISTS (SELECT ... WHERE where and
-                               (l1 = v1) and
-                               (l2 = v2) and
-                               (l3 = v3)
-    */
-    Item *where_item= 0;
     for (uint i= 0; i < cols_num; i++)
     {
       Item *item, *item_isnull;
@@ -1425,7 +2084,7 @@ Item_in_subselect::row_value_transformer(JOIN *join)
                     Item_ref::OUTER_REF));
       if (select_lex->ref_pointer_array[i]->
           check_cols(left_expr->element_index(i)->cols()))
-        DBUG_RETURN(RES_ERROR);
+        DBUG_RETURN(true);
       item=
         new Item_func_eq(new
                          Item_direct_ref(&select_lex->context,
@@ -1438,8 +2097,7 @@ Item_in_subselect::row_value_transformer(JOIN *join)
                                          select_lex->
                                          ref_pointer_array+i,
                                          (char *)"<no matter>",
-                                         (char *)"<list ref>")
-                        );
+                                         (char *)"<list ref>"));
       if (!abort_on_null)
       {
         Item *having_col_item=
@@ -1457,8 +2115,7 @@ Item_in_subselect::row_value_transformer(JOIN *join)
                                            select_lex->
                                            ref_pointer_array+i,
                                            (char *)"<no matter>",
-                                           (char *)"<list ref>")
-                          );
+                                           (char *)"<list ref>"));
         item= new Item_cond_or(item, item_isnull);
         /* 
           TODO: why we create the above for cases where the right part
@@ -1467,106 +2124,211 @@ Item_in_subselect::row_value_transformer(JOIN *join)
         if (left_expr->element_index(i)->maybe_null)
         {
           if (!(item= new Item_func_trig_cond(item, get_cond_guard(i))))
-            DBUG_RETURN(RES_ERROR);
+            DBUG_RETURN(true);
           if (!(having_col_item= 
                   new Item_func_trig_cond(having_col_item, get_cond_guard(i))))
-            DBUG_RETURN(RES_ERROR);
+            DBUG_RETURN(true);
         }
-        having_item= and_items(having_item, having_col_item);
+        *having_item= and_items(*having_item, having_col_item);
       }
-      where_item= and_items(where_item, item);
+      *where_item= and_items(*where_item, item);
     }
-    /*
-      AND can't be changed during fix_fields()
-      we can assign select_lex->where here, and pass 0 as last
-      argument (reference) to fix_fields()
-    */
-    select_lex->where= join->conds= and_items(join->conds, where_item);
-    select_lex->where->top_level_item();
-    if (join->conds->fix_fields(thd, 0))
-      DBUG_RETURN(RES_ERROR);
   }
-  if (having_item)
+
+  if (*where_item)
   {
-    bool res;
-    select_lex->having= join->having= and_items(join->having, having_item);
-    if (having_item == select_lex->having)
-      having_item->name= (char*)in_having_cond;
-    select_lex->having->top_level_item();
-    /*
-      AND can't be changed during fix_fields()
-      we can assign select_lex->having here, and pass 0 as last
-      argument (reference) to fix_fields()
-    */
-    select_lex->having_fix_field= 1;
-    res= join->having->fix_fields(thd, 0);
-    select_lex->having_fix_field= 0;
-    if (res)
-    {
-      DBUG_RETURN(RES_ERROR);
-    }
+    if (!(*where_item)->fixed && (*where_item)->fix_fields(thd, 0))
+      DBUG_RETURN(true);
+    (*where_item)->top_level_item();
+  }
+
+  if (*having_item)
+  {
+    if (!join_having)
+      (*having_item)->name= (char*) in_having_cond;
+    if (fix_having(*having_item, select_lex))
+      DBUG_RETURN(true);
+    (*having_item)->top_level_item();
   }
 
-  DBUG_RETURN(RES_OK);
+  DBUG_RETURN(false);
 }
 
 
-Item_subselect::trans_res
+bool
 Item_in_subselect::select_transformer(JOIN *join)
 {
-  return select_in_like_transformer(join, &eq_creator);
+  return select_in_like_transformer(join);
 }
 
 
 /**
-  Prepare IN/ALL/ANY/SOME subquery transformation and call appropriate
-  transformation function.
+  Create the predicates needed to transform an IN/ALL/ANY subselect into a
+  correlated EXISTS via predicate injection.
 
-    To decide which transformation procedure (scalar or row) applicable here
-    we have to call fix_fields() for left expression to be able to call
-    cols() method on it. Also this method make arena management for
-    underlying transformation methods.
+  @param join_arg  Join object of the subquery.
+
+  @retval FALSE  ok
+  @retval TRUE   error
+*/
+
+bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg)
+{
+  bool res;
+
+  DBUG_ASSERT(engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE ||
+              engine->engine_type() == subselect_engine::UNION_ENGINE);
+  /*
+    TODO: the call to init_cond_guards allocates and initializes an array of
+    booleans that may not be used later because we may choose materialization.
+    The two calls below to create_XYZ_cond depend on this boolean array.
+    If the dependency is removed, the call can be moved to a later phase.
+  */
+  if (init_cond_guards())
+    return TRUE;                    /* The guard array was not allocated. */
+  if (left_expr->cols() == 1)
+    res= create_single_in_to_exists_cond(join_arg,
+                                         &(join_arg->in_to_exists_where),
+                                         &(join_arg->in_to_exists_having));
+  else
+    res= create_row_in_to_exists_cond(join_arg,
+                                      &(join_arg->in_to_exists_where),
+                                      &(join_arg->in_to_exists_having));
+
+  /*
+    The IN=>EXISTS transformation makes non-correlated subqueries correlated.
+  */
+  join_arg->select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
+  /*
+    The uncacheable property controls a number of actions, e.g. whether to
+    save/restore (via init_save_join_tab/restore_tmp) the original JOIN for
+    plans with a temp table where the original JOIN was overridden by
+    make_simple_join. The UNCACHEABLE_EXPLAIN is ignored by EXPLAIN, thus
+    non-correlated subqueries will not appear as such to EXPLAIN.
+  */
+  join_arg->select_lex->master_unit()->uncacheable|= UNCACHEABLE_EXPLAIN;
+  join_arg->select_lex->uncacheable|= UNCACHEABLE_EXPLAIN;
+  return (res);
+}
+
+
+/**
+  Transform an IN/ALL/ANY subselect into a correlated EXISTS via injecting
+  correlated in-to-exists predicates.
+
+  @param join_arg  Join object of the subquery.
+
+  @retval FALSE  ok
+  @retval TRUE   error
+*/
+
+bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg)
+{
+  SELECT_LEX *select_lex= join_arg->select_lex;
+  Item *where_item= join_arg->in_to_exists_where;
+  Item *having_item= join_arg->in_to_exists_having;
+
+  DBUG_ENTER("Item_in_subselect::inject_in_to_exists_cond");
+
+  if (where_item)
+  {
+    List<Item> *and_args= NULL;
+    /*
+      If the top-level Item of the WHERE clause is an AND, detach the multiple
+      equality list that was attached to the end of the AND argument list by
+      build_equal_items_for_cond(). The multiple equalities must be detached
+      because fix_fields merges lower level AND arguments into the upper AND.
+      As a result, the arguments from lower-level ANDs are concatenated after
+      the multiple equalities. When the multiple equality list is treated as
+      such, it turns out to contain non-Item_equal objects, which is wrong.
+    */
+    if (join_arg->conds && join_arg->conds->type() == Item::COND_ITEM &&
+        ((Item_cond*) join_arg->conds)->functype() == Item_func::COND_AND_FUNC)
+    {
+      and_args= ((Item_cond*) join_arg->conds)->argument_list();
+      if (join_arg->cond_equal)
+        and_args->disjoin((List<Item> *) &join_arg->cond_equal->current_level);
+    }
+
+    where_item= and_items(join_arg->conds, where_item);
+    if (!where_item->fixed && where_item->fix_fields(thd, 0))
+      DBUG_RETURN(true);
+    // TIMOUR TODO: call optimize_cond() for the new where clause
+    thd->change_item_tree(&select_lex->where, where_item);
+    select_lex->where->top_level_item();
+    join_arg->conds= select_lex->where;
+
+    /* Attach back the list of multiple equalities to the new top-level AND. */
+    if (and_args && join_arg->cond_equal)
+    {
+      /* The argument list of the top-level AND may change after fix fields. */
+      and_args= ((Item_cond*) join_arg->conds)->argument_list();
+      and_args->concat((List<Item> *) &join_arg->cond_equal->current_level);
+    }
+  }
+
+  if (having_item)
+  {
+    Item* join_having= join_arg->having ? join_arg->having:join_arg->tmp_having;
+    having_item= and_items(join_having, having_item);
+    if (fix_having(having_item, select_lex))
+      DBUG_RETURN(true);
+    // TIMOUR TODO: call optimize_cond() for the new having clause
+    thd->change_item_tree(&select_lex->having, having_item);
+    select_lex->having->top_level_item();
+    join_arg->having= select_lex->having;
+  }
+  join_arg->thd->change_item_tree(&unit->global_parameters->select_limit,
+                                  new Item_int((int32) 1));
+  unit->select_limit_cnt= 1;
+
+  DBUG_RETURN(false);
+}
+
+
+/**
+  Prepare IN/ALL/ANY/SOME subquery transformation and call the appropriate
+  transformation function.
 
   @param join    JOIN object of transforming subquery
-  @param func    creator of condition function of subquery
 
-  @retval
-    RES_OK      OK
-  @retval
-    RES_REDUCE  OK, and current subquery was reduced during
-    transformation
-  @retval
-    RES_ERROR   Error
+  @note
+  To decide which transformation procedure (scalar or row) is applicable here
+  we have to call fix_fields() for the left expression to be able to call
+  the cols() method on it. This method also manages the arena for the
+  underlying transformation methods.
+
+  @retval  false  OK
+  @retval  true   Error
 */
 
-Item_subselect::trans_res
-Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
+bool
+Item_in_subselect::select_in_like_transformer(JOIN *join)
 {
   Query_arena *arena, backup;
-  SELECT_LEX *current= thd->lex->current_select, *up;
+  SELECT_LEX *current= thd->lex->current_select;
   const char *save_where= thd->where;
-  Item_subselect::trans_res res= RES_ERROR;
+  bool trans_res= true;
   bool result;
 
   DBUG_ENTER("Item_in_subselect::select_in_like_transformer");
 
+  /*
+    IN/SOME/ALL/ANY subqueries don't support the LIMIT clause. Without it
+    the ORDER BY clause becomes meaningless, thus we drop it here.
+  */
+  for (SELECT_LEX *sl= current->master_unit()->first_select();
+       sl; sl= sl->next_select())
   {
-    /*
-      IN/SOME/ALL/ANY subqueries aren't support LIMIT clause. Without it
-      ORDER BY clause becomes meaningless thus we drop it here.
-    */
-    SELECT_LEX *sl= current->master_unit()->first_select();
-    for (; sl; sl= sl->next_select())
+    if (sl->join)
     {
-      if (sl->join)
-        sl->join->order= 0;
+      sl->join->order= 0;
+      sl->join->skip_sort_order= 1;
     }
   }
 
   if (changed)
-  {
-    DBUG_RETURN(RES_OK);
-  }
+    DBUG_RETURN(false);
 
   thd->where= "IN/ALL/ANY subquery";
 
@@ -1574,6 +2336,8 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
     In some optimisation cases we will not need this Item_in_optimizer
     object, but we can't know it here, but here we need address correct
     reference on left expresion.
+
+    //psergey: he means degenerate cases like "... IN (SELECT 1)"
   */
   if (!optimizer)
   {
@@ -1585,7 +2349,7 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
       goto err;
   }
 
-  thd->lex->current_select= up= current->return_after_parsing();
+  thd->lex->current_select= current->return_after_parsing();
   result= (!left_expr->fixed &&
            left_expr->fix_fields(thd, optimizer->arguments()));
   /* fix_fields can change reference to left_expr, we need reassign it */
@@ -1595,9 +2359,6 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
   if (result)
     goto err;
 
-  transformed= 1;
-  arena= thd->activate_stmt_arena_if_needed(&backup);
-
   /*
     Both transformers call fix_fields() only for Items created inside them,
     and all that items do not make permanent changes in current item arena
@@ -1605,8 +2366,9 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
     of Item, we have to call fix_fields() for it only with original arena to
     avoid memory leack)
   */
+  arena= thd->activate_stmt_arena_if_needed(&backup);
   if (left_expr->cols() == 1)
-    res= single_value_transformer(join, func);
+    trans_res= single_value_transformer(join);
   else
   {
     /* we do not support row operation for ALL/ANY/SOME */
@@ -1615,21 +2377,21 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
       if (arena)
         thd->restore_active_arena(arena, &backup);
       my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
-      DBUG_RETURN(RES_ERROR);
+      DBUG_RETURN(true);
     }
-    res= row_value_transformer(join);
+    trans_res= row_value_transformer(join);
   }
   if (arena)
     thd->restore_active_arena(arena, &backup);
 err:
   thd->where= save_where;
-  DBUG_RETURN(res);
+  DBUG_RETURN(trans_res);
 }
 
 
 void Item_in_subselect::print(String *str, enum_query_type query_type)
 {
-  if (transformed)
+  if (in_strategy & SUBS_IN_TO_EXISTS)
     str->append(STRING_WITH_LEN("<exists>"));
   else
   {
@@ -1642,29 +2404,196 @@ void Item_in_subselect::print(String *str, enum_query_type query_type)
 
 bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref)
 {
-  bool result = 0;
-  
+  uint outer_cols_num;
+  List<Item> *inner_cols;
+
+  if (in_strategy & SUBS_SEMI_JOIN)
+    return !( (*ref)= new Item_int(1));
+
+  /*
+    Check if the outer and inner IN operands match in those cases when we
+    will not perform IN=>EXISTS transformation. Currently this is when we
+    use subquery materialization.
+
+    The condition below is true when this method was called recursively from
+    inside JOIN::prepare for the JOIN object created by the call chain
+    Item_subselect::fix_fields -> subselect_single_select_engine::prepare,
+    which creates a JOIN object for the subquery and calls JOIN::prepare for
+    the JOIN of the subquery.
+    Notice that in some cases, this doesn't happen, and the check_cols()
+    test for each Item happens later in
+    Item_in_subselect::row_value_in_to_exists_transformer.
+    The reason for this mess is that our JOIN::prepare phase works top-down
+    instead of bottom-up, so we first do name resoluton and semantic checks
+    for the outer selects, then for the inner.
+  */
+  if (engine &&
+      engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE &&
+      ((subselect_single_select_engine*)engine)->join)
+  {
+    outer_cols_num= left_expr->cols();
+
+    if (unit->is_union())
+      inner_cols= &(unit->types);
+    else
+      inner_cols= &(unit->first_select()->item_list);
+    if (outer_cols_num != inner_cols->elements)
+    {
+      my_error(ER_OPERAND_COLUMNS, MYF(0), outer_cols_num);
+      return TRUE;
+    }
+    if (outer_cols_num > 1)
+    {
+      List_iterator<Item> inner_col_it(*inner_cols);
+      Item *inner_col;
+      for (uint i= 0; i < outer_cols_num; i++)
+      {
+        inner_col= inner_col_it++;
+        if (inner_col->check_cols(left_expr->element_index(i)->cols()))
+          return TRUE;
+      }
+    }
+  }
+
   if ((thd_arg->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) &&
-      left_expr && !left_expr->fixed)
-    result = left_expr->fix_fields(thd_arg, &left_expr);
+      left_expr && !left_expr->fixed &&
+      left_expr->fix_fields(thd_arg, &left_expr))
+    return TRUE;
+  if (Item_subselect::fix_fields(thd_arg, ref))
+    return TRUE;
+
+  fixed= TRUE;
+  return FALSE;
+}
+
+
+void Item_in_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  left_expr->fix_after_pullout(new_parent, &left_expr);
+  Item_subselect::fix_after_pullout(new_parent, ref);
+}
+
+void Item_in_subselect::update_used_tables()
+{
+  Item_subselect::update_used_tables();
+  left_expr->update_used_tables();
+  used_tables_cache |= left_expr->used_tables();
+}
+
+
+/**
+  Try to create and initialize an engine to compute a subselect via
+  materialization.
+
+  @details
+  The method creates a new engine for materialized execution, and initializes
+  the engine. The initialization may fail
+  - either because it wasn't possible to create the needed temporary table
+    and its index,
+  - or because of a memory allocation error.
+
+  @returns
+    @retval TRUE  memory allocation error occurred
+    @retval FALSE an execution method was chosen successfully
+*/
 
-  return result || Item_subselect::fix_fields(thd_arg, ref);
+bool Item_in_subselect::setup_mat_engine()
+{
+  subselect_hash_sj_engine       *mat_engine= NULL;
+  subselect_single_select_engine *select_engine;
+
+  DBUG_ENTER("Item_in_subselect::setup_mat_engine");
+
+  /*
+    The select_engine (that executes transformed IN=>EXISTS subselects) is
+    pre-created at parse time, and is stored in statement memory (preserved
+    across PS executions).
+  */
+  DBUG_ASSERT(engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE);
+  select_engine= (subselect_single_select_engine*) engine;
+
+  /* Create/initialize execution objects. */
+  if (!(mat_engine= new subselect_hash_sj_engine(thd, this, select_engine)))
+    DBUG_RETURN(TRUE);
+
+  if (mat_engine->init(&select_engine->join->fields_list,
+                       engine->get_identifier()))
+    DBUG_RETURN(TRUE);
+
+  engine= mat_engine;
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Initialize the cache of the left operand of the IN predicate.
+
+  @note This method has the same purpose as alloc_group_fields(),
+  but it takes a different kind of collection of items, and the
+  list we push to is dynamically allocated.
+
+  @retval TRUE  if a memory allocation error occurred or the cache is
+                not applicable to the current query
+  @retval FALSE if success
+*/
+
+bool Item_in_subselect::init_left_expr_cache()
+{
+  JOIN *outer_join;
+
+  outer_join= unit->outer_select()->join;
+  /*
+    An IN predicate might be evaluated in a query for which all tables have
+    been optimized away.
+  */ 
+  if (!outer_join || !outer_join->table_count || !outer_join->tables_list)
+    return TRUE;
+
+  if (!(left_expr_cache= new List<Cached_item>))
+    return TRUE;
+
+  for (uint i= 0; i < left_expr->cols(); i++)
+  {
+    Cached_item *cur_item_cache= new_Cached_item(thd,
+                                                 left_expr->element_index(i),
+                                                 FALSE);
+    if (!cur_item_cache || left_expr_cache->push_front(cur_item_cache))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+bool Item_in_subselect::init_cond_guards()
+{
+  const uint n_guards= left_expr->cols();
+  /* Allocate only if !abort_on_null, left_expr may be NULL, and not done yet. */
+  if (abort_on_null || !left_expr->maybe_null || pushed_cond_guards)
+    return FALSE;
+  if (!(pushed_cond_guards= (bool*)thd->alloc(sizeof(bool) * n_guards)))
+    return TRUE;
+  for (uint guard_no= 0; guard_no < n_guards; guard_no++)
+    pushed_cond_guards[guard_no]= TRUE;
+  return FALSE;
+}
 
 
-Item_subselect::trans_res
+bool
 Item_allany_subselect::select_transformer(JOIN *join)
 {
-  transformed= 1;
+  DBUG_ENTER("Item_allany_subselect::select_transformer");
+  DBUG_ASSERT((in_strategy & ~(SUBS_MAXMIN_INJECTED | SUBS_MAXMIN_ENGINE |
+                               SUBS_IN_TO_EXISTS)) == 0);
+  in_strategy|= SUBS_IN_TO_EXISTS;
   if (upper_item)
     upper_item->show= 1;
-  return select_in_like_transformer(join, func);
+  DBUG_RETURN(select_in_like_transformer(join));
 }
 
 
 void Item_allany_subselect::print(String *str, enum_query_type query_type)
 {
-  if (transformed)
+  if (in_strategy & SUBS_IN_TO_EXISTS)
     str->append(STRING_WITH_LEN("<exists>"));
   else
   {
@@ -1686,23 +2615,28 @@ void subselect_engine::set_thd(THD *thd_arg)
 
 
 subselect_single_select_engine::
-subselect_single_select_engine(st_select_lex *select,
-			       select_subselect *result_arg,
+subselect_single_select_engine(THD *thd_arg, st_select_lex *select,
+			       select_result_interceptor *result_arg,
 			       Item_subselect *item_arg)
-  :subselect_engine(item_arg, result_arg),
-   prepared(0), optimized(0), executed(0), optimize_error(0),
+  :subselect_engine(thd_arg, item_arg, result_arg),
+   prepared(0), executed(0), optimize_error(0),
    select_lex(select), join(0)
 {
   select_lex->master_unit()->item= item_arg;
 }
 
+int subselect_single_select_engine::get_identifier()
+{
+  return select_lex->select_number; 
+}
 
 void subselect_single_select_engine::cleanup()
 {
   DBUG_ENTER("subselect_single_select_engine::cleanup");
-  prepared= optimized= executed= optimize_error= 0;
+  prepared= executed= optimize_error= 0;
   join= 0;
   result->cleanup();
+  select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
   DBUG_VOID_RETURN;
 }
 
@@ -1712,6 +2646,9 @@ void subselect_union_engine::cleanup()
   DBUG_ENTER("subselect_union_engine::cleanup");
   unit->reinit_exec_mechanism();
   result->cleanup();
+  unit->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
+  for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select())
+    sl->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
   DBUG_VOID_RETURN;
 }
 
@@ -1743,31 +2680,66 @@ bool subselect_union_engine::no_rows()
   return test(!unit->fake_select_lex->join->send_records);
 }
 
+
 void subselect_uniquesubquery_engine::cleanup()
 {
   DBUG_ENTER("subselect_uniquesubquery_engine::cleanup");
-  /*
-    subselect_uniquesubquery_engine have not 'result' assigbed, so we do not
-    cleanup() it
+  /* 
+    Note for mergers: we don't have to, and actually must not de-initialize
+    tab->table->file here.
+    - We don't have to, because free_tmp_table() will call ha_index_or_rnd_end
+    - We must not do it, because tab->table may be a derived table which 
+      has been already dropped by close_thread_tables(), while we here are
+      called from cleanup_items()
   */
   DBUG_VOID_RETURN;
 }
 
 
-subselect_union_engine::subselect_union_engine(st_select_lex_unit *u,
-					       select_subselect *result_arg,
+subselect_union_engine::subselect_union_engine(THD *thd_arg, st_select_lex_unit *u,
+					       select_result_interceptor *result_arg,
 					       Item_subselect *item_arg)
-  :subselect_engine(item_arg, result_arg)
+  :subselect_engine(thd_arg, item_arg, result_arg)
 {
   unit= u;
   unit->item= item_arg;
 }
 
 
+/**
+  Create and prepare the JOIN object that represents the query execution
+  plan for the subquery.
+
+  @details
+  This method is called from Item_subselect::fix_fields. For prepared
+  statements it is called both during the PREPARE and EXECUTE phases in the
+  following ways:
+  - During PREPARE the optimizer needs some properties
+    (join->fields_list.elements) of the JOIN to proceed with preparation of
+    the remaining query (namely to complete ::fix_fields for the subselect
+    related classes). At the end of PREPARE the JOIN is deleted.
+  - When we EXECUTE the query, Item_subselect::fix_fields is called again, and
+    the JOIN object is re-created, prepared and executed. At the end of
+    execution it is deleted.
+  In all cases the JOIN is created in runtime memory (not in the permanent
+  memory root).
+
+  @todo
+  Re-check what properties of 'join' are needed during prepare, and see if
+  we can avoid creating a JOIN during JOIN::prepare of the outer join.
+
+  @retval 0  if success
+  @retval 1  if error
+*/
+
 int subselect_single_select_engine::prepare()
 {
   if (prepared)
     return 0;
+  if (select_lex->join)
+  {
+    select_lex->cleanup();
+  }
   join= new JOIN(thd, select_lex->item_list,
 		 select_lex->options | SELECT_NO_UNLOCK, result);
   if (!join || !result)
@@ -1798,8 +2770,8 @@ int subselect_union_engine::prepare()
 
 int subselect_uniquesubquery_engine::prepare()
 {
-  //this never should be called
-  DBUG_ASSERT(0);
+  /* Should never be called. */
+  DBUG_ASSERT(FALSE);
   return 1;
 }
 
@@ -1846,7 +2818,7 @@ void subselect_engine::set_row(List<Item> &item_list, Item_cache **row)
     if (!(row[i]= Item_cache::get_cache(sel_item)))
       return;
     row[i]->setup(sel_item);
-    row[i]->store(sel_item);
+ //psergey-backport-timours:   row[i]->store(sel_item);
   }
   if (item_list.elements > 1)
     res_type= ROW_RESULT;
@@ -1899,14 +2871,10 @@ int subselect_single_select_engine::exec()
   char const *save_where= thd->where;
   SELECT_LEX *save_select= thd->lex->current_select;
   thd->lex->current_select= select_lex;
-  if (!optimized)
+  if (!join->optimized)
   {
     SELECT_LEX_UNIT *unit= select_lex->master_unit();
 
-    DBUG_EXECUTE_IF("bug11747970_simulate_error",
-                    DBUG_SET("+d,bug11747970_raise_error"););
-
-    optimized= 1;
     unit->set_limit(unit->global_parameters);
     if (join->optimize())
     {
@@ -1970,9 +2938,9 @@ int subselect_single_select_engine::exec()
         pushed down into the subquery. Those optimizations are ref[_or_null]
         acceses. Change them to be full table scans.
       */
-      for (uint i=join->const_tables ; i < join->tables ; i++)
+      for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); tab;
+           tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
       {
-        JOIN_TAB *tab=join->join_tab+i;
         if (tab && tab->keyuse)
         {
           for (uint i= 0; i < tab->ref.key_parts; i++)
@@ -2054,21 +3022,31 @@ int subselect_uniquesubquery_engine::scan_table()
   if (table->file->inited)
     table->file->ha_index_end();
  
-  table->file->ha_rnd_init(1);
+  if (table->file->ha_rnd_init_with_error(1))
+    DBUG_RETURN(1);
   table->file->extra_opt(HA_EXTRA_CACHE,
                          current_thd->variables.read_buff_size);
   table->null_row= 0;
   for (;;)
   {
-    error=table->file->rnd_next(table->record[0]);
-    if (error && error != HA_ERR_END_OF_FILE)
-    {
-      error= report_error(table, error);
-      break;
+    error=table->file->ha_rnd_next(table->record[0]);
+    if (error) {
+      if (error == HA_ERR_RECORD_DELETED)
+      {
+        error= 0;
+        continue;
+      }
+      if (error == HA_ERR_END_OF_FILE)
+      {
+        error= 0;
+        break;
+      }
+      else
+      {
+        error= report_error(table, error);
+        break;
+      }
     }
-    /* No more rows */
-    if (table->status)
-      break;
 
     if (!cond || cond->val_int())
     {
@@ -2179,6 +3157,56 @@ bool subselect_uniquesubquery_engine::copy_ref_key()
 
 
 /*
+  @retval  1  A NULL was found in the outer reference, index lookup is
+              not applicable, the outer ref is unusable as a lookup key,
+              use some other method to find a match.
+  @retval  0  The outer ref was copied into an index lookup key.
+  @retval -1  The outer ref cannot possibly match any row, IN is FALSE.
+*/
+/* TIMOUR: this method is a variant of copy_ref_key(), needs refactoring. */
+
+int subselect_uniquesubquery_engine::copy_ref_key_simple()
+{
+  for (store_key **copy= tab->ref.key_copy ; *copy ; copy++)
+  {
+    enum store_key::store_key_result store_res;
+    store_res= (*copy)->copy();
+    tab->ref.key_err= store_res;
+
+    /*
+      When there is a NULL part in the key we don't need to make index
+      lookup for such key thus we don't need to copy whole key.
+      If we later should do a sequential scan return OK. Fail otherwise.
+
+      See also the comment for the subselect_uniquesubquery_engine::exec()
+      function.
+    */
+    null_keypart= (*copy)->null_key;
+    if (null_keypart)
+      return 1;
+
+    /*
+      Check if the error is equal to STORE_KEY_FATAL. This is not expressed 
+      using the store_key::store_key_result enum because ref.key_err is a 
+      boolean and we want to detect both TRUE and STORE_KEY_FATAL from the 
+      space of the union of the values of [TRUE, FALSE] and 
+      store_key::store_key_result.  
+      TODO: fix the variable and return types.
+    */
+    if (store_res == store_key::STORE_KEY_FATAL)
+    {
+      /*
+       Error converting the left IN operand to the column type of the right
+       IN operand. 
+      */
+      return -1;
+    }
+  }
+  return 0;
+}
+
+
+/*
   Execute subselect
 
   SYNOPSIS
@@ -2218,7 +3246,13 @@ int subselect_uniquesubquery_engine::exec()
  
   /* TODO: change to use of 'full_scan' here? */
   if (copy_ref_key())
+  {
+    /*
+      TIMOUR: copy_ref_key() == 1 means NULL result, not error, why return 1?
+      Check who relies on this result.
+    */
     DBUG_RETURN(1);
+  }
   if (table->status)
   {
     /* 
@@ -2229,15 +3263,19 @@ int subselect_uniquesubquery_engine::exec()
     DBUG_RETURN(0);
   }
 
+  if (!tab->preread_init_done && tab->preread_init())
+    DBUG_RETURN(1);
+
   if (null_keypart)
     DBUG_RETURN(scan_table());
  
   if (!table->file->inited)
     table->file->ha_index_init(tab->ref.key, 0);
-  error= table->file->index_read_map(table->record[0],
-                                     tab->ref.key_buff,
-                                     make_prev_keypart_map(tab->ref.key_parts),
-                                     HA_READ_KEY_EXACT);
+  error= table->file->ha_index_read_map(table->record[0],
+                                        tab->ref.key_buff,
+                                        make_prev_keypart_map(tab->
+                                                              ref.key_parts),
+                                        HA_READ_KEY_EXACT);
   if (error &&
       error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
     error= report_error(table, error);
@@ -2258,10 +3296,51 @@ int subselect_uniquesubquery_engine::exec()
 }
 
 
+/*
+  Exact-match index lookup of the key in tab->ref.key_buff. A found row counts
+  as a match only if 'cond' is absent or evaluates to true for it.
+  @retval 0      lookup done; item->value is set to 1 on a match, else to 0
+  @retval other  error; the value returned by report_error()
+*/
+
+int subselect_uniquesubquery_engine::index_lookup()
+{
+  DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup");
+  int error;
+  TABLE *table= tab->table;
+
+  /* Do not read through an index that could not be initialized. */
+  if (!table->file->inited &&
+      (error= table->file->ha_index_init(tab->ref.key, 0)))
+    DBUG_RETURN(report_error(table, error));
+  error= table->file->ha_index_read_map(table->record[0],
+                                        tab->ref.key_buff,
+                                        make_prev_keypart_map(tab->
+                                                              ref.key_parts),
+                                        HA_READ_KEY_EXACT);
+  DBUG_PRINT("info", ("lookup result: %i", error));
+
+  if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
+  {
+    /*
+      TIMOUR: I don't understand at all when do we need to call report_error.
+      In most places where we access an index, we don't do this. Why here?
+    */
+    DBUG_RETURN(report_error(table, error));
+  }
+
+  table->null_row= 0;
+  ((Item_in_subselect *) item)->value= (!error && (!cond || cond->val_int()));
+  DBUG_RETURN(0);
+}
+
+
+
 subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine()
 {
   /* Tell handler we don't need the index anymore */
-  tab->table->file->ha_index_end();
+  //psergey-merge-todo: the following was gone in 6.0:
+ //psergey-merge: don't need this after all: tab->table->file->ha_index_end();
 }
 
 
@@ -2269,7 +3348,7 @@ subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine()
   Index-lookup subselect 'engine' - run the subquery
 
   SYNOPSIS
-    subselect_uniquesubquery_engine:exec()
+    subselect_indexsubquery_engine:exec()
       full_scan 
 
   DESCRIPTION
@@ -2319,7 +3398,7 @@ subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine()
 
 int subselect_indexsubquery_engine::exec()
 {
-  DBUG_ENTER("subselect_indexsubquery_engine::exec");
+  DBUG_ENTER("subselect_indexsubquery_engine");
   int error;
   bool null_finding= 0;
   TABLE *table= tab->table;
@@ -2350,15 +3429,19 @@ int subselect_indexsubquery_engine::exec()
     DBUG_RETURN(0);
   }
 
+  if (!tab->preread_init_done && tab->preread_init())
+    DBUG_RETURN(1);
+
   if (null_keypart)
     DBUG_RETURN(scan_table());
 
   if (!table->file->inited)
     table->file->ha_index_init(tab->ref.key, 1);
-  error= table->file->index_read_map(table->record[0],
-                                     tab->ref.key_buff,
-                                     make_prev_keypart_map(tab->ref.key_parts),
-                                     HA_READ_KEY_EXACT);
+  error= table->file->ha_index_read_map(table->record[0],
+                                        tab->ref.key_buff,
+                                        make_prev_keypart_map(tab->
+                                                              ref.key_parts),
+                                        HA_READ_KEY_EXACT);
   if (error &&
       error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
     error= report_error(table, error);
@@ -2379,9 +3462,9 @@ int subselect_indexsubquery_engine::exec()
             ((Item_in_subselect *) item)->value= 1;
           break;
         }
-        error= table->file->index_next_same(table->record[0],
-                                            tab->ref.key_buff,
-                                            tab->ref.key_length);
+        error= table->file->ha_index_next_same(table->record[0],
+                                               tab->ref.key_buff,
+                                               tab->ref.key_length);
         if (error && error != HA_ERR_END_OF_FILE)
         {
           error= report_error(table, error);
@@ -2406,8 +3489,10 @@ int subselect_indexsubquery_engine::exec()
 
 uint subselect_single_select_engine::cols()
 {
-  DBUG_ASSERT(select_lex->join != 0); // should be called after fix_fields()
-  return select_lex->join->fields_list.elements;
+  //psergey-sj-backport: the following assert was gone in 6.0:
+  //DBUG_ASSERT(select_lex->join != 0); // should be called after fix_fields()
+  //return select_lex->join->fields_list.elements;
+  return select_lex->item_list.elements;
 }
 
 
@@ -2448,10 +3533,13 @@ void subselect_uniquesubquery_engine::exclude()
 }
 
 
-table_map subselect_engine::calc_const_tables(TABLE_LIST *table)
+table_map subselect_engine::calc_const_tables(List<TABLE_LIST> &list)
 {
   table_map map= 0;
-  for (; table; table= table->next_leaf)
+  List_iterator<TABLE_LIST> ti(list);
+  TABLE_LIST *table;
+  //for (; table; table= table->next_leaf)
+  while ((table= ti++))
   {
     TABLE *tbl= table->table;
     if (tbl && tbl->const_table)
@@ -2489,10 +3577,20 @@ void subselect_union_engine::print(String *str, enum_query_type query_type)
 void subselect_uniquesubquery_engine::print(String *str,
                                             enum_query_type query_type)
 {
+  char *table_name= tab->table->s->table_name.str;
   str->append(STRING_WITH_LEN("<primary_index_lookup>("));
   tab->ref.items[0]->print(str, query_type);
   str->append(STRING_WITH_LEN(" in "));
-  str->append(tab->table->s->table_name.str, tab->table->s->table_name.length);
+  if (tab->table->s->table_category == TABLE_CATEGORY_TEMPORARY)
+  {
+    /*
+      Temporary tables' names change across runs, so they can't be used for
+      EXPLAIN EXTENDED.
+    */
+    str->append(STRING_WITH_LEN("<temporary table>"));
+  }
+  else
+    str->append(table_name, tab->table->s->table_name.length);
   KEY *key_info= tab->table->key_info+ tab->ref.key;
   str->append(STRING_WITH_LEN(" on "));
   str->append(key_info->name);
@@ -2504,6 +3602,29 @@ void subselect_uniquesubquery_engine::print(String *str,
   str->append(')');
 }
 
+/*
+TODO:
+The above ::print method should be changed as below. Do it after
+all other tests pass.
+
+void subselect_uniquesubquery_engine::print(String *str)
+{
+  KEY *key_info= tab->table->key_info + tab->ref.key;
+  str->append(STRING_WITH_LEN("<primary_index_lookup>("));
+  for (uint i= 0; i < key_info->key_parts; i++)
+    tab->ref.items[i]->print(str);
+  str->append(STRING_WITH_LEN(" in "));
+  str->append(tab->table->s->table_name.str, tab->table->s->table_name.length);
+  str->append(STRING_WITH_LEN(" on "));
+  str->append(key_info->name);
+  if (cond)
+  {
+    str->append(STRING_WITH_LEN(" where "));
+    cond->print(str);
+  }
+  str->append(')');
+}
+*/
 
 void subselect_indexsubquery_engine::print(String *str,
                                            enum_query_type query_type)
@@ -2535,6 +3656,7 @@ void subselect_indexsubquery_engine::print(String *str,
 
   @param si		new subselect Item
   @param res		new select_result object
+  @param temp           temporary assignment
 
   @retval
     FALSE OK
@@ -2542,12 +3664,32 @@ void subselect_indexsubquery_engine::print(String *str,
     TRUE  error
 */
 
-bool subselect_single_select_engine::change_result(Item_subselect *si,
-                                                 select_subselect *res)
+bool
+subselect_single_select_engine::change_result(Item_subselect *si,
+                                              select_result_interceptor *res,
+                                              bool temp)
 {
   item= si;
-  result= res;
-  return select_lex->join->change_result(result);
+  if (temp)
+  {
+    /*
+      Here we reuse change_item_tree to roll back the assignment. It does
+      nothing specific to Item* pointers, so the cast is safe. We do not
+      change the interface, in order to stay compatible with MySQL.
+    */
+    thd->change_item_tree((Item**) &result, (Item*)res);
+  }
+  else
+    result= res;
+
+  /*
+    We can't use 'result' below as gcc 4.2.4's alias optimization
+    assumes that result was not changed by thd->change_item_tree().
+    I tried to find a solution to make gcc happy, but could not find anything
+    that would not require a lot of extra code that would be harder to manage
+    than the current code.
+  */
+  return select_lex->join->change_result(res);
 }
 
 
@@ -2564,11 +3706,15 @@ bool subselect_single_select_engine::change_result(Item_subselect *si,
 */
 
 bool subselect_union_engine::change_result(Item_subselect *si,
-                                         select_subselect *res)
+                                           select_result_interceptor *res,
+                                           bool temp)
 {
   item= si;
   int rc= unit->change_result(res, result);
-  result= res;
+  if (temp)
+    thd->change_item_tree((Item**) &result, (Item*)res);
+  else
+    result= res;
   return rc;
 }
 
@@ -2585,8 +3731,11 @@ bool subselect_union_engine::change_result(Item_subselect *si,
     TRUE  error
 */
 
-bool subselect_uniquesubquery_engine::change_result(Item_subselect *si,
-                                                  select_subselect *res)
+bool
+subselect_uniquesubquery_engine::change_result(Item_subselect *si,
+                                               select_result_interceptor *res,
+                                               bool temp
+                                               __attribute__((unused)))
 {
   DBUG_ASSERT(0);
   return TRUE;
@@ -2654,5 +3803,1955 @@ bool subselect_union_engine::no_tables()
 bool subselect_uniquesubquery_engine::no_tables()
 {
   /* returning value is correct, but this method should never be called */
+  DBUG_ASSERT(FALSE);
   return 0;
 }
+
+
+/******************************************************************************
+  WL#1110 - Implementation of class subselect_hash_sj_engine
+******************************************************************************/
+
+
+/**
+  Check if an IN predicate should be executed via partial matching using
+  only schema information.
+
+  @details
+  This test essentially has three results:
+  - partial matching is applicable, but cannot be executed due to a
+    limitation in the total number of indexes, as a result we can't
+    use subquery materialization at all.
+  - partial matching is either applicable or not, and this can be
+    determined by looking at 'this->max_keys'.
+  If max_keys > 1, then we need partial matching because there are
+  more indexes than just the one we use during materialization to
+  remove duplicates.
+
+  @note
+  TIMOUR: The schema-based analysis for partial matching can be done once for
+  prepared statement and remembered. It is done here to remove the need to
+  save/restore all related variables between each re-execution, thus making
+  the code simpler.
+
+  @retval PARTIAL_MATCH  if a partial match should be used
+  @retval COMPLETE_MATCH if a complete match (index lookup) should be used
+*/
+
+subselect_hash_sj_engine::exec_strategy
+subselect_hash_sj_engine::get_strategy_using_schema()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+
+  if (item_in->is_top_level_item())
+    return COMPLETE_MATCH;
+  else
+  {
+    List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types());
+    Item *outer_col, *inner_col;
+
+    for (uint i= 0; i < item_in->left_expr->cols(); i++)
+    {
+      outer_col= item_in->left_expr->element_index(i);
+      inner_col= inner_col_it++;
+
+      if (!inner_col->maybe_null && !outer_col->maybe_null)
+        bitmap_set_bit(&non_null_key_parts, i);
+      else
+      {
+        bitmap_set_bit(&partial_match_key_parts, i);
+        ++count_partial_match_columns;
+      }
+    }
+  }
+
+  /* If no column contains NULLs use regular hash index lookups. */
+  if (count_partial_match_columns)
+    return PARTIAL_MATCH;
+  return COMPLETE_MATCH;
+}
+
+
+/**
+  Test whether an IN predicate must be computed via partial matching
+  based on the NULL statistics for each column of a materialized subquery.
+
+  @details The procedure analyzes column NULL statistics, updates the
+  matching type of columns that cannot be NULL or that contain only NULLs.
+  Based on this, the procedure determines the final execution strategy for
+  the [NOT] IN predicate.
+
+  @retval PARTIAL_MATCH  if a partial match should be used
+  @retval COMPLETE_MATCH if a complete match (index lookup) should be used
+*/
+
+subselect_hash_sj_engine::exec_strategy
+subselect_hash_sj_engine::get_strategy_using_data()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+  select_materialize_with_stats *result_sink=
+    (select_materialize_with_stats *) result;
+  Item *outer_col;
+
+  /*
+    If we already determined that a complete match is enough based on schema
+    information, nothing can be better.
+  */
+  if (strategy == COMPLETE_MATCH)
+    return COMPLETE_MATCH;
+
+  for (uint i= 0; i < item_in->left_expr->cols(); i++)
+  {
+    if (!bitmap_is_set(&partial_match_key_parts, i))
+      continue;
+    outer_col= item_in->left_expr->element_index(i);
+    /*
+      If column 'i' doesn't contain NULLs, and the corresponding outer reference
+      cannot have a NULL value, then 'i' is a non-nullable column.
+    */
+    if (result_sink->get_null_count_of_col(i) == 0 && !outer_col->maybe_null)
+    {
+      bitmap_clear_bit(&partial_match_key_parts, i);
+      bitmap_set_bit(&non_null_key_parts, i);
+      --count_partial_match_columns;
+    }
+    if (result_sink->get_null_count_of_col(i) == tmp_table->file->stats.records)
+      ++count_null_only_columns;
+  }
+
+  /* If no column contains NULLs use regular hash index lookups. */
+  if (!count_partial_match_columns)
+    return COMPLETE_MATCH;
+  return PARTIAL_MATCH;
+}
+
+
+void
+subselect_hash_sj_engine::choose_partial_match_strategy(
+  bool has_non_null_key, bool has_covering_null_row,
+  MY_BITMAP *partial_match_key_parts)
+{
+  ulonglong pm_buff_size;
+
+  DBUG_ASSERT(strategy == PARTIAL_MATCH);
+  /*
+    Choose according to global optimizer switch. If only one of the switches is
+    'ON', then the remaining strategy is the only possible one. The only case
+    when this will be overridden is when the total size of all buffers for the
+    merge strategy is bigger than the 'rowid_merge_buff_size' system variable,
+    or if there isn't enough physical memory to allocate the buffers.
+  */
+  if (!optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) &&
+       optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN))
+    strategy= PARTIAL_MATCH_SCAN;
+  else if
+     ( optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) &&
+      !optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN))
+    strategy= PARTIAL_MATCH_MERGE;
+
+  /*
+    If both switches are ON, or both are OFF, we interpret that as "let the
+    optimizer decide". Perform a cost based choice between the two partial
+    matching strategies.
+  */
+  /*
+    TIMOUR: the above interpretation of the switch values could be changed to:
+    - if both are ON - let the optimizer decide,
+    - if both are OFF - do not use partial matching, therefore do not use
+      materialization in non-top-level predicates.
+    The problem with this is that we know for sure if we need partial matching
+    only after the subquery is materialized, and this is too late to revert to
+    the IN=>EXISTS strategy.
+  */
+  if (strategy == PARTIAL_MATCH)
+  {
+    /*
+      TIMOUR: Currently we use a super simplistic measure. This will be
+      addressed in a separate task.
+    */
+    if (tmp_table->file->stats.records < 100)
+      strategy= PARTIAL_MATCH_SCAN;
+    else
+      strategy= PARTIAL_MATCH_MERGE;
+  }
+
+  /* Check if there is enough memory for the rowid merge strategy. */
+  if (strategy == PARTIAL_MATCH_MERGE)
+  {
+    pm_buff_size= rowid_merge_buff_size(has_non_null_key,
+                                        has_covering_null_row,
+                                        partial_match_key_parts);
+    if (pm_buff_size > thd->variables.rowid_merge_buff_size)
+      strategy= PARTIAL_MATCH_SCAN;
+  }
+}
+
+
+/*
+  Compute the memory size of all buffers proportional to the number of rows
+  in tmp_table.
+
+  @details
+  If the result is bigger than thd->variables.rowid_merge_buff_size, partial
+  matching via merging is not applicable.
+*/
+
+ulonglong subselect_hash_sj_engine::rowid_merge_buff_size(
+  bool has_non_null_key, bool has_covering_null_row,
+  MY_BITMAP *partial_match_key_parts)
+{
+  /* Total size of all buffers used by partial matching. */
+  ulonglong buff_size;
+  ha_rows row_count= tmp_table->file->stats.records;
+  uint rowid_length= tmp_table->file->ref_length;
+  select_materialize_with_stats *result_sink=
+    (select_materialize_with_stats *) result;
+
+  /* Size of the subselect_rowid_merge_engine::row_num_to_rowid buffer. */
+  buff_size= row_count * rowid_length * sizeof(uchar);
+
+  if (has_non_null_key)
+  {
+    /* Add the size of Ordered_key::key_buff of the only non-NULL key. */
+    buff_size+= row_count * sizeof(rownum_t);
+  }
+
+  if (!has_covering_null_row)
+  {
+    for (uint i= 0; i < partial_match_key_parts->n_bits; i++)
+    {
+      if (!bitmap_is_set(partial_match_key_parts, i) ||
+          result_sink->get_null_count_of_col(i) == row_count)
+        continue; /* In these cases we wouldn't construct Ordered keys. */
+
+      /* Add the size of Ordered_key::key_buff */
+      buff_size+= (row_count - result_sink->get_null_count_of_col(i)) *
+                         sizeof(rownum_t);
+      /* Add the size of Ordered_key::null_key */
+      buff_size+= bitmap_buffer_size(result_sink->get_max_null_of_col(i));
+    }
+  }
+
+  return buff_size;
+}
+
+
+/*
+  Initialize a MY_BITMAP with a buffer allocated on the current
+  memory root.
+  TIMOUR: move to bitmap C file?
+*/
+
+static my_bool
+bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root)
+{
+  my_bitmap_map *bitmap_buf;
+
+  if (!(bitmap_buf= (my_bitmap_map*) alloc_root(mem_root,
+                                                bitmap_buffer_size(n_bits))) ||
+      bitmap_init(map, bitmap_buf, n_bits, FALSE))
+    return TRUE;
+  bitmap_clear_all(map);
+  return FALSE;
+}
+
+
+/**
+  Create all structures needed for IN execution that can live between PS
+  reexecution.
+
+  @param tmp_columns the items that produce the data for the temp table
+  @param subquery_id subquery's identifier (to make "<subquery%d>" name for
+                                            EXPLAIN)
+
+  @details
+  - Create a temporary table to store the result of the IN subquery. The
+    temporary table has one hash index on all its columns.
+  - Create a new result sink that sends the result stream of the subquery to
+    the temporary table,
+
+  @notice:
+    Currently Item_subselect::init() already chooses and creates at parse
+    time an engine with a corresponding JOIN to execute the subquery.
+
+  @retval TRUE  if error
+  @retval FALSE otherwise
+*/
+
+bool subselect_hash_sj_engine::init(List<Item> *tmp_columns, uint subquery_id)
+{
+  select_union *result_sink;
+  /* Options to create_tmp_table. */
+  ulonglong tmp_create_options= thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS;
+                             /* | TMP_TABLE_FORCE_MYISAM; TIMOUR: force MYISAM */
+
+  DBUG_ENTER("subselect_hash_sj_engine::init");
+
+  if (bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements,
+                            thd->mem_root) ||
+      bitmap_init_memroot(&partial_match_key_parts, tmp_columns->elements,
+                            thd->mem_root))
+    DBUG_RETURN(TRUE);
+
+  /*
+    Create and initialize a select result interceptor that stores the
+    result stream in a temporary table. The temporary table itself is
+    managed (created/filled/etc) internally by the interceptor.
+  */
+/*
+  TIMOUR:
+  Select a more efficient result sink when we know there is no need to collect
+  data statistics.
+
+  if (strategy == COMPLETE_MATCH)
+  {
+    if (!(result= new select_union))
+      DBUG_RETURN(TRUE);
+  }
+  else if (strategy == PARTIAL_MATCH)
+  {
+  if (!(result= new select_materialize_with_stats))
+    DBUG_RETURN(TRUE);
+  }
+*/
+  if (!(result_sink= new select_materialize_with_stats))
+    DBUG_RETURN(TRUE);
+    
+  char buf[32];
+  uint len= my_snprintf(buf, sizeof(buf), "<subquery%d>", subquery_id);
+  char *name;
+  if (!(name= (char*)thd->alloc(len + 1)))
+    DBUG_RETURN(TRUE);
+  memcpy(name, buf, len+1);
+
+  result_sink->get_tmp_table_param()->materialized_subquery= true;
+  if (result_sink->create_result_table(thd, tmp_columns, TRUE,
+                                       tmp_create_options,
+				       name, TRUE, TRUE))
+    DBUG_RETURN(TRUE);
+
+  tmp_table= result_sink->table;
+  result= result_sink;
+
+  /*
+    If the subquery has blobs, or the total key length is bigger than
+    some length, or the total number of key parts is more than the
+    allowed maximum (currently MAX_REF_PARTS == 16), then the created
+    index cannot be used for lookups and we can't use hash semi
+    join. If this is the case, delete the temporary table since it
+    will not be used, and tell the caller we failed to initialize the
+    engine.
+  */
+  if (tmp_table->s->keys == 0)
+  {
+    DBUG_ASSERT(
+      tmp_table->s->uniques ||
+      tmp_table->key_info->key_length >= tmp_table->file->max_key_length() ||
+      tmp_table->key_info->key_parts > tmp_table->file->max_key_parts());
+    free_tmp_table(thd, tmp_table);
+    tmp_table= NULL;
+    delete result;
+    result= NULL;
+    DBUG_RETURN(TRUE);
+  }
+
+  /*
+    Make sure there is only one index on the temp table, and it doesn't have
+    the extra key part created when s->uniques > 0.
+  */
+  DBUG_ASSERT(tmp_table->s->keys == 1 &&
+              ((Item_in_subselect *) item)->left_expr->cols() ==
+              tmp_table->key_info->key_parts);
+
+  if (make_semi_join_conds() ||
+      /* A unique_engine is used both for complete and partial matching. */
+      !(lookup_engine= make_unique_engine()))
+    DBUG_RETURN(TRUE);
+
+  /*
+    Repeat name resolution for 'cond' since cond is not part of any
+    clause of the query, and it is not 'fixed' during JOIN::prepare.
+  */
+  if (semi_join_conds && !semi_join_conds->fixed &&
+      semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
+    DBUG_RETURN(TRUE);
+  /* Let our engine reuse this query plan for materialization. */
+  materialize_join= materialize_engine->join;
+  materialize_join->change_result(result);
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Create an artificial condition to post-filter those rows matched by index
+  lookups that cannot be distinguished by the index lookup procedure.
+
+  @notes
+  The need for post-filtering may occur e.g. because of
+  truncation. Prepared statements execution requires that fix_fields is
+  called for every execution. In order to call fix_fields we need to
+  create a Name_resolution_context and a corresponding TABLE_LIST for
+  the temporary table for the subquery, so that all column references
+  to the materialized subquery table can be resolved correctly.
+
+  @returns
+    @retval TRUE  memory allocation or name resolution (fix_fields) error
+    @retval FALSE the conditions were created and resolved (fixed)
+*/
+
+bool subselect_hash_sj_engine::make_semi_join_conds()
+{
+  /*
+    Table reference for tmp_table that is used to resolve column references
+    (Item_fields) to columns in tmp_table.
+  */
+  TABLE_LIST *tmp_table_ref;
+  /* Name resolution context for all tmp_table columns created below. */
+  Name_resolution_context *context;
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+
+  DBUG_ENTER("subselect_hash_sj_engine::make_semi_join_conds");
+  DBUG_ASSERT(semi_join_conds == NULL);
+
+  if (!(semi_join_conds= new Item_cond_and))
+    DBUG_RETURN(TRUE);
+
+  if (!(tmp_table_ref= (TABLE_LIST*) thd->alloc(sizeof(TABLE_LIST))))
+    DBUG_RETURN(TRUE);
+
+  tmp_table_ref->init_one_table(STRING_WITH_LEN(""),
+                                tmp_table->alias.c_ptr(),
+                                tmp_table->alias.length(),
+                                NULL, TL_READ);
+  tmp_table_ref->table= tmp_table;
+
+  context= new Name_resolution_context;
+  context->init();
+  context->first_name_resolution_table=
+    context->last_name_resolution_table= tmp_table_ref;
+  semi_join_conds_context= context;
+  
+  for (uint i= 0; i < item_in->left_expr->cols(); i++)
+  {
+    Item_func_eq *eq_cond; /* New equi-join condition for the current column. */
+    /* Item for the corresponding field from the materialized temp table. */
+    Item_field *right_col_item;
+
+    if (!(right_col_item= new Item_field(thd, context, tmp_table->field[i])) ||
+        !(eq_cond= new Item_func_eq(item_in->left_expr->element_index(i),
+                                    right_col_item)) ||
+        (((Item_cond_and*)semi_join_conds)->add(eq_cond)))
+    {
+      delete semi_join_conds;
+      semi_join_conds= NULL;
+      DBUG_RETURN(TRUE);
+    }
+  }
+  if (semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
+    DBUG_RETURN(TRUE);
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Create a new uniquesubquery engine for the execution of an IN predicate.
+
+  @details
+  Create and initialize a new JOIN_TAB, and Table_ref objects to perform
+  lookups into the indexed temporary table.
+
+  @retval A new subselect_uniquesubquery_engine object
+  @retval NULL if a memory allocation error occurs
+*/
+
+subselect_uniquesubquery_engine*
+subselect_hash_sj_engine::make_unique_engine()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+  Item_iterator_row it(item_in->left_expr);
+  /* The only index on the temporary table. */
+  KEY *tmp_key= tmp_table->key_info;
+  JOIN_TAB *tab;
+
+  DBUG_ENTER("subselect_hash_sj_engine::make_unique_engine");
+
+  /*
+    Create and initialize the JOIN_TAB that represents an index lookup
+    plan operator into the materialized subquery result. Notice that:
+    - this JOIN_TAB has no corresponding JOIN (and doesn't need one), and
+    - here we initialize only those members that are used by
+      subselect_uniquesubquery_engine, so these objects are incomplete.
+  */
+  if (!(tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB))))
+    DBUG_RETURN(NULL);
+
+  tab->table= tmp_table;
+  tab->preread_init_done= FALSE;
+  tab->ref.tmp_table_index_lookup_init(thd, tmp_key, it, FALSE);
+
+  DBUG_RETURN(new subselect_uniquesubquery_engine(thd, tab, item,
+                                                  semi_join_conds));
+}
+
+
+subselect_hash_sj_engine::~subselect_hash_sj_engine()
+{
+  delete lookup_engine;
+  delete result;
+  if (tmp_table)
+    free_tmp_table(thd, tmp_table);
+}
+
+
+int subselect_hash_sj_engine::prepare()
+{
+  /*
+    Create and optimize the JOIN that will be used to materialize
+    the subquery if not yet created.
+  */
+  return materialize_engine->prepare();
+}
+
+
+/**
+  Cleanup performed after each PS execution.
+
+  @details
+  Called at the end of JOIN::prepare for PS from Item_subselect::cleanup.
+*/
+
+void subselect_hash_sj_engine::cleanup()
+{
+  enum_engine_type lookup_engine_type= lookup_engine->engine_type();
+  is_materialized= FALSE;
+  bitmap_clear_all(&non_null_key_parts);
+  bitmap_clear_all(&partial_match_key_parts);
+  count_partial_match_columns= 0;
+  count_null_only_columns= 0;
+  strategy= UNDEFINED;
+  materialize_engine->cleanup();
+  /*
+    Restore the original Item_in_subselect engine. This engine is created once
+    at parse time and stored across executions, while all other materialization
+    related engines are created and chosen for each execution.
+  */
+  ((Item_in_subselect *) item)->engine= materialize_engine;
+  if (lookup_engine_type == TABLE_SCAN_ENGINE ||
+      lookup_engine_type == ROWID_MERGE_ENGINE)
+  {
+    subselect_engine *inner_lookup_engine;
+    inner_lookup_engine=
+      ((subselect_partial_match_engine*) lookup_engine)->lookup_engine;
+    /*
+      Partial match engines are recreated for each PS execution inside
+      subselect_hash_sj_engine::exec().
+    */
+    delete lookup_engine;
+    lookup_engine= inner_lookup_engine;
+  }
+  DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE);
+  lookup_engine->cleanup();
+  result->cleanup(); /* Resets the temp table as well. */
+  DBUG_ASSERT(tmp_table);
+  free_tmp_table(thd, tmp_table);
+  tmp_table= NULL;
+}
+
+
+/*
+  Get fanout produced by tables specified in the table_map
+*/
+
+double get_fanout_with_deps(JOIN *join, table_map tset)
+{
+  /* Handle the case of "Impossible WHERE" */
+  if (join->table_count == 0)
+    return 0.0;
+
+  /* First, recursively get all tables we depend on */
+  table_map deps_to_check= tset;
+  table_map checked_deps= 0;
+  table_map further_deps;
+  do
+  {
+    further_deps= 0;
+    Table_map_iterator tm_it(deps_to_check);
+    int tableno;
+    while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+    {
+      /* get tableno's dependency tables that are not in checked_deps */
+      further_deps |= join->map2table[tableno]->ref.depend_map & ~checked_deps;
+    }
+
+    checked_deps |= deps_to_check;
+    deps_to_check= further_deps;
+  } while (further_deps != 0);
+
+  
+  /* Now, walk the join order and calculate the fanout */
+  double fanout= 1;
+  for (JOIN_TAB *tab= first_top_level_tab(join, WITHOUT_CONST_TABLES); tab;
+       tab= next_top_level_tab(join, tab))
+  {
+    if ((tab->table->map & checked_deps) && !tab->emb_sj_nest && 
+        tab->records_read != 0)
+    {
+      fanout *= rows2double(tab->records_read);
+    }
+  } 
+  return fanout;
+}
+
+
+#if 0
+void check_out_index_stats(JOIN *join)
+{
+  ORDER *order;
+  uint n_order_items;
+
+  /*
+    First, collect the keys that we can use in each table.
+    We can use a key if 
+    - all tables refer to it.
+  */
+  key_map key_start_use[MAX_TABLES];
+  key_map key_infix_use[MAX_TABLES];
+  table_map key_used=0;
+  table_map non_key_used= 0;
+  
+  bzero(&key_start_use, sizeof(key_start_use)); //psergey-todo: safe initialization!
+  bzero(&key_infix_use, sizeof(key_infix_use));
+  
+  for (order= join->group_list; order; order= order->next)
+  {
+    Item *item= order->item[0];
+
+    if (item->real_type() == Item::FIELD_ITEM)
+    {
+      if (item->used_tables() & OUTER_REF_TABLE_BIT)
+        continue; /* outside references are like constants for us */
+
+      Field *field= ((Item_field*)item->real_item())->field;
+      uint table_no= field->table->tablenr;
+      if (!(non_key_used && table_map(1) << table_no) && 
+          !field->part_of_key.is_clear_all())
+      {
+        key_map infix_map= field->part_of_key;
+        infix_map.subtract(field->key_start);
+        key_start_use[table_no].merge(field->key_start);
+        key_infix_use[table_no].merge(infix_map);
+        key_used |= table_no;
+      }
+      continue;
+    }
+    /* 
+      Note: the below will cause clauses like GROUP BY YEAR(date) not to be
+      handled. 
+    */
+    non_key_used |= item->used_tables();
+  }
+  
+  Table_map_iterator tm_it(key_used & ~non_key_used);
+  int tableno;
+  while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+  {
+    key_map::iterator key_it(key_start_use);
+    int keyno;
+    while ((keyno = tm_it.next_bit()) != key_map::iterator::BITMAP_END)
+    {
+      for (order= join->group_list; order; order= order->next)
+      {
+        Item *item= order->item[0];
+        if (item->used_tables() & (table_map(1) << tableno))
+        {
+          DBUG_ASSERT(item->real_type() == Item::FIELD_ITEM);
+        }
+      }
+      /*
+      if (continuation)
+      {
+        walk through list and find which key parts are occupied;
+        // note that the above can't be made any faster.
+      }
+      else
+        use rec_per_key[0];
+      
+      find out the cardinality.
+      check if cardinality decreases if we use it;
+      */
+    }
+  }
+}
+#endif
+
+
+/*
+  Get an estimate of how many records will be produced after the GROUP BY
+  operation.
+
+  @param join           Join we're operating on 
+  @param join_op_rows   How many records will be produced by the join
+                        operations (this is what join optimizer produces)
+  
+  @seealso
+     See also optimize_semijoin_nests(), grep for "Adjust output cardinality 
+     estimates".  Very similar code there that is not joined with this one
+     because we operate on different data structs and too much effort is
+     needed to abstract them out.
+
+  @return
+     Number of records we expect to get after the GROUP BY operation
+*/
+
+double get_post_group_estimate(JOIN* join, double join_op_rows)
+{
+  table_map tables_in_group_list= table_map(0);
+
+  /* Find out which tables are used in GROUP BY list */
+  for (ORDER *order= join->group_list; order; order= order->next)
+  {
+    Item *item= order->item[0];
+    if (item->used_tables() & RAND_TABLE_BIT)
+    {
+      /* Each join output record will be in its own group */
+      return join_op_rows;
+    }
+    tables_in_group_list|= item->used_tables();
+  }
+  tables_in_group_list &= ~PSEUDO_TABLE_BITS;
+
+  /*
+    Use join fanouts to calculate the max. number of records in the group-list
+  */
+  double fanout_rows[MAX_KEY];
+  bzero(&fanout_rows, sizeof(fanout_rows));
+  double out_rows;
+  
+  out_rows= get_fanout_with_deps(join, tables_in_group_list);
+
+#if 0
+  /* The following will be needed when making use of index stats: */
+  /* 
+    Also generate max. number of records for each of the tables mentioned 
+    in the group-list. We'll use that as a baseline number that we'll try to
+    reduce by using
+     - #table-records 
+     - index statistics.
+  */
+  Table_map_iterator tm_it(tables_in_group_list);
+  int tableno;
+  while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+  {
+    fanout_rows[tableno]= get_fanout_with_deps(join, table_map(1) << tableno);
+  }
+  
+  /*
+    Try to bring down estimates using index statistics.
+  */
+  //check_out_index_stats(join);
+#endif
+
+  return out_rows;
+}
+
+
+/**
+  Execute a subquery IN predicate via materialization.
+
+  @details
+  If needed materialize the subquery into a temporary table, then
+  compute the predicate via a lookup into this table.
+
+  @retval TRUE  if error
+  @retval FALSE otherwise
+*/
+
+int subselect_hash_sj_engine::exec()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+  SELECT_LEX *save_select= thd->lex->current_select;
+  subselect_partial_match_engine *pm_engine= NULL;
+  int res= 0;
+
+  DBUG_ENTER("subselect_hash_sj_engine::exec");
+
+  /*
+    Optimize and materialize the subquery during the first execution of
+    the subquery predicate.
+  */
+  thd->lex->current_select= materialize_engine->select_lex;
+  /* The subquery should be optimized, and materialized only once. */
+  DBUG_ASSERT(materialize_join->optimized && !is_materialized);
+  materialize_join->exec();
+  if ((res= test(materialize_join->error || thd->is_fatal_error)))
+    goto err;
+
+  /*
+    TODO:
+    - Unlock all subquery tables as we don't need them. To implement this
+      we need to add new functionality to JOIN::join_free that can unlock
+      all tables in a subquery (and all its subqueries).
+    - The temp table used for grouping in the subquery can be freed
+      immediately after materialization (yet it's done together with
+      unlocking).
+  */
+  is_materialized= TRUE;
+  /*
+    If the subquery returned no rows, the temporary table is empty, so we know
+    directly that the result of IN is FALSE. We first update the table
+    statistics, then we test if the temporary table for the query result is
+    empty.
+  */
+  tmp_table->file->info(HA_STATUS_VARIABLE);
+  if (!tmp_table->file->stats.records)
+  {
+    /* The value of IN will not change during this execution. */
+    item_in->reset();
+    item_in->make_const();
+    item_in->set_first_execution();
+    goto err; /* res == 0; 'err' also restores thd->lex->current_select. */
+  }
+
+  /*
+    TIMOUR: The schema-based analysis for partial matching can be done once for
+    prepared statement and remembered. It is done here to remove the need to
+    save/restore all related variables between each re-execution, thus making
+    the code simpler.
+  */
+  strategy= get_strategy_using_schema();
+  /* This call may discover that we don't need partial matching at all. */
+  strategy= get_strategy_using_data();
+  if (strategy == PARTIAL_MATCH)
+  {
+    uint count_pm_keys; /* Total number of keys needed for partial matching. */
+    MY_BITMAP *nn_key_parts= NULL; /* Key parts of the only non-NULL index. */
+    uint count_non_null_columns= 0; /* Number of columns in nn_key_parts. */
+    bool has_covering_null_row;
+    bool has_covering_null_columns;
+    select_materialize_with_stats *result_sink=
+      (select_materialize_with_stats *) result;
+    uint field_count= tmp_table->s->fields;
+
+    if (count_partial_match_columns < field_count)
+    {
+      nn_key_parts= &non_null_key_parts;
+      count_non_null_columns= bitmap_bits_set(nn_key_parts);
+    }
+    has_covering_null_row= (result_sink->get_max_nulls_in_row() == field_count);
+    has_covering_null_columns= (count_non_null_columns +
+                                count_null_only_columns == field_count);
+
+    if (has_covering_null_row && has_covering_null_columns)
+    {
+      /*
+        The whole table consist of only NULL values. The result of IN is
+        a constant UNKNOWN.
+      */
+      DBUG_ASSERT(tmp_table->file->stats.records == 1);
+      item_in->value= 0;
+      item_in->null_value= 1;
+      item_in->make_const();
+      item_in->set_first_execution();
+      goto err; /* res == 0; 'err' also restores thd->lex->current_select. */
+    }
+
+    if (has_covering_null_row)
+    {
+      DBUG_ASSERT(count_partial_match_columns == field_count);
+      count_pm_keys= 0;
+    }
+    else if (has_covering_null_columns)
+      count_pm_keys= 1;
+    else
+      count_pm_keys= count_partial_match_columns - count_null_only_columns +
+                     (nn_key_parts ? 1 : 0);
+
+    choose_partial_match_strategy(test(nn_key_parts),
+                                  has_covering_null_row,
+                                  &partial_match_key_parts);
+    DBUG_ASSERT(strategy == PARTIAL_MATCH_MERGE ||
+                strategy == PARTIAL_MATCH_SCAN);
+    if (strategy == PARTIAL_MATCH_MERGE)
+    {
+      pm_engine=
+        new subselect_rowid_merge_engine(thd, (subselect_uniquesubquery_engine*)
+                                         lookup_engine, tmp_table,
+                                         count_pm_keys,
+                                         has_covering_null_row,
+                                         has_covering_null_columns,
+                                         item, result,
+                                         semi_join_conds->argument_list());
+      if (!pm_engine ||
+          ((subselect_rowid_merge_engine*) pm_engine)->
+            init(nn_key_parts, &partial_match_key_parts))
+      {
+        /*
+          The call to init() would fail if there was not enough memory to allocate
+          all buffers for the rowid merge strategy. In this case revert to table
+          scanning which doesn't need any big buffers.
+        */
+        delete pm_engine;
+        pm_engine= NULL;
+        strategy= PARTIAL_MATCH_SCAN;
+      }
+    }
+
+    if (strategy == PARTIAL_MATCH_SCAN)
+    {
+      if (!(pm_engine=
+            new subselect_table_scan_engine(thd, (subselect_uniquesubquery_engine*)
+                                            lookup_engine, tmp_table,
+                                            item, result,
+                                            semi_join_conds->argument_list(),
+                                            has_covering_null_row,
+                                            has_covering_null_columns)))
+      {
+        /* This is an irrecoverable error. */
+        res= 1;
+        goto err;
+      }
+    }
+  }
+
+  if (pm_engine)
+    lookup_engine= pm_engine;
+  item_in->change_engine(lookup_engine);
+
+err:
+  thd->lex->current_select= save_select;
+  DBUG_RETURN(res);
+}
+
+
+/**
+  Append a textual description of this engine to 'str' (debugging, views).
+*/
+
+void subselect_hash_sj_engine::print(String *str, enum_query_type query_type)
+{
+  str->append(STRING_WITH_LEN(" <materialize> ("));
+  materialize_engine->print(str, query_type);
+  str->append(STRING_WITH_LEN(" ), "));
+  if (!lookup_engine)
+  {
+    /* No lookup engine has been set yet; print a placeholder instead. */
+    str->append(STRING_WITH_LEN("<engine selected at execution time>"));
+    return;
+  }
+  lookup_engine->print(str, query_type);
+}
+
+void subselect_hash_sj_engine::fix_length_and_dec(Item_cache** row)
+{
+  DBUG_ASSERT(FALSE); /* Asserts unconditionally; the body does nothing else. */
+}
+
+void subselect_hash_sj_engine::exclude()
+{
+  DBUG_ASSERT(FALSE); /* Asserts unconditionally; the body does nothing else. */
+}
+
+bool subselect_hash_sj_engine::no_tables()
+{
+  DBUG_ASSERT(FALSE); /* Asserts unconditionally before returning. */
+  return FALSE;       /* Constant result; no table information is checked. */
+}
+
+bool subselect_hash_sj_engine::change_result(Item_subselect *si,
+                                             select_result_interceptor *res,
+                                             bool temp __attribute__((unused)))
+{
+  DBUG_ASSERT(FALSE); /* Asserts unconditionally; no argument is used. */
+  return TRUE;        /* Constant result; the result sink is not changed. */
+}
+
+
+Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg,
+                         ha_rows null_count_arg, ha_rows min_null_row_arg,
+                         ha_rows max_null_row_arg, uchar *row_num_to_rowid_arg)
+  : keyid(keyid_arg), tbl(tbl_arg), search_key(search_key_arg),
+    row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg)
+{
+  DBUG_ASSERT(tbl->file->stats.records > null_count);
+  key_buff_elements= tbl->file->stats.records - null_count;
+  cur_key_idx= HA_POS_ERROR;
+
+  /* The buffers are allocated later by alloc_keys_buffers(); until then keep
+     them in a state that ~Ordered_key() can release safely. */
+  key_buff= NULL;
+  bzero(&null_key, sizeof(null_key));
+
+  DBUG_ASSERT((null_count && min_null_row_arg && max_null_row_arg) ||
+              (!null_count && !min_null_row_arg && !max_null_row_arg));
+  /* The counters are 1-based, for key access we need 0-based indexes. */
+  min_null_row= null_count ? min_null_row_arg - 1 : 0;
+  max_null_row= null_count ? max_null_row_arg - 1 : 0;
+}
+
+
+Ordered_key::~Ordered_key()
+{
+  my_free(key_buff);      /* Row-number array from alloc_keys_buffers(). */
+  bitmap_free(&null_key); /* NULL bitmap set up by alloc_keys_buffers(). */
+}
+
+
+/*
+  Cleanup that needs to be done for each PS (re)execution.
+*/
+
+void Ordered_key::cleanup()
+{
+  /*
+    Currently these keys are recreated for each PS re-execution, thus
+    there is nothing to clean up: the whole object goes away after execution
+    is over. All handler related initialization/deinitialization is done by
+    the parent subselect_rowid_merge_engine object.
+  */
+}
+
+
+/*
+  Initialize a multi-column index over the columns set in 'columns_to_index'.
+  @retval FALSE  the key was initialized
+  @retval TRUE   a memory allocation failed
+*/
+
+bool Ordered_key::init(MY_BITMAP *columns_to_index)
+{
+  THD *thd= tbl->in_use;
+  uint cur_key_col= 0;
+  Item_field *cur_tmp_field;
+  Item_func_lt *fn_less_than;
+
+  key_column_count= bitmap_bits_set(columns_to_index);
+
+  key_columns= (Item_field**) thd->alloc(key_column_count *
+                                         sizeof(Item_field*));
+  compare_pred= (Item_func_lt**) thd->alloc(key_column_count *
+                                            sizeof(Item_func_lt*));
+  if (!key_columns || !compare_pred)
+    return TRUE; /* Out of memory. */
+
+  for (uint i= 0; i < columns_to_index->n_bits; i++)
+  {
+    if (!bitmap_is_set(columns_to_index, i))
+      continue;
+    cur_tmp_field= new Item_field(tbl->field[i]);
+    /* Create the predicate (tmp_column[i] < outer_ref[i]). */
+    fn_less_than= new Item_func_lt(cur_tmp_field,
+                                   search_key->element_index(i));
+    if (!cur_tmp_field || !fn_less_than)
+      return TRUE; /* Out of memory. */
+    fn_less_than->fix_fields(thd, (Item**) &fn_less_than);
+    key_columns[cur_key_col]= cur_tmp_field;
+    compare_pred[cur_key_col]= fn_less_than;
+    ++cur_key_col;
+  }
+
+  /* Allocate the key buffers; a failure here is reported the same way. */
+  return alloc_keys_buffers();
+}
+
+
+/*
+  Initialize a single-column index over column 'col_idx' of the table.
+  @retval FALSE  the key was initialized
+  @retval TRUE   a memory allocation failed
+*/
+
+bool Ordered_key::init(int col_idx)
+{
+  THD *thd= tbl->in_use;
+
+  key_column_count= 1;
+
+  key_columns= (Item_field**) thd->alloc(sizeof(Item_field*));
+  compare_pred= (Item_func_lt**) thd->alloc(sizeof(Item_func_lt*));
+  if (!key_columns || !compare_pred)
+    return TRUE; /* Out of memory. */
+
+  key_columns[0]= new Item_field(tbl->field[col_idx]);
+  /* Create the predicate (tmp_column[i] < outer_ref[i]). */
+  compare_pred[0]= new Item_func_lt(key_columns[0],
+                                    search_key->element_index(col_idx));
+  if (!key_columns[0] || !compare_pred[0])
+    return TRUE; /* Out of memory. */
+  compare_pred[0]->fix_fields(thd, (Item**)&compare_pred[0]);
+
+  /* Allocate the key buffers; a failure here is reported the same way. */
+  return alloc_keys_buffers();
+}
+
+
+/*
+  Allocate the row-number array and the NULL bitmap used by this key.
+  Returns TRUE if either allocation fails, FALSE on success.
+*/
+
+bool Ordered_key::alloc_keys_buffers()
+{
+  DBUG_ASSERT(key_buff_elements > 0);
+
+  size_t key_buff_size= (size_t)(key_buff_elements * sizeof(rownum_t));
+  key_buff= (rownum_t*) my_malloc(key_buff_size, MYF(MY_WME));
+  if (!key_buff)
+    return TRUE;
+
+  /*
+    TIMOUR: a bitmap of (max_null_row - min_null_row) bits with min_null_row
+    as lookup offset would suffice. max_null_row is an index, hence the +1.
+  */
+  uint null_key_bits= (uint)(max_null_row + 1);
+  if (bitmap_init(&null_key, NULL, null_key_bits, FALSE))
+    return TRUE;
+
+  cur_key_idx= HA_POS_ERROR;
+  return FALSE;
+}
+
+
+/*
+  Quick sort comparison function that compares two rows of the same table
+  identified by their row numbers. Both rows are fetched by rowid and
+  compared on the indexed columns, in key column order.
+  @retval -1
+  @retval  0
+  @retval +1
+*/
+
+int
+Ordered_key::cmp_keys_by_row_data(ha_rows a, ha_rows b)
+{
+  if (a == b)
+    return 0;
+
+  /* The length in bytes of the rowids (positions) of tmp_table. */
+  const uint rowid_length= tbl->file->ref_length;
+  int error;
+
+  /* Fetch row 'a' into record[0] and row 'b' into record[1]. */
+  error= tbl->file->ha_rnd_pos(tbl->record[0],
+                               row_num_to_rowid + a * rowid_length);
+  DBUG_ASSERT(!error);
+  error= tbl->file->ha_rnd_pos(tbl->record[1],
+                               row_num_to_rowid + b * rowid_length);
+  DBUG_ASSERT(!error);
+
+  /*
+    Compare the two rows by the corresponding values of the indexed
+    columns; the first column that differs decides the result.
+  */
+  for (uint col= 0; col < key_column_count; col++)
+  {
+    int cmp_res= key_columns[col]->field->cmp_offset(tbl->s->rec_buff_length);
+    if (cmp_res)
+      return (cmp_res > 0 ? 1 : -1);
+  }
+  return 0;
+}
+
+
+int
+Ordered_key::cmp_keys_by_row_data_and_rownum(Ordered_key *key,
+                                             rownum_t* a, rownum_t* b)
+{
+  /* Order by row data first; rows with equal data are ordered by number. */
+  int by_data= key->cmp_keys_by_row_data(*a, *b);
+  if (by_data || *a == *b)
+    return by_data;
+  return (*a < *b) ? -1 : 1;
+}
+
+
+void Ordered_key::sort_keys()
+{
+  my_qsort2(key_buff, (size_t) key_buff_elements, sizeof(rownum_t),
+            (qsort2_cmp) &cmp_keys_by_row_data_and_rownum, (void*) this);
+  /* Invalidate the current row position after reordering key_buff. */
+  cur_key_idx= HA_POS_ERROR;
+}
+
+
+/*
+  The fraction of rows that do not contain NULL in the columns indexed by
+  this key, computed as 1 - null_count / (number of rows in the table).
+  @retval  1  if there are no NULLs
+  @retval  0  if only NULLs
+*/
+
+double Ordered_key::null_selectivity()
+{
+  /* We should not be processing empty tables. */
+  DBUG_ASSERT(tbl->file->stats.records);
+  const double total_rows= (double) tbl->file->stats.records;
+  return (1 - (double) null_count / total_rows);
+}
+
+
+/*
+  Compare the value(s) of the current key in 'search_key' with the
+  data of the table row with number 'row_num'.
+
+  @notes The comparison result follows from the way compare_pred
+  is created in Ordered_key::init. Currently compare_pred compares
+  a field of the current row with the corresponding Item that
+  contains the search key.
+
+  @param row_num  Number of the row (not index in the key_buff array)
+
+  @retval -1  if (current row  < search_key)
+  @retval  0  if (current row == search_key)
+  @retval +1  if (current row  > search_key)
+*/
+
+int Ordered_key::cmp_key_with_search_key(rownum_t row_num)
+{
+  /* Locate the rowid of 'row_num' in the row_num -> rowid mapping. */
+  uchar *rowid= row_num_to_rowid + row_num * tbl->file->ref_length;
+
+  /* Fetch that row into record[0] before evaluating the predicates. */
+  int error= tbl->file->ha_rnd_pos(tbl->record[0], rowid);
+  DBUG_ASSERT(!error);
+
+  for (uint col= 0; col < key_column_count; col++)
+  {
+    Item_func_lt *pred= compare_pred[col];
+    int cmp_res= pred->get_comparator()->compare();
+    /* Unlike Arg_comparator::compare_row() here there should be no NULLs. */
+    DBUG_ASSERT(!pred->null_value);
+    if (cmp_res)
+      return (cmp_res > 0 ? 1 : -1);
+  }
+  return 0;
+}
+
+
+/*
+  Find a key in a sorted array of keys via binary search.
+  On success cur_key_idx is set to the left-most key equal to the search key.
+  see create_subq_in_equalities()
+*/
+
+bool Ordered_key::lookup()
+{
+  DBUG_ASSERT(key_buff_elements);
+
+  ha_rows lo= 0;
+  ha_rows hi= key_buff_elements - 1;
+  ha_rows mid;
+  int cmp_res;
+
+  while (lo <= hi)
+  {
+    mid= lo + (hi - lo) / 2;
+    cmp_res= cmp_key_with_search_key(key_buff[mid]);
+    /*
+      In order to find the minimum match, check if the previous element is
+      equal or smaller than the found one. If equal, we need to search further
+      to the left (cmp_res is forced to 1 to take the "greater" branch).
+    */
+    if (!cmp_res && mid > 0)
+      cmp_res= !cmp_key_with_search_key(key_buff[mid - 1]) ? 1 : 0;
+
+    if (cmp_res == -1)
+    {
+      /* row[mid] < search_key */
+      lo= mid + 1;
+    }
+    else if (cmp_res == 1)
+    {
+      /* row[mid] > search_key; 'hi' is unsigned, so stop before mid - 1. */
+      if (!mid)
+        goto not_found;
+      hi= mid - 1;
+    }
+    else
+    {
+      /* row[mid] == search_key */
+      cur_key_idx= mid;
+      return TRUE;
+    }
+  }
+not_found:
+  cur_key_idx= HA_POS_ERROR;
+  return FALSE;
+}
+
+
+/*
+  Move the current index pointer to the next key with the same column
+  values as the current key. Since the index is sorted, all such keys
+  are contiguous. Returns TRUE if the pointer was advanced, else FALSE.
+*/
+
+bool Ordered_key::next_same()
+{
+  DBUG_ASSERT(key_buff_elements);
+
+  /* There is no next key to compare with. */
+  if (cur_key_idx >= key_buff_elements - 1)
+    return FALSE;
+
+  /*
+    TIMOUR:
+    The below is quite inefficient, since as a result we will fetch every
+    row (except the last one) twice. There must be a more efficient way,
+    e.g. swapping record[0] and record[1], and reading only the new record.
+  */
+  if (cmp_keys_by_row_data(key_buff[cur_key_idx], key_buff[cur_key_idx + 1]))
+    return FALSE;
+
+  ++cur_key_idx;
+  return TRUE;
+}
+
+
+void Ordered_key::print(String *str)
+{
+  uint i;
+  /* qs_append() never grows the buffer: reserve room before each use. */
+  str->append("{idx=");
+  if (str->reserve(MAX_INT_WIDTH + 1)) return;
+  str->qs_append(keyid);
+  str->append(", (");
+  for (i= 0; i < key_column_count - 1; i++)
+  {
+    str->append(key_columns[i]->field->field_name);
+    str->append(", ");
+  }
+  str->append(key_columns[i]->field->field_name);
+  str->append("), null_bitmap: (bits=");
+  /* One integer, three doubles and under 64 bytes of labels follow. */
+  if (str->reserve(MAX_INT_WIDTH + 3 * FLOATING_POINT_BUFFER + 64)) return;
+  str->qs_append(null_key.n_bits);
+  str->append(", nulls= ");
+  str->qs_append((double)null_count);
+  str->append(", min_null= ");
+  str->qs_append((double)min_null_row);
+  str->append(", max_null= ");
+  str->qs_append((double)max_null_row);
+  str->append("), }");
+}
+
+
+subselect_partial_match_engine::subselect_partial_match_engine(
+  THD *thd_arg, subselect_uniquesubquery_engine *engine_arg,
+  TABLE *tmp_table_arg, Item_subselect *item_arg,
+  select_result_interceptor *result_arg,
+  List<Item> *equi_join_conds_arg,
+  bool has_covering_null_row_arg,
+  bool has_covering_null_columns_arg)
+  :subselect_engine(thd_arg, item_arg, result_arg),
+   tmp_table(tmp_table_arg), lookup_engine(engine_arg),
+   equi_join_conds(equi_join_conds_arg),
+   has_covering_null_row(has_covering_null_row_arg),
+   has_covering_null_columns(has_covering_null_columns_arg)
+{} /* Only stores its arguments; all work is in the initializer list. */
+
+
+/*
+  Evaluate the IN predicate for the current outer reference and store the
+  outcome in the Item_in_subselect this engine belongs to:
+    value != 0               complete match, IN is TRUE
+    value= 0, null_value= 1  IN is UNKNOWN
+    value= 0, null_value= 0  IN is FALSE
+
+  Steps:
+    1. copy the outer reference into the lookup key; a result of -1 makes
+       IN FALSE right away;
+    2. if the key was copied (result 0), look for a complete match;
+    3. otherwise, or without a complete match, IN is UNKNOWN if there is a
+       covering NULL row or partial_match() finds a row, else FALSE.
+
+  @retval 0      success
+  @retval other  the error returned by the index lookup
+*/
+
+int subselect_partial_match_engine::exec()
+{
+  Item_in_subselect *in_pred= (Item_in_subselect *) item;
+  bool is_unknown;
+
+  /* Try to find a matching row by index lookup. */
+  int copy_res= lookup_engine->copy_ref_key_simple();
+  if (copy_res == -1)
+  {
+    /* The result is FALSE based on the outer reference. */
+    in_pred->value= 0;
+    in_pred->null_value= 0;
+    return 0;
+  }
+  if (copy_res == 0)
+  {
+    /* Search for a complete match. */
+    int lookup_res= lookup_engine->index_lookup();
+    if (lookup_res)
+    {
+      /* An error occurred during lookup(). */
+      in_pred->value= 0;
+      in_pred->null_value= 0;
+      return lookup_res;
+    }
+    if (in_pred->value)
+    {
+      /*
+        A complete match was found, the result of IN is TRUE.
+        Notice: (this->item == lookup_engine->item)
+      */
+      return 0;
+    }
+  }
+
+  if (has_covering_null_row)
+  {
+    /*
+      If there is a NULL-only row that covers all columns the result of IN
+      is UNKNOWN.
+    */
+    is_unknown= TRUE;
+  }
+  else
+  {
+    /*
+      There is no complete match. Look for a partial match (UNKNOWN result),
+      or no match (FALSE).
+    */
+    if (tmp_table->file->inited)
+      tmp_table->file->ha_index_end();
+    is_unknown= partial_match();
+  }
+
+  /*
+    The result of IN is UNKNOWN or FALSE.
+
+    TIMOUR: which one is the right way to propagate an UNKNOWN result?
+    Should we also set empty_result_set= FALSE; ???
+  */
+  in_pred->value= 0;
+  in_pred->null_value= is_unknown ? 1 : 0;
+  return 0;
+}
+
+
+void subselect_partial_match_engine::print(String *str,
+                                           enum_query_type query_type)
+{
+  /*
+    Should never be called as the actual engine cannot be known at query
+    optimization time. The body is empty; the assertion is left disabled:
+    DBUG_ASSERT(FALSE);
+  */
+}
+
+
+/*
+  Build the ordered keys (row-number indexes) used by the rowid merge strategy.
+  @param non_null_key_parts  Columns of the only non-NULL key, NULL if none.
+  @param partial_match_key_parts  A union of all single-column NULL key parts.
+
+  @retval FALSE  the engine was initialized successfully
+  @retval TRUE   an error (e.g. out of memory); revert to another strategy
+*/
+
+bool
+subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
+                                   MY_BITMAP *partial_match_key_parts)
+{
+  /* The length in bytes of the rowids (positions) of tmp_table. */
+  uint rowid_length= tmp_table->file->ref_length;
+  ha_rows row_count= tmp_table->file->stats.records;
+  rownum_t cur_rownum= 0;
+  select_materialize_with_stats *result_sink=
+    (select_materialize_with_stats *) result;
+  uint cur_keyid= 0;
+  Item_in_subselect *item_in= (Item_in_subselect*) item;
+  int error;
+
+  if (merge_keys_count == 0)
+  {
+    DBUG_ASSERT(bitmap_bits_set(partial_match_key_parts) == 0 ||
+                has_covering_null_row);
+    /* There is nothing to initialize, we will only do regular lookups. */
+    return FALSE;
+  }
+
+  /*
+    If all nullable columns contain only NULLs, there must be one index
+    over all non-null columns.
+  */
+  DBUG_ASSERT(!has_covering_null_columns ||
+              (has_covering_null_columns &&
+               merge_keys_count == 1 && non_null_key_parts));
+  /*
+    Allocate buffers to hold the merged keys and the mapping between rowids and
+    row numbers. The key array is zero-filled so that unused slots stay NULL.
+  */
+  if (!(merge_keys= (Ordered_key**) thd->calloc(merge_keys_count *
+                                                sizeof(Ordered_key*))) ||
+      !(row_num_to_rowid= (uchar*) my_malloc((size_t)(row_count * rowid_length),
+        MYF(MY_WME))))
+    return TRUE;
+
+  /* Create the only non-NULL key if there is any. */
+  if (non_null_key_parts)
+  {
+    non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr,
+                                  0, 0, 0, row_num_to_rowid);
+    if (!non_null_key || non_null_key->init(non_null_key_parts))
+      return TRUE;
+    merge_keys[cur_keyid]= non_null_key;
+    merge_keys[cur_keyid]->first();
+    ++cur_keyid;
+  }
+
+  /*
+    If all nullable columns contain NULLs, the only key that is needed is the
+    only non-NULL key that is already created above.
+  */
+  if (!has_covering_null_columns)
+  {
+    if (bitmap_init_memroot(&matching_keys, merge_keys_count, thd->mem_root) ||
+        bitmap_init_memroot(&matching_outer_cols, merge_keys_count, thd->mem_root))
+      return TRUE;
+
+    /*
+      Create one single-column NULL-key for each column in
+      partial_match_key_parts.
+    */
+    for (uint i= 0; i < partial_match_key_parts->n_bits; i++)
+    {
+      /* Skip columns that have no NULLs, or contain only NULLs. */
+      if (!bitmap_is_set(partial_match_key_parts, i) ||
+          result_sink->get_null_count_of_col(i) == row_count)
+        continue;
+
+      merge_keys[cur_keyid]= new Ordered_key(
+                                     cur_keyid, tmp_table,
+                                     item_in->left_expr->element_index(i),
+                                     result_sink->get_null_count_of_col(i),
+                                     result_sink->get_min_null_of_col(i),
+                                     result_sink->get_max_null_of_col(i),
+                                     row_num_to_rowid);
+      if (!merge_keys[cur_keyid] || merge_keys[cur_keyid]->init(i))
+        return TRUE;
+      merge_keys[cur_keyid]->first();
+      ++cur_keyid;
+    }
+  }
+  DBUG_ASSERT(cur_keyid == merge_keys_count);
+
+  /* Populate the indexes with data from the temporary table. */
+  if (tmp_table->file->ha_rnd_init_with_error(1))
+    return TRUE;
+  tmp_table->file->extra_opt(HA_EXTRA_CACHE,
+                             current_thd->variables.read_buff_size);
+  tmp_table->null_row= 0;
+  while (TRUE)
+  {
+    error= tmp_table->file->ha_rnd_next(tmp_table->record[0]);
+    if (error == HA_ERR_RECORD_DELETED)
+    {
+      /* We get this for duplicate records that should not be in tmp_table. */
+      continue;
+    }
+    /* This is a temp table that we fully own, there should be no other
+       cause to stop the iteration than EOF. If there is one anyway, or if
+       there are more rows than were counted, fail and let the caller cope. */
+    DBUG_ASSERT(!error || error == HA_ERR_END_OF_FILE);
+    if (error == HA_ERR_END_OF_FILE)
+    {
+      DBUG_ASSERT(cur_rownum == tmp_table->file->stats.records);
+      break;
+    }
+    if (error || cur_rownum >= row_count)
+    {
+      tmp_table->file->ha_rnd_end();
+      return TRUE;
+    }
+
+    /* Save the position of this record in the row_num -> rowid mapping. */
+    tmp_table->file->position(tmp_table->record[0]);
+    memcpy(row_num_to_rowid + cur_rownum * rowid_length,
+           tmp_table->file->ref, rowid_length);
+
+    /* Add the current row number to the corresponding keys. */
+    if (non_null_key)
+    {
+      /* By definition there are no NULLs in the non-NULL key. */
+      non_null_key->add_key(cur_rownum);
+    }
+
+    for (uint i= (non_null_key ? 1 : 0); i < merge_keys_count; i++)
+    {
+      /* Check if the first and only indexed column contains NULL in the
+         current row, and add the row number to the corresponding key. */
+      if (tmp_table->field[merge_keys[i]->get_field_idx(0)]->is_null())
+        merge_keys[i]->set_null(cur_rownum);
+      else
+        merge_keys[i]->add_key(cur_rownum);
+    }
+    ++cur_rownum;
+  }
+
+  tmp_table->file->ha_rnd_end();
+
+  /* Sort all the keys by their NULL selectivity. */
+  my_qsort(merge_keys, merge_keys_count, sizeof(Ordered_key*),
+           (qsort_cmp) cmp_keys_by_null_selectivity);
+
+  /* Sort the keys in each of the indexes. */
+  for (uint i= 0; i < merge_keys_count; i++)
+    merge_keys[i]->sort_keys();
+
+  if (init_queue(&pq, merge_keys_count, 0, FALSE,
+                 subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL,
+                 0, 0))
+    return TRUE;
+
+  return FALSE;
+}
+
+
+subselect_rowid_merge_engine::~subselect_rowid_merge_engine()
+{
+  /* None of the resources below is allocated if there are no ordered keys. */
+  if (!merge_keys_count)
+    return;
+  my_free(row_num_to_rowid);
+  for (uint key_no= 0; key_no < merge_keys_count; key_no++)
+    delete merge_keys[key_no];
+  delete_queue(&pq);
+  /* End the random-position scan if the handler is still in that mode. */
+  if (tmp_table->file->inited == handler::RND)
+    tmp_table->file->ha_rnd_end();
+}
+
+
+void subselect_rowid_merge_engine::cleanup()
+{ /* Empty body: this method currently performs no work. */
+}
+
+
+/*
+  Quick sort comparison function to compare keys in order of decreasing
+  Ordered_key::null_selectivity(), so that the most selective keys come first.
+
+  @param  k1 first key to compare
+  @param  k2 second key to compare
+
+  @retval  1  if k1 is less selective than k2
+  @retval  0  if k1 is equally selective as k2
+  @retval -1  if k1 is more selective than k2
+*/
+
+int
+subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key **k1,
+                                                           Ordered_key **k2)
+{
+  const double sel1= (*k1)->null_selectivity();
+  const double sel2= (*k2)->null_selectivity();
+
+  /* The key with the larger selectivity value is ordered first. */
+  if (sel1 > sel2)
+    return -1;
+  return (sel1 < sel2) ? 1 : 0;
+}
+
+
+/*
+  Priority queue comparison of two keys by their current row number.
+*/
+
+int
+subselect_rowid_merge_engine::cmp_keys_by_cur_rownum(void *arg,
+                                                     uchar *k1, uchar *k2)
+{
+  const rownum_t lhs= ((Ordered_key*) k1)->current();
+  const rownum_t rhs= ((Ordered_key*) k2)->current();
+  return (lhs == rhs) ? 0 : (lhs < rhs) ? -1 : 1;
+}
+
+
+/*
+  Check if certain table row contains a NULL in all columns for which there is
+  no match in the corresponding value index.
+
+  @note
+  There is no need to check the columns that contain only NULLs, because
+  those are guaranteed to match.
+
+  @retval TRUE if a NULL row exists
+  @retval FALSE otherwise
+*/
+
+bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num)
+{
+  for (uint i= 0; i < merge_keys_count; i++)
+  {
+    Ordered_key *key= merge_keys[i];
+
+    /*
+      A key whose id is set in 'matching_keys' already matches a value in
+      row 'row_num', thus it is skipped as it can't possibly match a NULL.
+      Every other key must have a NULL in this row.
+    */
+    if (!bitmap_is_set(&matching_keys, key->get_keyid()) &&
+        !key->is_null(row_num))
+      return FALSE;
+  }
+
+  /* Every key either matched by value or is NULL in this row. */
+  return TRUE;
+}
+
+
+/*
+  Search the ordered keys for a row that partially matches the outer reference.
+  @retval TRUE  there is a partial match (UNKNOWN)
+  @retval FALSE  there is no match at all (FALSE)
+*/
+
+bool subselect_rowid_merge_engine::partial_match()
+{
+  Ordered_key *min_key; /* Key that contains the current minimum position. */
+  rownum_t min_row_num; /* Current row number of min_key. */
+  Ordered_key *cur_key;
+  rownum_t cur_row_num;
+  uint count_nulls_in_search_key= 0;
+  bool res= FALSE;
+
+  /* If there is a non-NULL key, it must be the first key in the keys array. */
+  DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key));
+  /* The priority queue for keys must be empty. */
+  DBUG_ASSERT(!pq.elements);
+
+  /* All data accesses during execution are via handler::ha_rnd_pos() */
+  if (tmp_table->file->ha_rnd_init_with_error(0))
+  {
+    res= FALSE;
+    goto end;
+  }
+
+  /* Check if there is a match for the columns of the only non-NULL key. */
+  if (non_null_key && !non_null_key->lookup())
+  {
+    res= FALSE;
+    goto end;
+  }
+
+  /* If all nullable columns contain only NULLs, then there is a guaranteed
+     partial match, and we don't need to search for a matching row. */
+  if (has_covering_null_columns)
+  {
+    res= TRUE;
+    goto end;
+  }
+
+  if (non_null_key)
+    queue_insert(&pq, (uchar *) non_null_key);
+  /* Do not add the non_null_key, since it was already processed above. */
+  bitmap_clear_all(&matching_outer_cols);
+  for (uint i= test(non_null_key); i < merge_keys_count; i++)
+  {
+    DBUG_ASSERT(merge_keys[i]->get_column_count() == 1);
+    if (merge_keys[i]->get_search_key(0)->null_value)
+    {
+      ++count_nulls_in_search_key;
+      bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_keyid());
+    }
+    else if (merge_keys[i]->lookup())
+      queue_insert(&pq, (uchar *) merge_keys[i]);
+  }
+
+  /*
+    If the outer reference consists of only NULLs, or if it has NULLs in all
+    nullable columns, the result is UNKNOWN.
+  */
+  if (count_nulls_in_search_key ==
+      ((Item_in_subselect *) item)->left_expr->cols() -
+      (non_null_key ? non_null_key->get_column_count() : 0))
+  {
+    res= TRUE;
+    goto end;
+  }
+
+  /*
+    If there is no NULL (sub)row that covers all NULL columns, and there is no
+    single match for any of the NULL columns, the result is FALSE.
+  */
+  if (pq.elements - test(non_null_key) == 0)
+  {
+    res= FALSE;
+    goto end;
+  }
+
+  DBUG_ASSERT(pq.elements);
+
+  min_key= (Ordered_key*) queue_remove_top(&pq);
+  min_row_num= min_key->current();
+  /* Forget the matches recorded for earlier rows and earlier calls. */
+  bitmap_clear_all(&matching_keys);
+  bitmap_set_bit(&matching_keys, min_key->get_keyid());
+  bitmap_union(&matching_keys, &matching_outer_cols);
+  if (min_key->next_same())
+    queue_insert(&pq, (uchar *) min_key);
+
+  if (pq.elements == 0)
+  {
+    /*
+      Check the only matching row of the only key min_key for NULL matches
+      in the other columns.
+    */
+    res= test_null_row(min_row_num);
+    goto end;
+  }
+
+  while (TRUE)
+  {
+    cur_key= (Ordered_key*) queue_remove_top(&pq);
+    cur_row_num= cur_key->current();
+
+    if (cur_row_num == min_row_num)
+      bitmap_set_bit(&matching_keys, cur_key->get_keyid());
+    else
+    {
+      /* Follows from the correct use of priority queue. */
+      DBUG_ASSERT(cur_row_num > min_row_num);
+      if (test_null_row(min_row_num))
+      {
+        res= TRUE;
+        goto end;
+      }
+      else
+      {
+        min_key= cur_key;
+        min_row_num= cur_row_num;
+        bitmap_clear_all(&matching_keys); /* matches of the previous row */
+        bitmap_set_bit(&matching_keys, min_key->get_keyid());
+        bitmap_union(&matching_keys, &matching_outer_cols);
+      }
+    }
+
+    if (cur_key->next_same())
+      queue_insert(&pq, (uchar *) cur_key);
+
+    if (pq.elements == 0)
+    {
+      /* Check the last row of the last column in PQ for NULL matches. */
+      res= test_null_row(min_row_num);
+      goto end;
+    }
+  }
+
+  /* We should never get here - all branches must be handled explicitly above. */
+  DBUG_ASSERT(FALSE);
+
+end:
+  queue_remove_all(&pq);
+  tmp_table->file->ha_rnd_end();
+  return res;
+}
+
+
+subselect_table_scan_engine::subselect_table_scan_engine(
+  THD *thd_arg, subselect_uniquesubquery_engine *engine_arg,
+  TABLE *tmp_table_arg,
+  Item_subselect *item_arg,
+  select_result_interceptor *result_arg,
+  List<Item> *equi_join_conds_arg,
+  bool has_covering_null_row_arg,
+  bool has_covering_null_columns_arg)
+  :subselect_partial_match_engine(thd_arg, engine_arg, tmp_table_arg, item_arg,
+                                  result_arg, equi_join_conds_arg,
+                                  has_covering_null_row_arg,
+                                  has_covering_null_columns_arg)
+{} /* Forwards every argument to the base class; no state of its own. */
+
+
+/*
+  Look for a partial match by scanning the whole temporary table.
+
+  TIMOUR:
+  This method is based on subselect_uniquesubquery_engine::scan_table().
+  Consider refactoring somehow, 80% of the code is the same.
+
+  The scan implements the following algorithm:
+
+  for each row_i in tmp_table
+  {
+    count_matches= 0;
+    for each row element row_i[j]
+    {
+      if (outer_ref[j] is NULL || row_i[j] is NULL || outer_ref[j] == row_i[j])
+        ++count_matches;
+    }
+    if (count_matches == outer_ref.elements)
+      return TRUE
+  }
+  return FALSE
+
+  Rows reported as HA_ERR_RECORD_DELETED are skipped. Any other read error
+  is passed to report_error() and ends the scan with a FALSE result, as does
+  a failure to start the scan.
+
+  @retval TRUE   a row was found where every equality is TRUE or UNKNOWN
+  @retval FALSE  otherwise
+*/
+
+bool subselect_table_scan_engine::partial_match()
+{
+  List_iterator_fast<Item> eq_it(*equi_join_conds);
+  bool found= FALSE;
+
+  /* Scan the table sequentially; if the scan can't start there is no match. */
+  if (!tmp_table->file->ha_rnd_init_with_error(1))
+  {
+    tmp_table->file->extra_opt(HA_EXTRA_CACHE,
+                               current_thd->variables.read_buff_size);
+
+    /* Stop at the first matching row. */
+    while (!found)
+    {
+      int error= tmp_table->file->ha_rnd_next(tmp_table->record[0]);
+      if (error == HA_ERR_RECORD_DELETED)
+        continue;
+      if (error)
+      {
+        /* End of file ends the scan silently, other errors are reported. */
+        if (error != HA_ERR_END_OF_FILE)
+          report_error(tmp_table, error);
+        break;
+      }
+
+      /* Count the equalities that hold, or are UNKNOWN, for this row. */
+      uint count_matches= 0;
+      Item *cur_eq;
+      eq_it.rewind();
+      while ((cur_eq= eq_it++))
+      {
+        DBUG_ASSERT(cur_eq->type() == Item::FUNC_ITEM &&
+                    ((Item_func*)cur_eq)->functype() == Item_func::EQ_FUNC);
+        /* A FALSE (not NULL) equality rules this row out. */
+        if (!cur_eq->val_int() && !cur_eq->null_value)
+          break;
+        ++count_matches;
+      }
+
+      /* The row matches when as many equalities as fields were counted. */
+      found= (count_matches == tmp_table->s->fields);
+    }
+  }
+
+  /* The scan is ended on every path, even if it could not be started. */
+  tmp_table->file->ha_rnd_end();
+  return found;
+}
+
+
+void subselect_table_scan_engine::cleanup()
+{ /* Empty body: this method currently performs no work. */
+}
+
diff --git a/sql/item_subselect.h b/sql/item_subselect.h
index 51de9bf1040..5beabed9182 100644
--- a/sql/item_subselect.h
+++ b/sql/item_subselect.h
@@ -22,11 +22,14 @@
 #pragma interface			/* gcc class implementation */
 #endif
 
+#include <queues.h>
+
 class st_select_lex;
 class st_select_lex_unit;
 class JOIN;
-class select_subselect;
+class select_result_interceptor;
 class subselect_engine;
+class subselect_hash_sj_engine;
 class Item_bool_func2;
 class Comp_creator;
 
@@ -37,21 +40,17 @@ typedef class st_select_lex SELECT_LEX;
   including this file.
 */
 typedef Comp_creator* (*chooser_compare_func_creator)(bool invert);
+class Cached_item;
 
 /* base class for subselects */
 
 class Item_subselect :public Item_result_field
 {
-  my_bool value_assigned; /* value already assigned to subselect */
+  bool value_assigned;   /* value already assigned to subselect */
+  bool own_engine;  /* the engine was not taken from other Item_subselect */
 protected:
   /* thread handler, will be assigned in fix_fields only */
   THD *thd;
-  /* substitution instead of subselect in case of optimization */
-  Item *substitution;
-  /* unit of subquery */
-  st_select_lex_unit *unit;
-  /* engine that perform execution of subselect (single select or union) */
-  subselect_engine *engine;
   /* old engine if engine was changed */
   subselect_engine *old_engine;
   /* cache of used external tables */
@@ -64,8 +63,65 @@ protected:
   bool have_to_be_excluded;
   /* cache of constant state */
   bool const_item_cache;
-
+  
+  bool inside_first_fix_fields;
+  bool done_first_fix_fields;
+  Item *expr_cache;
+  /*
+    Set to TRUE if at optimization or execution time we determine that this
+    item's value is a constant. We need this member because it is not possible
+    to substitute 'this' with a constant item.
+  */
+  bool forced_const;
+#ifndef DBUG_OFF
+  /* Count the number of times this subquery predicate has been executed. */
+  uint exec_counter;
+#endif
 public:
+  /* 
+    Used inside Item_subselect::fix_fields() according to this scenario:
+      > Item_subselect::fix_fields
+        > engine->prepare
+          > child_join->prepare
+            (Here we realize we need to do the rewrite and set
+             substitution= some new Item, eg. Item_in_optimizer )
+          < child_join->prepare
+        < engine->prepare
+        *ref= substitution;
+        substitution= NULL;
+      < Item_subselect::fix_fields
+  */
+  /* TODO make this protected member again. */
+  Item *substitution;
+  /* engine that perform execution of subselect (single select or union) */
+  /* TODO make this protected member again. */
+  subselect_engine *engine;
+  /* unit of subquery */
+  st_select_lex_unit *unit;
+  /* A reference from inside subquery predicate to somewhere outside of it */
+  class Ref_to_outside : public Sql_alloc
+  {
+  public:
+    st_select_lex *select; /* Select where the reference is pointing to */
+    /* 
+      What is being referred. This may be NULL when we're referring to an
+      aggregate function.
+    */ 
+    Item *item; 
+  };
+  /*
+    References from within this subquery to somewhere outside of it (i.e. to
+    parent select, grandparent select, etc)
+  */
+  List<Ref_to_outside> upper_refs;
+  st_select_lex *parent_select;
+
+  /*
+   TRUE<=>Table Elimination has made it redundant to evaluate this select
+          (and so it is not part of QEP, etc)
+  */
+  bool eliminated;
+  
   /* changed engine indicator */
   bool engine_changed;
   /* subquery is transformed */
@@ -74,29 +130,35 @@ public:
   /* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */
   bool is_correlated; 
 
-  enum trans_res {RES_OK, RES_REDUCE, RES_ERROR};
   enum subs_type {UNKNOWN_SUBS, SINGLEROW_SUBS,
 		  EXISTS_SUBS, IN_SUBS, ALL_SUBS, ANY_SUBS};
 
   Item_subselect();
 
   virtual subs_type substype() { return UNKNOWN_SUBS; }
+  bool is_in_predicate()
+  {
+    /* TRUE for the IN, ALL and ANY kinds; substype() is queried once. */
+    const subs_type kind= substype();
+    return kind == IN_SUBS || kind == ALL_SUBS || kind == ANY_SUBS;
+  }
 
   /*
-     We need this method, because some compilers do not allow 'this'
-     pointer in constructor initialization list, but we need pass pointer
-     to subselect Item class to select_subselect classes constructor.
+    We need this method, because some compilers do not allow 'this'
+    pointer in constructor initialization list, but we need to pass a pointer
+    to subselect Item class to select_result_interceptor's constructor.
   */
   virtual void init (st_select_lex *select_lex,
-		     select_subselect *result);
+		     select_result_interceptor *result);
 
   ~Item_subselect();
   void cleanup();
   virtual void reset()
   {
+    eliminated= FALSE;
     null_value= 1;
   }
-  virtual trans_res select_transformer(JOIN *join);
+  virtual bool select_transformer(JOIN *join);
   bool assigned() { return value_assigned; }
   void assigned(bool a) { value_assigned= a; }
   enum Type type() const;
@@ -106,13 +168,25 @@ public:
     return null_value;
   }
   bool fix_fields(THD *thd, Item **ref);
+  bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
+  void recalc_used_tables(st_select_lex *new_parent, bool after_pullout);
   virtual bool exec();
+  /*
+    If subquery optimization or execution determines that the subquery has
+    an empty result, mark the subquery predicate as a constant value.
+  */
+  void make_const()
+  { 
+    used_tables_cache= 0;
+    const_item_cache= 0;
+    forced_const= TRUE; 
+  }
   virtual void fix_length_and_dec();
   table_map used_tables() const;
   table_map not_null_tables() const { return 0; }
   bool const_item() const;
   inline table_map get_used_tables_cache() { return used_tables_cache; }
-  inline bool get_const_item_cache() { return const_item_cache; }
   Item *get_tmp_table_item(THD *thd);
   void update_used_tables();
   virtual void print(String *str, enum_query_type query_type);
@@ -130,6 +204,7 @@ public:
   */
   bool is_evaluated() const;
   bool is_uncacheable() const;
+  bool is_expensive() { return TRUE; }
 
   /*
     Used by max/min subquery to initialize value presence registration
@@ -138,6 +213,24 @@ public:
   virtual void reset_value_registration() {}
   enum_parsing_place place() { return parsing_place; }
   bool walk(Item_processor processor, bool walk_subquery, uchar *arg);
+  bool mark_as_eliminated_processor(uchar *arg);
+  bool eliminate_subselect_processor(uchar *arg);
+  bool set_fake_select_as_master_processor(uchar *arg);
+  bool enumerate_field_refs_processor(uchar *arg);
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("subselect");
+  }
+  /**
+    Callback to test if an IN predicate is expensive.
+
+    @notes
+    The return value affects the behavior of make_cond_for_table().
+
+    @retval TRUE  if the predicate is expensive
+    @retval FALSE otherwise
+  */
+  bool is_expensive_processor(uchar *arg) { return TRUE; }
 
   /**
     Get the SELECT_LEX structure associated with this Item.
@@ -145,8 +238,10 @@ public:
   */
   st_select_lex* get_select_lex();
   const char *func_name() const { DBUG_ASSERT(0); return "subselect"; }
+  virtual bool expr_cache_is_needed(THD *);
+  virtual void get_cache_parameters(List<Item> &parameters);
 
-  friend class select_subselect;
+  friend class select_result_interceptor;
   friend class Item_in_optimizer;
   friend bool Item_field::fix_fields(THD *, Item **);
   friend int  Item_field::fix_outer_field(THD *, Field **, Item **);
@@ -154,6 +249,8 @@ public:
   friend void mark_select_range_as_dependent(THD*,
                                              st_select_lex*, st_select_lex*,
                                              Field*, Item*, Item_ident*);
+  friend bool convert_join_subqueries_to_semijoins(JOIN *join);
+
 };
 
 /* single value subselect */
@@ -165,13 +262,14 @@ protected:
   Item_cache *value, **row;
 public:
   Item_singlerow_subselect(st_select_lex *select_lex);
-  Item_singlerow_subselect() :Item_subselect(), value(0), row (0) {}
+  Item_singlerow_subselect() :Item_subselect(), value(0), row (0)
+  {}
 
   void cleanup();
   subs_type substype() { return SINGLEROW_SUBS; }
 
   void reset();
-  trans_res select_transformer(JOIN *join);
+  bool select_transformer(JOIN *join);
   void store(uint i, Item* item);
   double val_real();
   longlong val_int ();
@@ -203,6 +301,8 @@ public:
   */
   st_select_lex* invalidate_and_restore_select_lex();
 
+  Item* expr_cache_insert_transformer(uchar *thd_arg);
+
   friend class select_singlerow_subselect;
 };
 
@@ -230,6 +330,8 @@ class Item_exists_subselect :public Item_subselect
 protected:
   bool value; /* value of this item (boolean: exists/not-exists) */
 
+  void init_length_and_dec();
+
 public:
   Item_exists_subselect(st_select_lex *select_lex);
   Item_exists_subselect(): Item_subselect() {}
@@ -237,6 +339,7 @@ public:
   subs_type substype() { return EXISTS_SUBS; }
   void reset() 
   {
+    eliminated= FALSE;
     value= 0;
   }
 
@@ -249,20 +352,43 @@ public:
   void fix_length_and_dec();
   virtual void print(String *str, enum_query_type query_type);
 
+  Item* expr_cache_insert_transformer(uchar *thd_arg);
+
   friend class select_exists_subselect;
   friend class subselect_uniquesubquery_engine;
   friend class subselect_indexsubquery_engine;
 };
 
 
+TABLE_LIST * const NO_JOIN_NEST=(TABLE_LIST*)0x1;
+
 /*
-  IN subselect: this represents "left_exr IN (SELECT ...)"
+  Possible methods to execute an IN predicate. These are set by the optimizer
+  based on user-set optimizer switches, semantic analysis and cost comparison.
+*/
+#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */
+#define SUBS_SEMI_JOIN 1       /* IN was converted to semi-join. */
+#define SUBS_IN_TO_EXISTS 2    /* IN was converted to correlated EXISTS. */
+#define SUBS_MATERIALIZATION 4 /* Execute IN via subquery materialization. */
+/* Partial matching substrategies of MATERIALIZATION. */
+#define SUBS_PARTIAL_MATCH_ROWID_MERGE 8
+#define SUBS_PARTIAL_MATCH_TABLE_SCAN 16
+/* ALL/ANY will be transformed with max/min optimization */
+/*   The subquery has no aggregates, transform it into a MAX/MIN query. */
+#define SUBS_MAXMIN_INJECTED 32
+/*   The subquery has aggregates, use a special max/min subselect engine. */
+#define SUBS_MAXMIN_ENGINE 64
+
 
+/**
+  Representation of IN subquery predicates of the form
+  "left_expr IN (SELECT ...)".
+
+  @details
   This class has: 
-   - (as a descendant of Item_subselect) a "subquery execution engine" which 
-      allows it to evaluate subqueries. (and this class participates in
-      execution by having was_null variable where part of execution result
-      is stored.
+   - A "subquery execution engine" (as a subclass of Item_subselect) that allows
+     it to evaluate subqueries (this class also participates in execution by
+     having a was_null variable where part of the execution result is stored).
    - Transformation methods (todo: more on this).
 
   This class is not used directly, it is "wrapped" into Item_in_optimizer
@@ -272,20 +398,94 @@ public:
 class Item_in_subselect :public Item_exists_subselect
 {
 protected:
-  Item *left_expr;
+  /*
+    Cache of the left operand of the subquery predicate. Allocated in the
+    runtime memory root, for each execution, thus need not be freed.
+  */
+  List<Cached_item> *left_expr_cache;
+  bool first_execution;
+
   /*
     expr & optimizer used in subselect rewriting to store Item for
     all JOIN in UNION
   */
   Item *expr;
-  Item_in_optimizer *optimizer;
   bool was_null;
   bool abort_on_null;
-  bool transformed;
 public:
+  Item_in_optimizer *optimizer;
+protected:
   /* Used to trigger on/off conditions that were pushed down to subselect */
   bool *pushed_cond_guards;
+  Comp_creator *func;
+
+protected:
+  bool init_cond_guards();
+  bool select_in_like_transformer(JOIN *join);
+  bool single_value_transformer(JOIN *join);
+  bool row_value_transformer(JOIN * join);
+  bool fix_having(Item *having, st_select_lex *select_lex);
+  bool create_single_in_to_exists_cond(JOIN * join,
+                                       Item **where_item,
+                                       Item **having_item);
+  bool create_row_in_to_exists_cond(JOIN * join,
+                                    Item **where_item,
+                                    Item **having_item);
+public:
+  Item *left_expr;
+  /* Priority of this predicate in the convert-to-semi-join-nest process. */
+  int sj_convert_priority;
+  /*
+    Used by subquery optimizations to keep track about in which clause this
+    subquery predicate is located: 
+      NO_JOIN_NEST      - the predicate is an AND-part of the WHERE
+      join nest pointer - the predicate is an AND-part of ON expression
+                          of a join nest   
+      NULL              - for all other locations
+    See also THD::emb_on_expr_nest.
+  */
+  TABLE_LIST *emb_on_expr_nest;
+  /*
+    Types of left_expr and subquery's select list allow to perform subquery
+    materialization. Currently, we set this to FALSE when it as well could
+    be TRUE. This is to be properly addressed with fix for BUG#36752.
+  */
+  bool types_allow_materialization;
+
+  /* 
+    Same as above, but they also allow to scan the materialized table. 
+  */
+  bool sjm_scan_allowed;
+  double jtbm_read_time;
+  double jtbm_record_count;
+
+  /* A bitmap of possible execution strategies for an IN predicate. */
+  uchar in_strategy;
+
+  bool is_jtbm_merged;
 
+  /*
+    TRUE<=>this is a flattenable semi-join, FALSE otherwise.
+  */
+  bool is_flattenable_semijoin;
+
+  /*
+    TRUE<=>registered in the list of semijoins in outer select
+  */
+  bool is_registered_semijoin;
+
+  /*
+    Used to determine how this subselect item is represented in the item tree,
+    in case there is a need to locate it there and replace with something else.
+    Two options are possible:
+      1. This item is there 'as-is'.
+      2. This item is wrapped within Item_in_optimizer.
+  */
+  Item *original_item()
+  {
+    return is_flattenable_semijoin ? (Item*)this : (Item*)optimizer;
+  }
+  
   bool *get_cond_guard(int i)
   {
     return pushed_cond_guards ? pushed_cond_guards + i : NULL;
@@ -301,21 +501,26 @@ public:
 
   Item_in_subselect(Item * left_expr, st_select_lex *select_lex);
   Item_in_subselect()
-    :Item_exists_subselect(), optimizer(0), abort_on_null(0), transformed(0),
-     pushed_cond_guards(NULL), upper_item(0)
-  {}
-
+    :Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
+     abort_on_null(0), optimizer(0),
+    pushed_cond_guards(NULL), func(NULL), in_strategy(SUBS_NOT_TRANSFORMED),
+    is_jtbm_merged(FALSE),
+    upper_item(0)
+    {}
+  void cleanup();
   subs_type substype() { return IN_SUBS; }
   void reset() 
   {
+    eliminated= FALSE;
     value= 0;
     null_value= 0;
     was_null= 0;
   }
-  trans_res select_transformer(JOIN *join);
-  trans_res select_in_like_transformer(JOIN *join, Comp_creator *func);
-  trans_res single_value_transformer(JOIN *join, Comp_creator *func);
-  trans_res row_value_transformer(JOIN * join);
+  bool select_transformer(JOIN *join);
+  bool create_in_to_exists_cond(JOIN *join_arg);
+  bool inject_in_to_exists_cond(JOIN *join_arg);
+
+  virtual bool exec();
   longlong val_int();
   double val_real();
   String *val_str(String*);
@@ -327,10 +532,27 @@ public:
   bool test_limit(st_select_lex_unit *unit);
   virtual void print(String *str, enum_query_type query_type);
   bool fix_fields(THD *thd, Item **ref);
-
+  void fix_length_and_dec();
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
+  void update_used_tables();
+  bool setup_mat_engine();
+  bool init_left_expr_cache();
+  /* Inform 'this' that it was computed, and contains a valid result. */
+  void set_first_execution() { if (first_execution) first_execution= FALSE; }
+  bool expr_cache_is_needed(THD *thd);
+  
+  int optimize(double *out_rows, double *cost);
+  /* 
+    Return the identifier that we could use to identify the subquery for the
+    user.
+  */
+  int get_identifier();
   friend class Item_ref_null_helper;
   friend class Item_is_not_null_test;
+  friend class Item_in_optimizer;
   friend class subselect_indexsubquery_engine;
+  friend class subselect_hash_sj_engine;
+  friend class subselect_partial_match_engine;
 };
 
 
@@ -339,23 +561,26 @@ class Item_allany_subselect :public Item_in_subselect
 {
 public:
   chooser_compare_func_creator func_creator;
-  Comp_creator *func;
   bool all;
 
   Item_allany_subselect(Item * left_expr, chooser_compare_func_creator fc,
                         st_select_lex *select_lex, bool all);
 
+  void cleanup();
   // only ALL subquery has upper not
   subs_type substype() { return all?ALL_SUBS:ANY_SUBS; }
-  trans_res select_transformer(JOIN *join);
+  bool select_transformer(JOIN *join);
+  void create_comp_func(bool invert) { func= func_creator(invert); }
   virtual void print(String *str, enum_query_type query_type);
+  bool is_maxmin_applicable(JOIN *join);
+  bool transform_into_max_min(JOIN *join);
 };
 
 
 class subselect_engine: public Sql_alloc
 {
 protected:
-  select_subselect *result; /* results storage class */
+  select_result_interceptor *result; /* results storage class */
   THD *thd; /* pointer to current THD */
   Item_subselect *item; /* item, that use this engine */
   enum Item_result res_type; /* type of results */
@@ -363,14 +588,20 @@ protected:
   bool maybe_null; /* may be null (first item in select) */
 public:
 
-  subselect_engine(Item_subselect *si, select_subselect *res)
-    :thd(0)
+  enum enum_engine_type {ABSTRACT_ENGINE, SINGLE_SELECT_ENGINE,
+                         UNION_ENGINE, UNIQUESUBQUERY_ENGINE,
+                         INDEXSUBQUERY_ENGINE, HASH_SJ_ENGINE,
+                         ROWID_MERGE_ENGINE, TABLE_SCAN_ENGINE};
+
+  subselect_engine(THD *thd_arg, Item_subselect *si,
+                   select_result_interceptor *res)
   {
     result= res;
     item= si;
     res_type= STRING_RESULT;
     res_field_type= MYSQL_TYPE_VAR_STRING;
     maybe_null= 0;
+    set_thd(thd_arg);
   }
   virtual ~subselect_engine() {}; // to satisfy compiler
   virtual void cleanup()= 0;
@@ -412,13 +643,17 @@ public:
   virtual bool may_be_null() { return maybe_null; };
   virtual table_map upper_select_const_tables()= 0;
   static table_map calc_const_tables(TABLE_LIST *);
+  static table_map calc_const_tables(List<TABLE_LIST> &list);
   virtual void print(String *str, enum_query_type query_type)= 0;
-  virtual bool change_result(Item_subselect *si, select_subselect *result)= 0;
+  virtual bool change_result(Item_subselect *si,
+                             select_result_interceptor *result,
+                             bool temp= FALSE)= 0;
   virtual bool no_tables()= 0;
   virtual bool is_executed() const { return FALSE; }
   /* Check if subquery produced any rows during last query execution */
   virtual bool no_rows() = 0;
-
+  virtual enum_engine_type engine_type() { return ABSTRACT_ENGINE; }
+  virtual int get_identifier() { DBUG_ASSERT(0); return 0; }
 protected:
   void set_row(List<Item> &item_list, Item_cache **row);
 };
@@ -426,15 +661,14 @@ protected:
 
 class subselect_single_select_engine: public subselect_engine
 {
-  my_bool prepared; /* simple subselect is prepared */
-  my_bool optimized; /* simple subselect is optimized */
-  my_bool executed; /* simple subselect is executed */
-  my_bool optimize_error; ///< simple subselect optimization failed
+  bool prepared; /* simple subselect is prepared */
+  bool executed; /* simple subselect is executed */
+  bool optimize_error; ///< simple subselect optimization failed
   st_select_lex *select_lex; /* corresponding select_lex */
   JOIN * join; /* corresponding JOIN structure */
 public:
-  subselect_single_select_engine(st_select_lex *select,
-				 select_subselect *result,
+  subselect_single_select_engine(THD *thd_arg, st_select_lex *select,
+				 select_result_interceptor *result,
 				 Item_subselect *item);
   void cleanup();
   int prepare();
@@ -445,11 +679,18 @@ public:
   void exclude();
   table_map upper_select_const_tables();
   virtual void print (String *str, enum_query_type query_type);
-  bool change_result(Item_subselect *si, select_subselect *result);
+  bool change_result(Item_subselect *si,
+                     select_result_interceptor *result,
+                     bool temp);
   bool no_tables();
   bool may_be_null();
   bool is_executed() const { return executed; }
   bool no_rows();
+  virtual enum_engine_type engine_type() { return SINGLE_SELECT_ENGINE; }
+  int get_identifier();
+
+  friend class subselect_hash_sj_engine;
+  friend class Item_in_subselect;
 };
 
 
@@ -457,8 +698,8 @@ class subselect_union_engine: public subselect_engine
 {
   st_select_lex_unit *unit;  /* corresponding unit structure */
 public:
-  subselect_union_engine(st_select_lex_unit *u,
-			 select_subselect *result,
+  subselect_union_engine(THD *thd_arg, st_select_lex_unit *u,
+			 select_result_interceptor *result,
 			 Item_subselect *item);
   void cleanup();
   int prepare();
@@ -469,10 +710,13 @@ public:
   void exclude();
   table_map upper_select_const_tables();
   virtual void print (String *str, enum_query_type query_type);
-  bool change_result(Item_subselect *si, select_subselect *result);
+  bool change_result(Item_subselect *si,
+                     select_result_interceptor *result,
+                     bool temp= FALSE);
   bool no_tables();
   bool is_executed() const;
   bool no_rows();
+  virtual enum_engine_type engine_type() { return UNION_ENGINE; }
 };
 
 
@@ -512,25 +756,28 @@ public:
   // constructor can assign THD because it will be called after JOIN::prepare
   subselect_uniquesubquery_engine(THD *thd_arg, st_join_table *tab_arg,
 				  Item_subselect *subs, Item *where)
-    :subselect_engine(subs, 0), tab(tab_arg), cond(where)
-  {
-    set_thd(thd_arg);
-  }
+    :subselect_engine(thd_arg, subs, 0), tab(tab_arg), cond(where)
+  {}
   ~subselect_uniquesubquery_engine();
   void cleanup();
   int prepare();
   void fix_length_and_dec(Item_cache** row);
   int exec();
   uint cols() { return 1; }
-  uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
+  uint8 uncacheable() { return UNCACHEABLE_DEPENDENT_INJECTED; }
   void exclude();
   table_map upper_select_const_tables() { return 0; }
   virtual void print (String *str, enum_query_type query_type);
-  bool change_result(Item_subselect *si, select_subselect *result);
+  bool change_result(Item_subselect *si,
+                     select_result_interceptor *result,
+                     bool temp= FALSE);
   bool no_tables();
+  int index_lookup(); /* TIMOUR: this method needs refactoring. */
   int scan_table();
   bool copy_ref_key();
+  int copy_ref_key_simple();  /* TIMOUR: this method needs refactoring. */
   bool no_rows() { return empty_result_set; }
+  virtual enum_engine_type engine_type() { return UNIQUESUBQUERY_ENGINE; }
 };
 
 
@@ -580,6 +827,7 @@ public:
   {}
   int exec();
   virtual void print (String *str, enum_query_type query_type);
+  virtual enum_engine_type engine_type() { return INDEXSUBQUERY_ENGINE; }
 };
 
 /*
@@ -597,9 +845,458 @@ inline bool Item_subselect::is_evaluated() const
   return engine->is_executed();
 }
 
+
 inline bool Item_subselect::is_uncacheable() const
 {
   return engine->uncacheable();
 }
 
+/**
+  Compute an IN predicate via a hash semi-join. This class is responsible for
+  the materialization of the subquery, and the selection of the correct and
+  optimal execution method (e.g. direct index lookup, or partial matching) for
+  the IN predicate.
+*/
+
+class subselect_hash_sj_engine : public subselect_engine
+{
+public:
+  /* The table into which the subquery is materialized. */
+  TABLE *tmp_table;
+  /* TRUE if the subquery was materialized into a temp table. */
+  bool is_materialized;
+  /*
+    The old engine already chosen at parse time and stored in permanent memory.
+    Through this member we can re-create and re-prepare materialize_join for
+    each execution of a prepared statement. We also reuse the functionality
+    of subselect_single_select_engine::[prepare | cols].
+  */
+  subselect_single_select_engine *materialize_engine;
+  /*
+    QEP to execute the subquery and materialize its result into a
+    temporary table. Created during the first call to exec().
+  */
+  JOIN *materialize_join;
+  /*
+    A conjunction of all the equality conditions between all pairs of expressions
+    that are arguments of an IN predicate. We need these to post-filter some
+    IN results because index lookups sometimes match values that are actually
+    not equal to the search key in SQL terms.
+  */
+  Item_cond_and *semi_join_conds;
+  Name_resolution_context *semi_join_conds_context;
+
+
+  subselect_hash_sj_engine(THD *thd, Item_subselect *in_predicate,
+                           subselect_single_select_engine *old_engine)
+    : subselect_engine(thd, in_predicate, NULL), 
+      tmp_table(NULL), is_materialized(FALSE), materialize_engine(old_engine),
+      materialize_join(NULL),  semi_join_conds(NULL), lookup_engine(NULL),
+      count_partial_match_columns(0), count_null_only_columns(0),
+      strategy(UNDEFINED)
+  {}
+  ~subselect_hash_sj_engine();
+
+  bool init(List<Item> *tmp_columns, uint subquery_id);
+  void cleanup();
+  int prepare();
+  int exec();
+  virtual void print(String *str, enum_query_type query_type);
+  uint cols()
+  {
+    return materialize_engine->cols();
+  }
+  uint8 uncacheable() { return materialize_engine->uncacheable(); }
+  table_map upper_select_const_tables() { return 0; }
+  bool no_rows() { return !tmp_table->file->stats.records; }
+  virtual enum_engine_type engine_type() { return HASH_SJ_ENGINE; }
+  /*
+    TODO: factor out all these methods in a base subselect_index_engine class
+    because all of them have dummy implementations and should never be called.
+  */
+  void fix_length_and_dec(Item_cache** row);//=>base class
+  void exclude(); //=>base class
+  //=>base class
+  bool change_result(Item_subselect *si,
+                     select_result_interceptor *result,
+                     bool temp= FALSE);
+  bool no_tables();//=>base class
+
+protected:
+  /* The engine used to compute the IN predicate. */
+  subselect_engine *lookup_engine;
+  /* Keyparts of the only non-NULL composite index in a rowid merge. */
+  MY_BITMAP non_null_key_parts;
+  /* Keyparts of the single column indexes with NULL, one keypart per index. */
+  MY_BITMAP partial_match_key_parts;
+  uint count_partial_match_columns;
+  uint count_null_only_columns;
+  /* Possible execution strategies that can be used to compute hash semi-join.*/
+  enum exec_strategy {
+    UNDEFINED,
+    COMPLETE_MATCH, /* Use regular index lookups. */
+    PARTIAL_MATCH,  /* Use some partial matching strategy. */
+    PARTIAL_MATCH_MERGE, /* Use partial matching through index merging. */
+    PARTIAL_MATCH_SCAN,  /* Use partial matching through table scan. */
+    IMPOSSIBLE      /* Subquery materialization is not applicable. */
+  };
+  /* The chosen execution strategy. Computed after materialization. */
+  exec_strategy strategy;
+  exec_strategy get_strategy_using_schema();
+  exec_strategy get_strategy_using_data();
+  ulonglong rowid_merge_buff_size(bool has_non_null_key,
+                                  bool has_covering_null_row,
+                                  MY_BITMAP *partial_match_key_parts);
+  void choose_partial_match_strategy(bool has_non_null_key,
+                                     bool has_covering_null_row,
+                                     MY_BITMAP *partial_match_key_parts);
+  bool make_semi_join_conds();
+  subselect_uniquesubquery_engine* make_unique_engine();
+
+};
+
+
+/*
+  Distinguish the type of (0-based) row numbers from the type of the index into
+  an array of row numbers.
+*/
+typedef ha_rows rownum_t;
+
+
+/*
+  An Ordered_key is an in-memory table index that allows O(log(N)) time
+  lookups of a multi-part key.
+
+  If the index is over a single column, then this column may contain NULLs, and
+  the NULLs are stored and tested separately for NULL in O(1) via is_null().
+  Multi-part indexes assume that the indexed columns do not contain NULLs.
+
+  TODO:
+  = Due to the unnatural asymmetry between single and multi-part indexes, it
+    makes sense to somehow refactor or extend the class.
+
+  = This class can be refactored into a base abstract interface, and two
+    subclasses:
+    - one to represent single-column indexes, and
+    - another to represent multi-column indexes.
+    Such separation would allow slightly more efficient implementation of
+    the single-column indexes.
+  = The current design requires such indexes to be fully recreated for each
+    PS (re)execution, however most of the comprising objects can be reused.
+*/
+
+class Ordered_key : public Sql_alloc
+{
+protected:
+  /*
+    Index of the key in an array of keys. This index allows to
+    construct (sub)sets of keys represented by bitmaps.
+  */
+  uint keyid;
+  /* The table being indexed. */
+  TABLE *tbl;
+  /* The columns being indexed. */
+  Item_field **key_columns;
+  /* Number of elements in 'key_columns' (number of key parts). */
+  uint key_column_count;
+  /*
+    An expression, or sequence of expressions that forms the search key.
+    The search key is a sequence when it is Item_row. Each element of the
+    sequence is accessible via Item::element_index(int i).
+  */
+  Item *search_key;
+
+/* Value index related members. */
+  /*
+    The actual value index, consists of a sorted sequence of row numbers.
+  */
+  rownum_t *key_buff;
+  /* Number of elements in key_buff. */
+  ha_rows key_buff_elements;
+  /* Current element in 'key_buff'. */
+  ha_rows cur_key_idx;
+  /*
+    Mapping from row numbers to row ids. The element row_num_to_rowid[i]
+    contains a buffer with the rowid for the row numbered 'i'.
+    The memory for this member is not maintained by this class because
+    all Ordered_key indexes of the same table share the same mapping.
+  */
+  uchar *row_num_to_rowid;
+  /*
+    A sequence of predicates to compare the search key with the corresponding
+    columns of a table row from the index.
+  */
+  Item_func_lt **compare_pred;
+
+/* Null index related members. */
+  MY_BITMAP null_key;
+  /* Count of NULLs per column. */
+  ha_rows null_count;
+  /* The row number that contains the first NULL in a column. */
+  ha_rows min_null_row;
+  /* The row number that contains the last NULL in a column. */
+  ha_rows max_null_row;
+
+protected:
+  bool alloc_keys_buffers();
+  /*
+    Quick sort comparison function that compares two rows of the same table
+    identified with their row numbers.
+  */
+  int cmp_keys_by_row_data(rownum_t a, rownum_t b);
+  static int cmp_keys_by_row_data_and_rownum(Ordered_key *key,
+                                             rownum_t* a, rownum_t* b);
+
+  int cmp_key_with_search_key(rownum_t row_num);
+
+public:
+  Ordered_key(uint keyid_arg, TABLE *tbl_arg,
+              Item *search_key_arg, ha_rows null_count_arg,
+              ha_rows min_null_row_arg, ha_rows max_null_row_arg,
+              uchar *row_num_to_rowid_arg);
+  ~Ordered_key();
+  void cleanup();
+  /* Initialize a multi-column index. */
+  bool init(MY_BITMAP *columns_to_index);
+  /* Initialize a single-column index. */
+  bool init(int col_idx);
+
+  uint get_column_count() { return key_column_count; }
+  uint get_keyid() { return keyid; }
+  uint get_field_idx(uint i)
+  {
+    DBUG_ASSERT(i < key_column_count);
+    return key_columns[i]->field->field_index;
+  }
+  /*
+    Get the search key element that corresponds to the i-th key part of this
+    index. The bounds check on 'i' is done by get_field_idx().
+  */
+  Item *get_search_key(uint i)
+  {
+    return search_key->element_index(get_field_idx(i));
+  }
+  void add_key(rownum_t row_num)
+  {
+    /* The caller must know how many elements to add. */
+    DBUG_ASSERT(key_buff_elements && cur_key_idx < key_buff_elements);
+    key_buff[cur_key_idx]= row_num;
+    ++cur_key_idx;
+  }
+
+  void sort_keys();
+  double null_selectivity();
+
+  /*
+    Position the current element at the first row that matches the key.
+    The key itself is propagated by evaluating the current value(s) of
+    this->search_key.
+  */
+  bool lookup();
+  /* Move the current index cursor to the first key. */
+  void first()
+  {
+    DBUG_ASSERT(key_buff_elements);
+    cur_key_idx= 0;
+  }
+  /* TODO */
+  bool next_same();
+  /* Move the current index cursor to the next key. */
+  bool next()
+  {
+    DBUG_ASSERT(key_buff_elements);
+    if (cur_key_idx < key_buff_elements - 1)
+    {
+      ++cur_key_idx;
+      return TRUE;
+    }
+    return FALSE;
+  };
+  /* Return the current index element. */
+  rownum_t current()
+  {
+    DBUG_ASSERT(key_buff_elements && cur_key_idx < key_buff_elements);
+    return key_buff[cur_key_idx];
+  }
+
+  void set_null(rownum_t row_num)
+  {
+    bitmap_set_bit(&null_key, (uint)row_num);
+  }
+  bool is_null(rownum_t row_num)
+  {
+    /*
+      Indexes consisting of only NULLs do not have a bitmap buffer at all.
+      Their only initialized member is 'n_bits', which is equal to the number
+      of temp table rows.
+    */
+    if (null_count == tbl->file->stats.records)
+    {
+      DBUG_ASSERT(tbl->file->stats.records == null_key.n_bits);
+      return TRUE;
+    }
+    if (row_num > max_null_row || row_num < min_null_row)
+      return FALSE;
+    return bitmap_is_set(&null_key, (uint)row_num);
+  }
+  void print(String *str);
+};
+
+
+class subselect_partial_match_engine : public subselect_engine
+{
+protected:
+  /* The temporary table that contains a materialized subquery. */
+  TABLE *tmp_table;
+  /*
+    The engine used to check whether an IN predicate is TRUE or not. If not
+    TRUE, then subselect_rowid_merge_engine further distinguishes between
+    FALSE and UNKNOWN.
+  */
+  subselect_uniquesubquery_engine *lookup_engine;
+  /* A list of equalities between each pair of IN operands. */
+  List<Item> *equi_join_conds;
+  /*
+    True if there is an all-NULL row in tmp_table. If so, then if there is
+    no complete match, there is a guaranteed partial match.
+  */
+  bool has_covering_null_row;
+
+  /*
+    True if all nullable columns of tmp_table consist of only NULL values.
+    If so, then if there is a match in the non-null columns, there is a
+    guaranteed partial match.
+  */
+  bool has_covering_null_columns;
+
+protected:
+  virtual bool partial_match()= 0;  /* supplied by each derived engine */
+public:
+  subselect_partial_match_engine(THD *thd_arg,
+                                 subselect_uniquesubquery_engine *engine_arg,
+                                 TABLE *tmp_table_arg, Item_subselect *item_arg,
+                                 select_result_interceptor *result_arg,
+                                 List<Item> *equi_join_conds_arg,
+                                 bool has_covering_null_row_arg,
+                                 bool has_covering_null_columns_arg);
+  int prepare() { return 0; }
+  int exec();
+  void fix_length_and_dec(Item_cache**) {}
+  uint cols() { /* TODO: what is the correct value? */ return 1; }
+  uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
+  void exclude() {}
+  table_map upper_select_const_tables() { return 0; }
+  bool change_result(Item_subselect*,
+                     select_result_interceptor*,
+                     bool temp= FALSE)
+  { DBUG_ASSERT(FALSE); return false; } /* must not be called */
+  bool no_tables() { return false; }
+  bool no_rows()
+  {
+    /*
+      TODO: It is unclear what the semantics of this method are.
+      The current result is computed so that the call to no_rows()
+      from Item_in_optimizer::val_int() sets Item_in_optimizer::null_value
+      correctly.
+    */
+    return !(((Item_in_subselect *) item)->null_value);
+  }
+  void print(String*, enum_query_type);
+
+  friend void subselect_hash_sj_engine::cleanup();
+};
+
+
+class subselect_rowid_merge_engine: public subselect_partial_match_engine
+{
+protected:
+  /*
+    Mapping from row numbers to row ids. The rowids are stored sequentially
+    in the array - rowid[i] is located in row_num_to_rowid + i * rowid_length.
+  */
+  uchar *row_num_to_rowid;
+  /*
+    A subset of all the keys for which there is a match for the same row.
+    Used during execution. Computed for each outer reference.
+  */
+  MY_BITMAP matching_keys;
+  /*
+    The columns of the outer reference that are NULL. Computed for each
+    outer reference.
+  */
+  MY_BITMAP matching_outer_cols;
+  /*
+    Indexes of row numbers, sorted by <column_value, row_number>. If an
+    index may contain NULLs, the NULLs are stored efficiently in a bitmap.
+
+    The indexes are sorted by the selectivity of their NULL sub-indexes, the
+    one with fewer NULLs is first. Thus, if there is any index on
+    non-NULL columns, it is contained in merge_keys[0].
+  */
+  Ordered_key **merge_keys;
+  /* The number of elements in merge_keys. */
+  uint merge_keys_count;
+  /*
+    An index on all non-NULL columns of 'tmp_table'. The index has the
+    logical form: <[v_i1 | ... | v_ik], rownum>. It allows to find the row
+    number where the columns c_i1,...,c_ik contain the values v_i1,...,v_ik.
+    If such an index exists, it is always the first element of 'merge_keys'.
+  */
+  Ordered_key *non_null_key;
+  /*
+    Priority queue of Ordered_key indexes, one per NULLable column.
+    This queue is used by the partial match algorithm in method exec().
+  */
+  QUEUE pq;
+protected:
+  /*
+    Comparison function to compare keys in order of decreasing bitmap
+    selectivity.
+  */
+  static int cmp_keys_by_null_selectivity(Ordered_key **k1, Ordered_key **k2);
+  /*
+    Comparison function used by the priority queue pq, the 'smaller' key
+    is the one with the smaller current row number.
+  */
+  static int cmp_keys_by_cur_rownum(void *arg, uchar *k1, uchar *k2);
+
+  bool test_null_row(rownum_t row_num);
+  bool partial_match();
+public:
+  subselect_rowid_merge_engine(THD *thd_arg,
+                               subselect_uniquesubquery_engine *engine_arg,
+                               TABLE *tmp_table_arg, uint merge_keys_count_arg,
+                               bool has_covering_null_row_arg,
+                               bool has_covering_null_columns_arg,
+                               Item_subselect *item_arg,
+                               select_result_interceptor *result_arg,
+                               List<Item> *equi_join_conds_arg)
+    :subselect_partial_match_engine(thd_arg, engine_arg, tmp_table_arg,
+                                    item_arg, result_arg, equi_join_conds_arg,
+                                    has_covering_null_row_arg,
+                                    has_covering_null_columns_arg),
+    merge_keys_count(merge_keys_count_arg), non_null_key(NULL)
+  {}
+  ~subselect_rowid_merge_engine();
+  bool init(MY_BITMAP *non_null_key_parts, MY_BITMAP *partial_match_key_parts);
+  void cleanup();
+  virtual enum_engine_type engine_type() { return ROWID_MERGE_ENGINE; }
+};
+
+
+class subselect_table_scan_engine: public subselect_partial_match_engine
+{
+protected:
+  bool partial_match();  /* overrides the base class's pure virtual */
+public:
+  subselect_table_scan_engine(THD *thd_arg,
+                              subselect_uniquesubquery_engine *engine_arg,
+                              TABLE *tmp_table_arg, Item_subselect *item_arg,
+                              select_result_interceptor *result_arg,
+                              List<Item> *equi_join_conds_arg,
+                              bool has_covering_null_row_arg,
+                              bool has_covering_null_columns_arg);
+  void cleanup();
+  virtual enum_engine_type engine_type() { return TABLE_SCAN_ENGINE; }
+};
 #endif /* ITEM_SUBSELECT_INCLUDED */
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index 161c8c8eacf..55bcf868690 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2010, 2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -362,7 +363,17 @@ bool Item_sum::register_sum_func(THD *thd, Item **ref)
          sl= sl->master_unit()->outer_select() )
       sl->master_unit()->item->with_sum_func= 1;
   }
-  thd->lex->current_select->mark_as_dependent(aggr_sel);
+  thd->lex->current_select->mark_as_dependent(thd, aggr_sel, NULL);
+  return FALSE;
+}
+
+
+bool Item_sum::collect_outer_ref_processor(uchar *param)
+{
+  Collect_deps_prm *prm= (Collect_deps_prm *)param;
+  SELECT_LEX *ds;
+  if ((ds= depended_from()) && ds->nest_level < prm->nest_level)
+    prm->parameters->add_unique(this, &cmp_items);
   return FALSE;
 }
 
@@ -431,6 +442,7 @@ void Item_sum::mark_as_sum_func()
   cur_select->n_sum_items++;
   cur_select->with_sum_func= 1;
   with_sum_func= 1;
+  with_field= 0;
 }
 
 
@@ -497,7 +509,7 @@ bool Item_sum::walk (Item_processor processor, bool walk_subquery,
 Field *Item_sum::create_tmp_field(bool group, TABLE *table,
                                   uint convert_blob_length)
 {
-  Field *field;
+  Field *UNINIT_VAR(field);
   switch (result_type()) {
   case REAL_RESULT:
     field= new Field_double(max_length, maybe_null, name, decimals, TRUE);
@@ -517,7 +529,8 @@ Field *Item_sum::create_tmp_field(bool group, TABLE *table,
     field= Field_new_decimal::create_from_item(this);
     break;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     // This case should never be choosen
     DBUG_ASSERT(0);
     return 0;
@@ -538,11 +551,20 @@ void Item_sum::update_used_tables ()
       args[i]->update_used_tables();
       used_tables_cache|= args[i]->used_tables();
     }
-
-    used_tables_cache&= PSEUDO_TABLE_BITS;
-
-    /* the aggregate function is aggregated into its local context */
-    used_tables_cache |=  (1 << aggr_sel->join->tables) - 1;
+    /*
+      MariaDB: don't run the following {
+      
+      used_tables_cache&= PSEUDO_TABLE_BITS;
+
+      // the aggregate function is aggregated into its local context
+      used_tables_cache |=  (1 << aggr_sel->join->table_count) - 1;
+      
+      } because if we do it, table elimination will assume that
+        - constructs like "COUNT(*)" use columns from all tables
+        - so, it is not possible to eliminate any table
+      our solution for COUNT(*) is that it has
+        item->used_tables() == 0 && !item->const_item()
+    */
   }
 }
 
@@ -769,7 +791,7 @@ bool Aggregator_distinct::setup(THD *thd)
     if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1,
                                   0,
                                   (select_lex->options | thd->variables.option_bits),
-                                  HA_POS_ERROR, "")))
+                                  HA_POS_ERROR, const_cast<char*>(""))))
       return TRUE;
     table->file->extra(HA_EXTRA_NO_ROWS);		// Don't update rows
     table->no_rows=1;
@@ -984,7 +1006,7 @@ bool Aggregator_distinct::add()
       */
       return tree->unique_add(table->record[0] + table->s->null_bytes);
     }
-    if ((error= table->file->ha_write_row(table->record[0])) &&
+    if ((error= table->file->ha_write_tmp_row(table->record[0])) &&
         table->file->is_fatal_error(error, HA_CHECK_DUP))
       return TRUE;
     return FALSE;
@@ -1153,7 +1175,8 @@ Item_sum_hybrid::fix_fields(THD *thd, Item **ref)
     max_length= float_length(decimals);
     break;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   };
   setup_hybrid(args[0], NULL);
@@ -1188,17 +1211,31 @@ Item_sum_hybrid::fix_fields(THD *thd, Item **ref)
     Setup cache/comparator of MIN/MAX functions. When called by the
     copy_or_same function value_arg parameter contains calculated value
     of the original MIN/MAX object and it is saved in this object's cache.
+
+    We mark the value and arg_cache with 'RAND_TABLE_BIT' to ensure
+    that Arg_comparator::compare_datetime() doesn't allocate a new
+    item inside of Arg_comparator.  This would cause compare_datetime()
+    and Item_sum_min::add() to use different values!
 */
 
 void Item_sum_hybrid::setup_hybrid(Item *item, Item *value_arg)
 {
-  value= Item_cache::get_cache(item);
+  if (!(value= Item_cache::get_cache(item)))
+    return;
   value->setup(item);
   value->store(value_arg);
-  arg_cache= Item_cache::get_cache(item);
+  /* Don't cache value, as it will change */
+  if (!item->const_item())
+    value->set_used_tables(RAND_TABLE_BIT);
+  if (!(arg_cache= Item_cache::get_cache(item, item->cmp_type())))
+    return;
   arg_cache->setup(item);
+  /* Don't cache value, as it will change */
+  if (!item->const_item())
+    arg_cache->set_used_tables(RAND_TABLE_BIT);
   cmp= new Arg_comparator();
-  cmp->set_cmp_func(this, (Item**)&arg_cache, (Item**)&value, FALSE);
+  if (cmp)
+    cmp->set_cmp_func(this, (Item**)&arg_cache, (Item**)&value, FALSE);
   collation.set(item->collation);
 }
 
@@ -1223,14 +1260,17 @@ Field *Item_sum_hybrid::create_tmp_field(bool group, TABLE *table,
   */
   switch (args[0]->field_type()) {
   case MYSQL_TYPE_DATE:
-    field= new Field_newdate(maybe_null, name, collation.collation);
+    field= new Field_newdate(0, maybe_null ? (uchar*)"" : 0, 0, Field::NONE,
+                             name, collation.collation);
     break;
   case MYSQL_TYPE_TIME:
-    field= new Field_time(maybe_null, name, collation.collation);
+    field= new_Field_time(0, maybe_null ? (uchar*)"" : 0, 0, Field::NONE,
+                          name, decimals, collation.collation);
     break;
   case MYSQL_TYPE_TIMESTAMP:
   case MYSQL_TYPE_DATETIME:
-    field= new Field_datetime(maybe_null, name, collation.collation);
+    field= new_Field_datetime(0, maybe_null ? (uchar*)"" : 0, 0, Field::NONE,
+                              name, decimals, collation.collation);
     break;
   default:
     return Item_sum::create_tmp_field(group, table, convert_blob_length);
@@ -1289,13 +1329,14 @@ void Item_sum_sum::fix_length_and_dec()
   DBUG_ENTER("Item_sum_sum::fix_length_and_dec");
   maybe_null=null_value=1;
   decimals= args[0]->decimals;
-  switch (args[0]->result_type()) {
+  switch (args[0]->cast_to_int_type()) {
   case REAL_RESULT:
   case STRING_RESULT:
     hybrid_type= REAL_RESULT;
     sum= 0.0;
     break;
   case INT_RESULT:
+  case TIME_RESULT:
   case DECIMAL_RESULT:
   {
     /* SUM result can't be longer than length(arg) + length(MAX_ROWS) */
@@ -1309,7 +1350,7 @@ void Item_sum_sum::fix_length_and_dec()
     break;
   }
   case ROW_RESULT:
-  default:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   DBUG_PRINT("info", ("Type: %s (%d, %d)",
@@ -1746,7 +1787,8 @@ void Item_sum_variance::fix_length_and_dec()
     break;
   }
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   DBUG_PRINT("info", ("Type: REAL_RESULT (%d, %d)", max_length, (int)decimals));
@@ -1963,8 +2005,22 @@ void Item_sum_hybrid::cleanup()
 
 void Item_sum_hybrid::no_rows_in_result()
 {
-  was_values= FALSE;
-  clear();
+  /* We may be called here twice in case of ref field in function */
+  if (was_values)
+  {
+    was_values= FALSE;
+    was_null_value= value->null_value;
+    clear();
+  }
+}
+
+void Item_sum_hybrid::restore_to_before_no_rows_in_result()
+{
+  if (!was_values)
+  {
+    was_values= TRUE;
+    null_value= value->null_value= was_null_value;
+  }
 }
 
 
@@ -2169,7 +2225,8 @@ void Item_sum_hybrid::reset_field()
     break;
   }
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
 }
@@ -2643,8 +2700,10 @@ void Item_udf_sum::clear()
 
 bool Item_udf_sum::add()
 {
+  my_bool tmp_null_value;
   DBUG_ENTER("Item_udf_sum::add");
-  udf.add(&null_value);
+  udf.add(&tmp_null_value);
+  null_value= tmp_null_value;
   DBUG_RETURN(0);
 }
 
@@ -2680,11 +2739,15 @@ Item *Item_sum_udf_float::copy_or_same(THD* thd)
 
 double Item_sum_udf_float::val_real()
 {
+  my_bool tmp_null_value;
+  double res;
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_sum_udf_float::val");
   DBUG_PRINT("info",("result_type: %d  arg_count: %d",
 		     args[0]->result_type(), arg_count));
-  DBUG_RETURN(udf.val(&null_value));
+  res= udf.val(&tmp_null_value);
+  null_value= tmp_null_value;
+  DBUG_RETURN(res);
 }
 
 
@@ -2720,12 +2783,16 @@ longlong Item_sum_udf_decimal::val_int()
 
 my_decimal *Item_sum_udf_decimal::val_decimal(my_decimal *dec_buf)
 {
+  my_decimal *res;
+  my_bool tmp_null_value;
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_func_udf_decimal::val_decimal");
   DBUG_PRINT("info",("result_type: %d  arg_count: %d",
                      args[0]->result_type(), arg_count));
 
-  DBUG_RETURN(udf.val_decimal(&null_value, dec_buf));
+  res= udf.val_decimal(&tmp_null_value, dec_buf);
+  null_value= tmp_null_value;
+  DBUG_RETURN(res);
 }
 
 
@@ -2742,11 +2809,15 @@ Item *Item_sum_udf_int::copy_or_same(THD* thd)
 
 longlong Item_sum_udf_int::val_int()
 {
+  my_bool tmp_null_value;
+  longlong res;
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_sum_udf_int::val_int");
   DBUG_PRINT("info",("result_type: %d  arg_count: %d",
 		     args[0]->result_type(), arg_count));
-  DBUG_RETURN(udf.val_int(&null_value));
+  res= udf.val_int(&tmp_null_value);
+  null_value= tmp_null_value;
+  DBUG_RETURN(res);
 }
 
 
@@ -3080,10 +3151,10 @@ Item_func_group_concat::Item_func_group_concat(THD *thd,
     object being copied.
   */
   ORDER *tmp;
-  if (!(order= (ORDER **) thd->alloc(sizeof(ORDER *) * arg_count_order +
+  if (!(tmp= (ORDER *) thd->alloc(sizeof(ORDER *) * arg_count_order +
                                      sizeof(ORDER) * arg_count_order)))
     return;
-  tmp= (ORDER *)(order + arg_count_order);
+  order= (ORDER **)(tmp + arg_count_order);
   for (uint i= 0; i < arg_count_order; i++, tmp++)
   {
     memcpy(tmp, item->order[i], sizeof(ORDER));
@@ -3092,7 +3163,6 @@ Item_func_group_concat::Item_func_group_concat(THD *thd,
 }
 
 
-
 void Item_func_group_concat::cleanup()
 {
   DBUG_ENTER("Item_func_group_concat::cleanup");
diff --git a/sql/item_sum.h b/sql/item_sum.h
index c303385c535..c7a222bcd84 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -1,7 +1,7 @@
 #ifndef ITEM_SUM_INCLUDED
 #define ITEM_SUM_INCLUDED
-
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011 Oracle and/or its affiliates.
+   Copyright (c) 2010, 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -376,6 +376,12 @@ protected:
   */
   Item **orig_args, *tmp_orig_args[2];
   table_map used_tables_cache;
+  
+  /*
+    TRUE <=> We've managed to calculate the value of this Item in
+    opt_sum_query(), hence it can be considered constant at all subsequent
+    steps.
+  */
   bool forced_const;
   static ulonglong ram_limitation(THD *thd);
 
@@ -436,6 +442,15 @@ public:
   virtual void fix_length_and_dec() { maybe_null=1; null_value=1; }
   virtual Item *result_item(Field *field)
     { return new Item_field(field); }
+  /*
+    Return bitmap of tables that are needed to evaluate the item.
+
+    The implementation takes into account the used strategy: items resolved
+    at optimization phase will report 0.
+    Items that depend on the number of join output records, but not columns
+    of any particular table (like COUNT(*)) will report 0 from used_tables(),
+    but will still return false from const_item().
+  */
   table_map used_tables() const { return used_tables_cache; }
   void update_used_tables ();
   bool is_null() { return null_value; }
@@ -471,6 +486,7 @@ public:
   virtual Field *create_tmp_field(bool group, TABLE *table,
                                   uint convert_blob_length);
   bool walk(Item_processor processor, bool walk_subquery, uchar *argument);
+  virtual bool collect_outer_ref_processor(uchar *param);
   bool init_sum_func_check(THD *thd);
   bool check_sum_func(THD *thd, Item **ref);
   bool register_sum_func(THD *thd, Item **ref);
@@ -527,6 +543,11 @@ public:
   virtual bool setup(THD *thd) { return false; }
 
   virtual void cleanup();
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name()); 
+  }
+  bool clear_sum_processor(uchar *arg) { clear(); return 0; }
 };
 
 
@@ -809,6 +830,10 @@ public:
   }
   void fix_length_and_dec() {}
   enum Item_result result_type () const { return hybrid_type; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("avg_field");
+  }
   const char *func_name() const { DBUG_ASSERT(0); return "avg_field"; }
 };
 
@@ -886,6 +911,10 @@ public:
   }
   void fix_length_and_dec() {}
   enum Item_result result_type () const { return hybrid_type; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("var_field");
+  }
   const char *func_name() const { DBUG_ASSERT(0); return "variance_field"; }
 };
 
@@ -999,6 +1028,7 @@ protected:
   enum_field_types hybrid_field_type;
   int cmp_sign;
   bool was_values;  // Set if we have found at least one row (for max/min only)
+  bool was_null_value;
 
   public:
   Item_sum_hybrid(Item *item_par,int sign)
@@ -1030,13 +1060,9 @@ protected:
   void cleanup();
   bool any_value() { return was_values; }
   void no_rows_in_result();
+  void restore_to_before_no_rows_in_result();
   Field *create_tmp_field(bool group, TABLE *table,
 			  uint convert_blob_length);
-  /*
-    MIN/MAX uses Item_cache_datetime for storing DATETIME values, thus
-    in this case a correct INT value can be provided.
-  */
-  bool result_as_longlong() { return args[0]->result_as_longlong(); }
 };
 
 
@@ -1427,8 +1453,13 @@ public:
   void make_unique();
   double val_real()
   {
-    String *res;  res=val_str(&str_value);
-    return res ? my_atof(res->c_ptr()) : 0.0;
+    int error;
+    const char *end;
+    String *res;
+    if (!(res= val_str(&str_value)))
+      return 0.0;
+    end= res->ptr() + res->length();
+    return (my_strtod(res->ptr(), (char**) &end, &error));
   }
   longlong val_int()
   {
diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc
index 6519146d040..da40e3b99d6 100644
--- a/sql/item_timefunc.cc
+++ b/sql/item_timefunc.cc
@@ -1,5 +1,6 @@
 /*
-   Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -40,14 +41,13 @@
 #include "sql_locale.h"          // MY_LOCALE my_locale_en_US
 #include "strfunc.h"             // check_word
 #include "sql_time.h"            // make_truncated_value_warning,
-                                 // make_time, get_date_from_daynr,
+                                 // get_date_from_daynr,
                                  // calc_weekday, calc_week,
                                  // convert_month_to_period,
                                  // convert_period_to_month,
-                                 // TIME_to_timestamp, make_date,
+                                 // TIME_to_timestamp,
                                  // calc_time_diff,
                                  // calc_time_from_sec,
-                                 // known_date_time_format,
                                  // get_date_time_format_str
 #include "tztime.h"              // struct Time_zone
 #include "sql_class.h"           // THD
@@ -57,193 +57,6 @@
 /** Day number for Dec 31st, 9999. */
 #define MAX_DAY_NUMBER 3652424L
 
-/**
-  @todo
-  OPTIMIZATION
-  - Replace the switch with a function that should be called for each
-  date type.
-  - Remove sprintf and opencode the conversion, like we do in
-  Field_datetime.
-
-  The reason for this functions existence is that as we don't have a
-  way to know if a datetime/time value has microseconds in them
-  we are now only adding microseconds to the output if the
-  value has microseconds.
-
-  We can't use a standard make_date_time() for this as we don't know
-  if someone will use %f in the format specifier in which case we would get
-  the microseconds twice.
-*/
-
-static bool make_datetime(date_time_format_types format, MYSQL_TIME *ltime,
-			  String *str)
-{
-  char *buff;
-  CHARSET_INFO *cs= &my_charset_numeric;
-  uint length= MAX_DATE_STRING_REP_LENGTH;
-
-  if (str->alloc(length))
-    return 1;
-  buff= (char*) str->ptr();
-
-  switch (format) {
-  case TIME_ONLY:
-    length= cs->cset->snprintf(cs, buff, length, "%s%02d:%02d:%02d",
-			       ltime->neg ? "-" : "",
-			       ltime->hour, ltime->minute, ltime->second);
-    break;
-  case TIME_MICROSECOND:
-    length= cs->cset->snprintf(cs, buff, length, "%s%02d:%02d:%02d.%06ld",
-			       ltime->neg ? "-" : "",
-			       ltime->hour, ltime->minute, ltime->second,
-			       ltime->second_part);
-    break;
-  case DATE_ONLY:
-    length= cs->cset->snprintf(cs, buff, length, "%04d-%02d-%02d",
-			       ltime->year, ltime->month, ltime->day);
-    break;
-  case DATE_TIME:
-    length= cs->cset->snprintf(cs, buff, length,
-			       "%04d-%02d-%02d %02d:%02d:%02d",
-			       ltime->year, ltime->month, ltime->day,
-			       ltime->hour, ltime->minute, ltime->second);
-    break;
-  case DATE_TIME_MICROSECOND:
-    length= cs->cset->snprintf(cs, buff, length,
-			       "%04d-%02d-%02d %02d:%02d:%02d.%06ld",
-			       ltime->year, ltime->month, ltime->day,
-			       ltime->hour, ltime->minute, ltime->second,
-			       ltime->second_part);
-    break;
-  }
-
-  str->length(length);
-  str->set_charset(cs);
-  return 0;
-}
-
-
-/*
-  Wrapper over make_datetime() with validation of the input MYSQL_TIME value
-
-  NOTE
-    see make_datetime() for more information
-
-  RETURN
-    1    if there was an error during converion
-    0    otherwise
-*/
-
-static bool make_datetime_with_warn(date_time_format_types format, MYSQL_TIME *ltime,
-                                    String *str)
-{
-  int warning= 0;
-
-  if (make_datetime(format, ltime, str))
-    return 1;
-  if (check_time_range(ltime, &warning))
-    return 1;
-  if (!warning)
-    return 0;
-
-  make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                               str->ptr(), str->length(),
-                               MYSQL_TIMESTAMP_TIME, NullS);
-  return make_datetime(format, ltime, str);
-}
-
-
-/*
-  Wrapper over make_time() with validation of the input MYSQL_TIME value
-
-  NOTE
-    see make_time() for more info
-
-  RETURN
-    1    if there was an error during conversion
-    0    otherwise
-*/
-
-static bool make_time_with_warn(const DATE_TIME_FORMAT *format,
-                                MYSQL_TIME *l_time, String *str)
-{
-  int warning= 0;
-  make_time(format, l_time, str);
-  if (check_time_range(l_time, &warning))
-    return 1;
-  if (warning)
-  {
-    make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                                 str->ptr(), str->length(),
-                                 MYSQL_TIMESTAMP_TIME, NullS);
-    make_time(format, l_time, str);
-  }
-
-  return 0;
-}
-
-
-/*
-  Convert seconds to MYSQL_TIME value with overflow checking
-
-  SYNOPSIS:
-    sec_to_time()
-    seconds          number of seconds
-    unsigned_flag    1, if 'seconds' is unsigned, 0, otherwise
-    ltime            output MYSQL_TIME value
-
-  DESCRIPTION
-    If the 'seconds' argument is inside MYSQL_TIME data range, convert it to a
-    corresponding value.
-    Otherwise, truncate the resulting value to the nearest endpoint, and
-    produce a warning message.
-
-  RETURN
-    1                if the value was truncated during conversion
-    0                otherwise
-*/
-  
-static bool sec_to_time(longlong seconds, bool unsigned_flag, MYSQL_TIME *ltime)
-{
-  uint sec;
-
-  bzero((char *)ltime, sizeof(*ltime));
-  
-  if (seconds < 0)
-  {
-    if (unsigned_flag)
-      goto overflow;
-    ltime->neg= 1;
-    if (seconds < -3020399)
-      goto overflow;
-    seconds= -seconds;
-  }
-  else if (seconds > 3020399)
-    goto overflow;
-  
-  sec= (uint) ((ulonglong) seconds % 3600);
-  ltime->hour= (uint) (seconds/3600);
-  ltime->minute= sec/60;
-  ltime->second= sec % 60;
-
-  return 0;
-
-overflow:
-  ltime->hour= TIME_MAX_HOUR;
-  ltime->minute= TIME_MAX_MINUTE;
-  ltime->second= TIME_MAX_SECOND;
-
-  char buf[22];
-  int len= (int)(longlong10_to_str(seconds, buf, unsigned_flag ? 10 : -10)
-                 - buf);
-  make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                               buf, len, MYSQL_TIMESTAMP_TIME,
-                               NullS);
-  
-  return 1;
-}
-
-
 /*
   Date formats corresponding to compound %r and %T conversion specifiers
 
@@ -292,7 +105,8 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
 			      const char *val, uint length, MYSQL_TIME *l_time,
                               timestamp_type cached_timestamp_type,
                               const char **sub_pattern_end,
-                              const char *date_time_type)
+                              const char *date_time_type,
+                              uint fuzzy_date)
 {
   int weekday= 0, yearday= 0, daypart= 0;
   int week_number= -1;
@@ -313,6 +127,8 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
   if (!sub_pattern_end)
     bzero((char*) l_time, sizeof(*l_time));
 
+  l_time->time_type= cached_timestamp_type;
+
   for (; ptr != end && val != val_end; ptr++)
   {
     /* Skip pre-space between each argument */
@@ -486,7 +302,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
         */
         if (extract_date_time(&time_ampm_format, val,
                               (uint)(val_end - val), l_time,
-                              cached_timestamp_type, &val, "time"))
+                              cached_timestamp_type, &val, "time", fuzzy_date))
           DBUG_RETURN(1);
         break;
 
@@ -494,7 +310,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
       case 'T':
         if (extract_date_time(&time_24hrs_format, val,
                               (uint)(val_end - val), l_time,
-                              cached_timestamp_type, &val, "time"))
+                              cached_timestamp_type, &val, "time", fuzzy_date))
           DBUG_RETURN(1);
         break;
 
@@ -560,8 +376,9 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
       %U,%u should be used with %Y and not %X or %x
     */
     if ((strict_week_number &&
-        (strict_week_number_year < 0 ||
-         strict_week_number_year_type != sunday_first_n_first_week_non_iso)) ||
+         (strict_week_number_year < 0 ||
+          strict_week_number_year_type !=
+          sunday_first_n_first_week_non_iso)) ||
         (!strict_week_number && strict_week_number_year >= 0))
       goto err;
 
@@ -600,6 +417,10 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
       l_time->minute > 59 || l_time->second > 59)
     goto err;
 
+  if ((fuzzy_date & TIME_NO_ZERO_DATE) &&
+       (l_time->year == 0 || l_time->month == 0 || l_time->day == 0))
+    goto err;
+
   if (val != val_end)
   {
     do
@@ -777,10 +598,10 @@ bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time,
 	break;
       case 'r':
 	length= sprintf(intbuff, ((l_time->hour % 24) < 12) ?
-                        "%02d:%02d:%02d AM" : "%02d:%02d:%02d PM",
-		        (l_time->hour+11)%12+1,
-		        l_time->minute,
-		        l_time->second);
+                    "%02d:%02d:%02d AM" : "%02d:%02d:%02d PM",
+		    (l_time->hour+11)%12+1,
+		    l_time->minute,
+		    l_time->second);
 	str->append(intbuff, length);
 	break;
       case 'S':
@@ -789,8 +610,8 @@ bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time,
 	str->append_with_prefill(intbuff, length, 2, '0');
 	break;
       case 'T':
-	length= sprintf(intbuff,  "%02d:%02d:%02d",
-                        l_time->hour, l_time->minute, l_time->second);
+	length= sprintf(intbuff, "%02d:%02d:%02d",
+		    l_time->hour, l_time->minute, l_time->second);
 	str->append(intbuff, length);
 	break;
       case 'U':
@@ -1094,7 +915,7 @@ longlong Item_func_dayofyear::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   MYSQL_TIME ltime;
-  if (get_arg0_date(&ltime,TIME_NO_ZERO_DATE))
+  if (get_arg0_date(&ltime, TIME_NO_ZERO_IN_DATE | TIME_NO_ZERO_DATE))
     return 0;
   return (longlong) calc_daynr(ltime.year,ltime.month,ltime.day) -
     calc_daynr(ltime.year,1,1) + 1;
@@ -1360,21 +1181,23 @@ longlong Item_func_year::val_int_endpoint(bool left_endp, bool *incl_endp)
 }
 
 
-longlong Item_func_unix_timestamp::val_int()
+bool Item_func_unix_timestamp::get_timestamp_value(my_time_t *seconds,
+                                                   ulong *second_part)
 {
-  MYSQL_TIME ltime;
-  my_bool not_used;
-  
   DBUG_ASSERT(fixed == 1);
-  if (arg_count == 0)
-    return (longlong) current_thd->query_start();
   if (args[0]->type() == FIELD_ITEM)
   {						// Optimize timestamp field
     Field *field=((Item_field*) args[0])->field;
     if (field->type() == MYSQL_TYPE_TIMESTAMP)
-      return ((Field_timestamp*) field)->get_timestamp(&null_value);
+    {
+      if ((null_value= field->is_null()))
+        return 1;
+      *seconds= ((Field_timestamp*)field)->get_timestamp(second_part);
+      return 0;
+    }
   }
-  
+
+  MYSQL_TIME ltime;
   if (get_arg0_date(&ltime, 0))
   {
     /*
@@ -1383,12 +1206,42 @@ longlong Item_func_unix_timestamp::val_int()
       this case).
     */
     null_value= args[0]->null_value;
-    return 0;
+    return 1;
   }
+
+  uint error_code;
+  *seconds= TIME_to_timestamp(current_thd, &ltime, &error_code);
+  *second_part= ltime.second_part;
+  return (null_value= (error_code == ER_WARN_DATA_OUT_OF_RANGE));
+}
+
+
+longlong Item_func_unix_timestamp::int_op()
+{
+  if (arg_count == 0)
+    return (longlong) current_thd->query_start();
   
-  return (longlong) TIME_to_timestamp(current_thd, &ltime, &not_used);
+  ulong second_part;
+  my_time_t seconds;
+  if (get_timestamp_value(&seconds, &second_part))
+    return 0;
+
+  return seconds;
 }
 
+
+my_decimal *Item_func_unix_timestamp::decimal_op(my_decimal* buf)
+{
+  /* Seconds plus fraction as decimal; 0 when get_timestamp_value() fails */
+  my_time_t secs;
+  ulong frac;
+  if (get_timestamp_value(&secs, &frac))
+    return 0;
+  const bool negative= secs < 0;
+  return seconds2my_decimal(negative, negative ? -secs : secs, frac, buf);
+}
+
+
 enum_monotonicity_info Item_func_unix_timestamp::get_monotonicity_info() const
 {
   if (args[0]->type() == Item::FIELD_ITEM &&
@@ -1404,37 +1257,54 @@ longlong Item_func_unix_timestamp::val_int_endpoint(bool left_endp, bool *incl_e
   DBUG_ASSERT(arg_count == 1 &&
               args[0]->type() == Item::FIELD_ITEM &&
               args[0]->field_type() == MYSQL_TYPE_TIMESTAMP);
-  Field *field=((Item_field*) args[0])->field;
+  Field_timestamp *field=(Field_timestamp *)(((Item_field*)args[0])->field);
   /* Leave the incl_endp intact */
-  return ((Field_timestamp*) field)->get_timestamp(&null_value);
+  ulong unused;
+  my_time_t ts= field->get_timestamp(&unused);
+  null_value= field->is_null();
+  return ts;
 }
 
 
-longlong Item_func_time_to_sec::val_int()
+longlong Item_func_time_to_sec::int_op()
 {
   DBUG_ASSERT(fixed == 1);
   MYSQL_TIME ltime;
-  longlong seconds;
-  (void) get_arg0_time(&ltime);
-  seconds=ltime.hour*3600L+ltime.minute*60+ltime.second;
+  if (get_arg0_time(&ltime))
+    return 0;
+
+  longlong seconds=ltime.hour*3600L+ltime.minute*60+ltime.second;
   return ltime.neg ? -seconds : seconds;
 }
 
 
+my_decimal *Item_func_time_to_sec::decimal_op(my_decimal* buf)
+{
+  /* Like int_op(), but second_part is handed on to seconds2my_decimal() */
+  DBUG_ASSERT(fixed == 1);
+  MYSQL_TIME tval;
+  if (get_arg0_time(&tval))
+    return 0;
+  longlong whole_secs= tval.hour*3600L+tval.minute*60+tval.second;
+  return seconds2my_decimal(tval.neg, whole_secs, tval.second_part, buf);
+}
+
+
 /**
   Convert a string to a interval value.
 
   To make code easy, allow interval objects without separators.
 */
 
-bool get_interval_value(Item *args,interval_type int_type,
-			       String *str_value, INTERVAL *interval)
+bool get_interval_value(Item *args,interval_type int_type, INTERVAL *interval)
 {
   ulonglong array[5];
   longlong UNINIT_VAR(value);
   const char *UNINIT_VAR(str);
   size_t UNINIT_VAR(length);
-  CHARSET_INFO *cs=str_value->charset();
+  CHARSET_INFO *UNINIT_VAR(cs);
+  char buf[100];
+  String str_value(buf, sizeof(buf), &my_charset_bin);
 
   bzero((char*) interval,sizeof(*interval));
   if ((int) int_type <= INTERVAL_MICROSECOND)
@@ -1451,11 +1321,12 @@ bool get_interval_value(Item *args,interval_type int_type,
   else
   {
     String *res;
-    if (!(res= args->val_str_ascii(str_value)))
+    if (!(res= args->val_str_ascii(&str_value)))
       return (1);
 
     /* record negative intervalls in interval->neg */
     str=res->ptr();
+    cs= res->charset();
     const char *end=str+res->length();
     while (str != end && my_isspace(cs,*str))
       str++;
@@ -1579,71 +1450,84 @@ bool get_interval_value(Item *args,interval_type int_type,
 }
 
 
-String *Item_date::val_str(String *str)
+void Item_temporal_func::fix_length_and_dec()
+{
+  /* Base display widths, indexed by mysql_type_to_time_type() result + 2 */
+  static const uint max_time_type_width[5]=
+  { MAX_DATETIME_WIDTH, MAX_DATETIME_WIDTH, MAX_DATE_WIDTH,
+    MAX_DATETIME_WIDTH, MIN_TIME_WIDTH };
+  maybe_null= true;
+  max_length= max_time_type_width[mysql_type_to_time_type(field_type())+2];
+  if (decimals)
+  {
+    if (decimals != NOT_FIXED_DEC)
+    {
+      set_if_smaller(decimals, TIME_SECOND_PART_DIGITS);
+      max_length+= decimals + 1;
+    }
+    else
+      max_length+= TIME_SECOND_PART_DIGITS + 1;
+  }
+  sql_mode= current_thd->variables.sql_mode &
+            (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE);
+  collation.set(&my_charset_numeric, DERIVATION_NUMERIC, MY_REPERTOIRE_ASCII);
+}
+
+String *Item_temporal_func::val_str(String *str)
+{
+  DBUG_ASSERT(fixed == 1);
+  return val_string_from_date(str);
+}
+
+
+longlong Item_temporal_func::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   MYSQL_TIME ltime;
-  if (get_date(&ltime, TIME_FUZZY_DATE))
-    return (String *) 0;
-  if (str->alloc(MAX_DATE_STRING_REP_LENGTH))
-  {
-    null_value= 1;
-    return (String *) 0;
-  }
-  make_date((DATE_TIME_FORMAT *) 0, &ltime, str);
-  return str;
+  if (get_date(&ltime, TIME_FUZZY_DATE | sql_mode))
+    return 0;
+  return (longlong)TIME_to_ulonglong(&ltime);
 }
 
 
-longlong Item_date::val_int()
+double Item_temporal_func::val_real()
 {
   DBUG_ASSERT(fixed == 1);
   MYSQL_TIME ltime;
-  if (get_date(&ltime, TIME_FUZZY_DATE))
+  if (get_date(&ltime, TIME_FUZZY_DATE | sql_mode))
     return 0;
-  return (longlong) (ltime.year*10000L+ltime.month*100+ltime.day);
+  return TIME_to_double(&ltime);
 }
 
 
 bool Item_func_from_days::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   longlong value=args[0]->val_int();
-  if ((null_value=args[0]->null_value))
-    return 1;
+  if (args[0]->null_value)
+    return (null_value= 1);
+  if ((fuzzy_date & TIME_NO_ZERO_DATE) && value == 0)
+    return (null_value= 1);
   bzero(ltime, sizeof(MYSQL_TIME));
   get_date_from_daynr((long) value, &ltime->year, &ltime->month, &ltime->day);
 
-  if ((null_value= (fuzzy_date & TIME_NO_ZERO_DATE) &&
-       (ltime->year == 0 || ltime->month == 0 || ltime->day == 0)))
-    return TRUE;
+  if ((fuzzy_date & TIME_NO_ZERO_DATE) &&
+       (ltime->year == 0 || ltime->month == 0 || ltime->day == 0))
+    return (null_value= 1);
 
   ltime->time_type= MYSQL_TIMESTAMP_DATE;
-  return 0;
+  return (null_value= 0);
 }
 
 
 void Item_func_curdate::fix_length_and_dec()
 {
-  Item_date::fix_length_and_dec();
-
   store_now_in_TIME(&ltime);
   
   /* We don't need to set second_part and neg because they already 0 */
   ltime.hour= ltime.minute= ltime.second= 0;
   ltime.time_type= MYSQL_TIMESTAMP_DATE;
-  value= (longlong) TIME_to_ulonglong_date(&ltime);
-}
-
-String *Item_func_curdate::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  if (str->alloc(MAX_DATE_STRING_REP_LENGTH))
-  {
-    null_value= 1;
-    return (String *) 0;
-  }
-  make_date((DATE_TIME_FORMAT *) 0, &ltime, str);
-  return str;
+  Item_datefunc::fix_length_and_dec();
+  maybe_null= false;
 }
 
 /**
@@ -1653,8 +1537,7 @@ String *Item_func_curdate::val_str(String *str)
 void Item_func_curdate_local::store_now_in_TIME(MYSQL_TIME *now_time)
 {
   THD *thd= current_thd;
-  thd->variables.time_zone->gmt_sec_to_TIME(now_time, 
-                                             (my_time_t)thd->query_start());
+  thd->variables.time_zone->gmt_sec_to_TIME(now_time, thd->query_start());
   thd->time_zone_used= 1;
 }
 
@@ -1665,8 +1548,8 @@ void Item_func_curdate_local::store_now_in_TIME(MYSQL_TIME *now_time)
 */
 void Item_func_curdate_utc::store_now_in_TIME(MYSQL_TIME *now_time)
 {
-  my_tz_UTC->gmt_sec_to_TIME(now_time, 
-                             (my_time_t)(current_thd->query_start()));
+  THD *thd= current_thd;
+  my_tz_UTC->gmt_sec_to_TIME(now_time, thd->query_start());
   /* 
     We are not flagging this query as using time zone, since it uses fixed
     UTC-SYSTEM time-zone.
@@ -1682,25 +1565,35 @@ bool Item_func_curdate::get_date(MYSQL_TIME *res,
 }
 
 
-String *Item_func_curtime::val_str(String *str)
+bool Item_func_curtime::fix_fields(THD *thd, Item **items)
 {
-  DBUG_ASSERT(fixed == 1);
-  str_value.set(buff, buff_length, &my_charset_latin1);
-  return &str_value;
+  if (decimals > TIME_SECOND_PART_DIGITS)
+  {
+    my_error(ER_TOO_BIG_PRECISION, MYF(0), decimals, func_name(),
+             TIME_SECOND_PART_DIGITS);
+    return 1;
+  }
+  return Item_timefunc::fix_fields(thd, items);
 }
 
-
-void Item_func_curtime::fix_length_and_dec()
+bool Item_func_curtime::get_date(MYSQL_TIME *res,
+                                 uint fuzzy_date __attribute__((unused)))
 {
-  MYSQL_TIME ltime;
-
-  decimals= DATETIME_DEC;
-  store_now_in_TIME(&ltime);
-  value= TIME_to_ulonglong_time(&ltime);
-  buff_length= (uint) my_time_to_str(&ltime, buff);
-  fix_length_and_charset_datetime(buff_length);
+  *res= ltime;
+  return 0;
 }
 
+static void set_sec_part(ulong sec_part, MYSQL_TIME *ltime, Item *item)
+{
+  /* Store sec_part in ltime, cut down to the precision of item (if any) */
+  DBUG_ASSERT(item->decimals == AUTO_SEC_PART_DIGITS ||
+              item->decimals <= TIME_SECOND_PART_DIGITS);
+  if (!item->decimals)
+    return;                          /* ltime->second_part is left as is */
+  if (item->decimals < TIME_SECOND_PART_DIGITS)
+    sec_part= sec_part_truncate(sec_part, item->decimals);
+  ltime->second_part= sec_part;
+}
 
 /**
     Converts current time in my_time_t to MYSQL_TIME represenatation for local
@@ -1709,8 +1602,10 @@ void Item_func_curtime::fix_length_and_dec()
 void Item_func_curtime_local::store_now_in_TIME(MYSQL_TIME *now_time)
 {
   THD *thd= current_thd;
-  thd->variables.time_zone->gmt_sec_to_TIME(now_time, 
-                                             (my_time_t)thd->query_start());
+  thd->variables.time_zone->gmt_sec_to_TIME(now_time, thd->query_start());
+  now_time->year= now_time->month= now_time->day= 0;
+  now_time->time_type= MYSQL_TIMESTAMP_TIME;
+  set_sec_part(thd->query_start_sec_part(), now_time, this);
   thd->time_zone_used= 1;
 }
 
@@ -1721,35 +1616,28 @@ void Item_func_curtime_local::store_now_in_TIME(MYSQL_TIME *now_time)
 */
 void Item_func_curtime_utc::store_now_in_TIME(MYSQL_TIME *now_time)
 {
-  my_tz_UTC->gmt_sec_to_TIME(now_time, 
-                             (my_time_t)(current_thd->query_start()));
+  THD *thd= current_thd;
+  my_tz_UTC->gmt_sec_to_TIME(now_time, thd->query_start());
+  now_time->year= now_time->month= now_time->day= 0;
+  now_time->time_type= MYSQL_TIMESTAMP_TIME;
+  set_sec_part(thd->query_start_sec_part(), now_time, this);
   /* 
     We are not flagging this query as using time zone, since it uses fixed
     UTC-SYSTEM time-zone.
   */
 }
 
-
-String *Item_func_now::val_str(String *str)
+bool Item_func_now::fix_fields(THD *thd, Item **items)
 {
-  DBUG_ASSERT(fixed == 1);
-  str_value.set(buff, buff_length, &my_charset_numeric);
-  return &str_value;
-}
-
-
-void Item_func_now::fix_length_and_dec()
-{
-  decimals= DATETIME_DEC;
-
-  store_now_in_TIME(&ltime);
-  value= (longlong) TIME_to_ulonglong_datetime(&ltime);
-
-  buff_length= (uint) my_datetime_to_str(&ltime, buff);
-  fix_length_and_charset_datetime(buff_length);
+  if (decimals > TIME_SECOND_PART_DIGITS)
+  {
+    my_error(ER_TOO_BIG_PRECISION, MYF(0), decimals, func_name(),
+             TIME_SECOND_PART_DIGITS);
+    return 1;
+  }
+  return Item_temporal_func::fix_fields(thd, items);
 }
 
-
 /**
     Converts current time in my_time_t to MYSQL_TIME represenatation for local
     time zone. Defines time zone (local) used for whole NOW function.
@@ -1757,8 +1645,8 @@ void Item_func_now::fix_length_and_dec()
 void Item_func_now_local::store_now_in_TIME(MYSQL_TIME *now_time)
 {
   THD *thd= current_thd;
-  thd->variables.time_zone->gmt_sec_to_TIME(now_time, 
-                                             (my_time_t)thd->query_start());
+  thd->variables.time_zone->gmt_sec_to_TIME(now_time, thd->query_start());
+  set_sec_part(thd->query_start_sec_part(), now_time, this);
   thd->time_zone_used= 1;
 }
 
@@ -1769,8 +1657,9 @@ void Item_func_now_local::store_now_in_TIME(MYSQL_TIME *now_time)
 */
 void Item_func_now_utc::store_now_in_TIME(MYSQL_TIME *now_time)
 {
-  my_tz_UTC->gmt_sec_to_TIME(now_time, 
-                             (my_time_t)(current_thd->query_start()));
+  THD *thd= current_thd;
+  my_tz_UTC->gmt_sec_to_TIME(now_time, thd->query_start());
+  set_sec_part(thd->query_start_sec_part(), now_time, this);
   /* 
     We are not flagging this query as using time zone, since it uses fixed
     UTC-SYSTEM time-zone.
@@ -1786,13 +1675,6 @@ bool Item_func_now::get_date(MYSQL_TIME *res,
 }
 
 
-int Item_func_now::save_in_field(Field *to, bool no_conversions)
-{
-  to->set_notnull();
-  return to->store_time(&ltime, MYSQL_TIMESTAMP_DATETIME);
-}
-
-
 /**
     Converts current time in my_time_t to MYSQL_TIME represenatation for local
     time zone. Defines time zone (local) used for whole SYSDATE function.
@@ -1800,97 +1682,61 @@ int Item_func_now::save_in_field(Field *to, bool no_conversions)
 void Item_func_sysdate_local::store_now_in_TIME(MYSQL_TIME *now_time)
 {
   THD *thd= current_thd;
-  thd->variables.time_zone->gmt_sec_to_TIME(now_time, (my_time_t) my_time(0));
+  my_hrtime_t now= my_hrtime();
+  thd->variables.time_zone->gmt_sec_to_TIME(now_time, hrtime_to_my_time(now));
+  set_sec_part(hrtime_sec_part(now), now_time, this);
   thd->time_zone_used= 1;
 }
 
 
-String *Item_func_sysdate_local::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  store_now_in_TIME(&ltime);
-  buff_length= (uint) my_datetime_to_str(&ltime, buff);
-  str_value.set(buff, buff_length, &my_charset_numeric);
-  return &str_value;
-}
-
-
-longlong Item_func_sysdate_local::val_int()
-{
-  DBUG_ASSERT(fixed == 1);
-  store_now_in_TIME(&ltime);
-  return (longlong) TIME_to_ulonglong_datetime(&ltime);
-}
-
-
-double Item_func_sysdate_local::val_real()
-{
-  DBUG_ASSERT(fixed == 1);
-  store_now_in_TIME(&ltime);
-  return ulonglong2double(TIME_to_ulonglong_datetime(&ltime));
-}
-
-
-void Item_func_sysdate_local::fix_length_and_dec()
-{
-  decimals= 0;
-  fix_length_and_charset_datetime(MAX_DATETIME_WIDTH);
-}
-
-
 bool Item_func_sysdate_local::get_date(MYSQL_TIME *res,
                                        uint fuzzy_date __attribute__((unused)))
 {
-  store_now_in_TIME(&ltime);
-  *res= ltime;
-  return 0;
-}
-
-
-int Item_func_sysdate_local::save_in_field(Field *to, bool no_conversions)
-{
-  store_now_in_TIME(&ltime);
-  to->set_notnull();
-  to->store_time(&ltime, MYSQL_TIMESTAMP_DATETIME);
+  store_now_in_TIME(res);
   return 0;
 }
 
-
-String *Item_func_sec_to_time::val_str(String *str)
+bool Item_func_sec_to_time::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-  longlong arg_val= args[0]->val_int(); 
+  bool sign;
+  ulonglong sec;
+  ulong sec_part;
 
-  if ((null_value=args[0]->null_value) ||
-      str->alloc(MAX_DATE_STRING_REP_LENGTH))
-  {
-    null_value= 1;
-    return (String*) 0;
-  }
+  bzero((char *)ltime, sizeof(*ltime));
+  ltime->time_type= MYSQL_TIMESTAMP_TIME;
 
-  sec_to_time(arg_val, args[0]->unsigned_flag, &ltime);
-  
-  make_time((DATE_TIME_FORMAT *) 0, &ltime, str);
-  return str;
-}
+  sign= args[0]->get_seconds(&sec, &sec_part);
+  /* sign ends up in ltime->neg; a NULL argument makes the result NULL */
+  if ((null_value= args[0]->null_value))
+    return 1;
 
+  ltime->neg= sign;
+  if (sec > TIME_MAX_VALUE_SECONDS)
+    goto overflow;
 
-longlong Item_func_sec_to_time::val_int()
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-  longlong arg_val= args[0]->val_int(); 
+  DBUG_ASSERT(sec_part <= TIME_MAX_SECOND_PART);
   
-  if ((null_value=args[0]->null_value))
-    return 0;
+  ltime->hour=   (uint) (sec/3600);
+  ltime->minute= (uint) ((sec % 3600) / 60);
+  ltime->second= (uint) (sec % 60);
+  ltime->second_part= sec_part;
 
-  sec_to_time(arg_val, args[0]->unsigned_flag, &ltime);
+  return 0;
 
-  return (ltime.neg ? -1 : 1) *
-    (longlong) ((ltime.hour)*10000 + ltime.minute*100 + ltime.second);
-}
+overflow:
+  /* clamp to the max TIME via check_time_range(); err may be NULL, guard it */
+  int unused;
+  char buf[100];
+  String tmp(buf, sizeof(buf), &my_charset_bin), *err= args[0]->val_str(&tmp);
 
+  ltime->hour= TIME_MAX_HOUR+1;
+  check_time_range(ltime, decimals, &unused);
+  make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                               err ? err->ptr() : "", err ? err->length() : 0,
+                               MYSQL_TIMESTAMP_TIME, NullS);
+  return 0;
+}
 
 void Item_func_date_format::fix_length_and_dec()
 {
@@ -2027,25 +1873,12 @@ String *Item_func_date_format::val_str(String *str)
   String *format;
   MYSQL_TIME l_time;
   uint size;
+  int is_time_flag = is_time_format ? TIME_TIME_ONLY : 0;
   DBUG_ASSERT(fixed == 1);
-
-  if (!is_time_format)
-  {
-    if (get_arg0_date(&l_time, TIME_FUZZY_DATE))
-      return 0;
-  }
-  else
-  {
-    String *res;
-    if (!(res=args[0]->val_str(str)) ||
-	(str_to_time_with_warn(res->charset(), res->ptr(), res->length(),
-	                       &l_time)))
-      goto null_date;
-
-    l_time.year=l_time.month=l_time.day=0;
-    null_value=0;
-  }
-
+  
+  if (get_arg0_date(&l_time, TIME_FUZZY_DATE | is_time_flag))
+    return 0;
+  
   if (!(format = args[1]->val_str(str)) || !format->length())
     goto null_date;
 
@@ -2083,98 +1916,39 @@ null_date:
 void Item_func_from_unixtime::fix_length_and_dec()
 { 
   thd= current_thd;
-  decimals= DATETIME_DEC;
-  fix_length_and_charset_datetime(MAX_DATETIME_WIDTH);
-  maybe_null= 1;
   thd->time_zone_used= 1;
+  decimals= args[0]->decimals;
+  Item_temporal_func::fix_length_and_dec();
 }
 
 
-String *Item_func_from_unixtime::val_str(String *str)
-{
-  MYSQL_TIME time_tmp;
-
-  DBUG_ASSERT(fixed == 1);
-
-  if (get_date(&time_tmp, 0))
-    return 0;
-
-  if (str->alloc(MAX_DATE_STRING_REP_LENGTH))
-  {
-    null_value= 1;
-    return 0;
-  }
-
-  make_datetime((DATE_TIME_FORMAT *) 0, &time_tmp, str);
-
-  return str;
-}
-
-
-longlong Item_func_from_unixtime::val_int()
-{
-  MYSQL_TIME time_tmp;
-
-  DBUG_ASSERT(fixed == 1);
-
-  if (get_date(&time_tmp, 0))
-    return 0;
-
-  return (longlong) TIME_to_ulonglong_datetime(&time_tmp);
-}
-
 bool Item_func_from_unixtime::get_date(MYSQL_TIME *ltime,
 				       uint fuzzy_date __attribute__((unused)))
 {
-  ulonglong tmp= (ulonglong)(args[0]->val_int());
-  /*
-    "tmp > TIMESTAMP_MAX_VALUE" check also covers case of negative
-    from_unixtime() argument since tmp is unsigned.
-  */
-  if ((null_value= (args[0]->null_value || tmp > TIMESTAMP_MAX_VALUE)))
-    return 1;
-
-  thd->variables.time_zone->gmt_sec_to_TIME(ltime, (my_time_t)tmp);
-
-  return 0;
-}
-
-
-void Item_func_convert_tz::fix_length_and_dec()
-{
-  decimals= 0;
-  fix_length_and_charset_datetime(MAX_DATETIME_WIDTH);
-  maybe_null= 1;
-}
+  bool sign;
+  ulonglong sec;
+  ulong sec_part;
 
+  bzero((char *)ltime, sizeof(*ltime));
+  ltime->time_type= MYSQL_TIMESTAMP_TIME;
 
-String *Item_func_convert_tz::val_str(String *str)
-{
-  MYSQL_TIME time_tmp;
+  sign= args[0]->get_seconds(&sec, &sec_part);
 
-  if (get_date(&time_tmp, 0))
-    return 0;
+  if (args[0]->null_value || sign || sec > TIMESTAMP_MAX_VALUE)
+    return (null_value= 1);
 
-  if (str->alloc(MAX_DATE_STRING_REP_LENGTH))
-  {
-    null_value= 1;
-    return 0;
-  }
+  thd->variables.time_zone->gmt_sec_to_TIME(ltime, (my_time_t)sec);
 
-  make_datetime((DATE_TIME_FORMAT *) 0, &time_tmp, str);
+  ltime->second_part= sec_part;
 
-  return str;
+  return (null_value= 0);
 }
 
 
-longlong Item_func_convert_tz::val_int()
+void Item_func_convert_tz::fix_length_and_dec()
 {
-  MYSQL_TIME time_tmp;
-
-  if (get_date(&time_tmp, 0))
-    return 0;
-  
-  return (longlong)TIME_to_ulonglong_datetime(&time_tmp);
+  decimals= args[0]->decimals;
+  Item_temporal_func::fix_length_and_dec();
 }
 
 
@@ -2197,29 +1971,29 @@ bool Item_func_convert_tz::get_date(MYSQL_TIME *ltime,
     to_tz_cached= args[2]->const_item();
   }
 
-  if (from_tz==0 || to_tz==0 || get_arg0_date(ltime, TIME_NO_ZERO_DATE))
-  {
-    null_value= 1;
-    return 1;
-  }
+  if (from_tz==0 || to_tz==0 ||
+      get_arg0_date(ltime, TIME_NO_ZERO_DATE | TIME_NO_ZERO_IN_DATE))
+    return (null_value= 1);
 
   {
-    my_bool not_used;
+    uint not_used;
     my_time_tmp= from_tz->TIME_to_gmt_sec(ltime, &not_used);
+    ulong sec_part= ltime->second_part;
     /* my_time_tmp is guranteed to be in the allowed range */
     if (my_time_tmp)
       to_tz->gmt_sec_to_TIME(ltime, my_time_tmp);
+    /* we rely on the fact that no timezone conversion can change sec_part */
+    ltime->second_part= sec_part;
   }
 
-  null_value= 0;
-  return 0;
+  return (null_value= 0);
 }
 
 
 void Item_func_convert_tz::cleanup()
 {
   from_tz_cached= to_tz_cached= 0;
-  Item_date_func::cleanup();
+  Item_temporal_func::cleanup();
 }
 
 
@@ -2227,18 +2001,20 @@ void Item_date_add_interval::fix_length_and_dec()
 {
   enum_field_types arg0_field_type;
 
-  maybe_null=1;
-
   /*
     The field type for the result of an Item_date function is defined as
     follows:
 
     - If first arg is a MYSQL_TYPE_DATETIME result is MYSQL_TYPE_DATETIME
     - If first arg is a MYSQL_TYPE_DATE and the interval type uses hours,
-      minutes or seconds then type is MYSQL_TYPE_DATETIME.
+      minutes or seconds then type is MYSQL_TYPE_DATETIME,
+      otherwise it's MYSQL_TYPE_DATE.
+    - If first arg is a MYSQL_TYPE_TIME and the interval type isn't using
+      anything larger than days, then the result is MYSQL_TYPE_TIME,
+      otherwise it's MYSQL_TYPE_DATETIME.
     - Otherwise the result is MYSQL_TYPE_STRING
-      (This is because you can't know if the string contains a DATE, MYSQL_TIME or
-      DATETIME argument)
+      (This is because you can't know if the string contains a DATE,
+      MYSQL_TIME or DATETIME argument)
   */
   cached_field_type= MYSQL_TYPE_STRING;
   arg0_field_type= args[0]->field_type();
@@ -2252,21 +2028,19 @@ void Item_date_add_interval::fix_length_and_dec()
     else
       cached_field_type= MYSQL_TYPE_DATETIME;
   }
-
-  if (cached_field_type == MYSQL_TYPE_STRING)
+  else if (arg0_field_type == MYSQL_TYPE_TIME)
   {
-    /* Behave as a usual string function when return type is VARCHAR. */
-    fix_length_and_charset(MAX_DATETIME_FULL_WIDTH, default_charset());
+    if (int_type >= INTERVAL_DAY && int_type != INTERVAL_YEAR_MONTH)
+      cached_field_type= arg0_field_type;
+    else
+      cached_field_type= MYSQL_TYPE_DATETIME;
   }
+  if (int_type == INTERVAL_MICROSECOND || int_type >= INTERVAL_DAY_MICROSECOND)
+    decimals= 6;
   else
-  {
-    /*
-      Follow the "Number-to-string conversion" rules as in WorkLog 2649
-      when return type is DATE or DATETIME.
-    */
-    fix_length_and_charset_datetime(MAX_DATETIME_FULL_WIDTH);
-  }
-  value.alloc(max_length);
+    decimals= args[0]->decimals;
+
+  Item_temporal_func::fix_length_and_dec();
 }
 
 
@@ -2276,57 +2050,19 @@ bool Item_date_add_interval::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   INTERVAL interval;
 
-  if (args[0]->get_date(ltime, TIME_NO_ZERO_DATE) ||
-      get_interval_value(args[1], int_type, &value, &interval))
+  if (args[0]->get_date(ltime, TIME_NO_ZERO_DATE | TIME_FUZZY_DATE) ||
+      get_interval_value(args[1], int_type, &interval))
     return (null_value=1);
 
   if (date_sub_interval)
     interval.neg = !interval.neg;
 
-  if ((null_value= date_add_interval(ltime, int_type, interval)))
-    return 1;
-  return 0;
-}
-
-
-String *Item_date_add_interval::val_str_ascii(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-  enum date_time_format_types format;
-
-  if (Item_date_add_interval::get_date(&ltime, TIME_NO_ZERO_DATE))
-    return 0;
-
-  if (ltime.time_type == MYSQL_TIMESTAMP_DATE)
-    format= DATE_ONLY;
-  else if (ltime.second_part)
-    format= DATE_TIME_MICROSECOND;
-  else
-    format= DATE_TIME;
-
-  if (!make_datetime(format, &ltime, str))
-    return str;
-
-  null_value=1;
-  return 0;
-}
-
-
-longlong Item_date_add_interval::val_int()
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-  longlong date;
-  if (Item_date_add_interval::get_date(&ltime, TIME_NO_ZERO_DATE))
-    return (longlong) 0;
-  date = (ltime.year*100L + ltime.month)*100L + ltime.day;
-  return ltime.time_type == MYSQL_TIMESTAMP_DATE ? date :
-    ((date*100L + ltime.hour)*100L+ ltime.minute)*100L + ltime.second;
+  if (date_add_interval(ltime, int_type, interval))
+    return (null_value=1);
+  return (null_value= 0);
 }
 
 
-
 bool Item_date_add_interval::eq(const Item *item, bool binary_cmp) const
 {
   Item_date_add_interval *other= (Item_date_add_interval*) item;
@@ -2408,27 +2144,12 @@ longlong Item_extract::val_int()
   uint year;
   ulong week_format;
   long neg;
-  if (date_value)
-  {
-    if (get_arg0_date(&ltime, TIME_FUZZY_DATE))
-      return 0;
-    neg=1;
-  }
-  else
-  {
-    char buf[40];
-    String value(buf, sizeof(buf), &my_charset_bin);;
-    String *res= args[0]->val_str(&value);
-    if (!res ||
-        str_to_time_with_warn(res->charset(), res->ptr(), res->length(),
-                              &ltime))
-    {
-      null_value=1;
-      return 0;
-    }
-    neg= ltime.neg ? -1 : 1;
-    null_value=0;
-  }
+  int is_time_flag = date_value ? 0 : TIME_TIME_ONLY;
+
+  if (get_arg0_date(&ltime, TIME_FUZZY_DATE | is_time_flag))
+    return 0;
+  neg= ltime.neg ? -1 : 1;
+
   switch (int_type) {
   case INTERVAL_YEAR:		return ltime.year;
   case INTERVAL_YEAR_MONTH:	return ltime.year*100L+ltime.month;
@@ -2511,12 +2232,19 @@ bool Item_char_typecast::eq(const Item *item, bool binary_cmp) const
   return 1;
 }
 
-void Item_typecast::print(String *str, enum_query_type query_type)
+void Item_temporal_typecast::print(String *str, enum_query_type query_type)
 {
+  char buf[32];
   str->append(STRING_WITH_LEN("cast("));
   args[0]->print(str, query_type);
   str->append(STRING_WITH_LEN(" as "));
   str->append(cast_type());
+  if (decimals)
+  {
+    str->append('(');
+    str->append(llstr(decimals, buf));
+    str->append(')');
+  }
   str->append(')');
 }
 
@@ -2526,13 +2254,13 @@ void Item_char_typecast::print(String *str, enum_query_type query_type)
   str->append(STRING_WITH_LEN("cast("));
   args[0]->print(str, query_type);
   str->append(STRING_WITH_LEN(" as char"));
-  if (cast_length >= 0)
+  if (cast_length != ~0U)
   {
     str->append('(');
     char buffer[20];
     // my_charset_bin is good enough for numbers
     String st(buffer, sizeof(buffer), &my_charset_bin);
-    st.set((ulonglong)cast_length, &my_charset_bin);
+    st.set(static_cast<ulonglong>(cast_length), &my_charset_bin);
     str->append(st);
     str->append(')');
   }
@@ -2550,8 +2278,8 @@ String *Item_char_typecast::val_str(String *str)
   String *res;
   uint32 length;
 
-  if (cast_length >= 0 &&
-      ((unsigned) cast_length) > current_thd->variables.max_allowed_packet)
+  if (cast_length != ~0U &&
+      cast_length > current_thd->variables.max_allowed_packet)
   {
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
 			ER_WARN_ALLOWED_PACKET_OVERFLOWED,
@@ -2573,10 +2301,15 @@ String *Item_char_typecast::val_str(String *str)
   }
   else
   {
-    // Convert character set if differ
+    /*
+      Convert the character set if it differs.
+      from_cs is 0 in the case where the result set may vary between calls,
+      for example with dynamic columns.
+    */
     uint dummy_errors;
     if (!(res= args[0]->val_str(str)) ||
-        tmp_value.copy(res->ptr(), res->length(), from_cs,
+        tmp_value.copy(res->ptr(), res->length(),
+                       from_cs ? from_cs  : res->charset(),
                        cast_cs, &dummy_errors))
     {
       null_value= 1;
@@ -2592,7 +2325,7 @@ String *Item_char_typecast::val_str(String *str)
     and the result is longer than cast length, e.g.
     CAST('string' AS CHAR(1))
   */
-  if (cast_length >= 0)
+  if (cast_length != ~0U)
   {
     if (res->length() > (length= (uint32) res->charpos(cast_length)))
     {                                           // Safe even if const arg
@@ -2613,16 +2346,15 @@ String *Item_char_typecast::val_str(String *str)
                           err.ptr());
       res->length((uint) length);
     }
-    else if (cast_cs == &my_charset_bin && res->length() < (uint) cast_length)
+    else if (cast_cs == &my_charset_bin && res->length() < cast_length)
     {
-      if (res->alloced_length() < (uint) cast_length)
+      if (res->alloced_length() < cast_length)
       {
         str_value.alloc(cast_length);
         str_value.copy(*res);
         res= &str_value;
       }
-      bzero((char*) res->ptr() + res->length(),
-            (uint) cast_length - res->length());
+      bzero((char*) res->ptr() + res->length(), cast_length - res->length());
       res->length(cast_length);
     }
   }
@@ -2655,194 +2387,117 @@ void Item_char_typecast::fix_length_and_dec()
        and thus avoid unnecessary character set conversion.
      - If the argument is not a number, then from_cs is set to
        the argument's charset.
+     - If the argument has a dynamic collation (one that can change from
+       call to call), from_cs is set to 0 as a marker that the collation
+       must be taken from the result string.
 
        Note (TODO): we could use repertoire technique here.
   */
-  from_cs= (args[0]->result_type() == INT_RESULT || 
-            args[0]->result_type() == DECIMAL_RESULT ||
-            args[0]->result_type() == REAL_RESULT) ?
-           (cast_cs->mbminlen == 1 ? cast_cs : &my_charset_latin1) :
-           args[0]->collation.collation;
-  charset_conversion= (cast_cs->mbmaxlen > 1) ||
+  from_cs= ((args[0]->result_type() == INT_RESULT || 
+             args[0]->result_type() == DECIMAL_RESULT ||
+             args[0]->result_type() == REAL_RESULT) ?
+            (cast_cs->mbminlen == 1 ? cast_cs : &my_charset_latin1) :
+            args[0]->dynamic_result() ? 0 :
+            args[0]->collation.collation);
+  charset_conversion= !from_cs || (cast_cs->mbmaxlen > 1) ||
                       (!my_charset_same(from_cs, cast_cs) &&
                        from_cs != &my_charset_bin &&
                        cast_cs != &my_charset_bin);
   collation.set(cast_cs, DERIVATION_IMPLICIT);
-  char_length= (cast_length >= 0) ? cast_length :
+  char_length= ((cast_length != ~0U) ? cast_length :
                 args[0]->max_length /
-                (cast_cs == &my_charset_bin ? 1 : args[0]->collation.collation->mbmaxlen);
+                (cast_cs == &my_charset_bin ? 1 :
+                 args[0]->collation.collation->mbmaxlen));
   max_length= char_length * cast_cs->mbmaxlen;
 }
 
 
-String *Item_datetime_typecast::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-
-  if (!get_arg0_date(&ltime, TIME_FUZZY_DATE) &&
-      !make_datetime(ltime.second_part ? DATE_TIME_MICROSECOND : DATE_TIME, 
-		     &ltime, str))
-    return str;
-
-  null_value=1;
-  return 0;
-}
-
-
-longlong Item_datetime_typecast::val_int()
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-  if (get_arg0_date(&ltime,1))
-  {
-    null_value= 1;
-    return 0;
-  }
-
-  return TIME_to_ulonglong_datetime(&ltime);
-}
-
-
-bool Item_time_typecast::get_time(MYSQL_TIME *ltime)
+bool Item_time_typecast::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
-  bool res= get_arg0_time(ltime);
+  if (get_arg0_time(ltime))
+    return 1;
+  if (decimals < TIME_SECOND_PART_DIGITS)
+    ltime->second_part= sec_part_truncate(ltime->second_part, decimals);
   /*
-    For MYSQL_TIMESTAMP_TIME value we can have non-zero day part,
+    MYSQL_TIMESTAMP_TIME value can have non-zero day part,
     which we should not lose.
   */
-  if (ltime->time_type == MYSQL_TIMESTAMP_DATETIME)
+  if (ltime->time_type != MYSQL_TIMESTAMP_TIME)
     ltime->year= ltime->month= ltime->day= 0;
   ltime->time_type= MYSQL_TIMESTAMP_TIME;
-  return res;
-}
-
-
-longlong Item_time_typecast::val_int()
-{
-  MYSQL_TIME ltime;
-  if (get_time(&ltime))
-  {
-    null_value= 1;
-    return 0;
-  }
-  return (ltime.neg ? -1 : 1) *
-    (longlong) ((ltime.hour)*10000 + ltime.minute*100 + ltime.second);
-}
-
-String *Item_time_typecast::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-
-  if (!get_arg0_time(&ltime) &&
-      !make_datetime(ltime.second_part ? TIME_MICROSECOND : TIME_ONLY,
-		     &ltime, str))
-    return str;
-
-  null_value=1;
   return 0;
 }
 
 
 bool Item_date_typecast::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
-  bool res= get_arg0_date(ltime, fuzzy_date);
+  if (get_arg0_date(ltime, TIME_FUZZY_DATE))
+    return 1;
+
   ltime->hour= ltime->minute= ltime->second= ltime->second_part= 0;
   ltime->time_type= MYSQL_TIMESTAMP_DATE;
-  return res;
-}
 
-
-bool Item_date_typecast::get_time(MYSQL_TIME *ltime)
-{
-  bzero((char *)ltime, sizeof(MYSQL_TIME));
-  return args[0]->null_value;
-}
-
-
-String *Item_date_typecast::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-
-  if (!get_arg0_date(&ltime, TIME_FUZZY_DATE) &&
-      !str->alloc(MAX_DATE_STRING_REP_LENGTH))
+  int unused;
+  if (check_date(ltime, ltime->year || ltime->month || ltime->day,
+                 fuzzy_date, &unused))
   {
-    make_date((DATE_TIME_FORMAT *) 0, &ltime, str);
-    return str;
+    ErrConvTime str(ltime);
+    make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                                 &str, MYSQL_TIMESTAMP_DATE, 0);
+    return (null_value= 1);
   }
-
-  null_value=1;
-  return 0;
+  return (null_value= 0);
 }
 
-longlong Item_date_typecast::val_int()
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-  if ((null_value= args[0]->get_date(&ltime, TIME_FUZZY_DATE)))
-    return 0;
-  return (longlong) (ltime.year * 10000L + ltime.month * 100 + ltime.day);
-}
-
-/**
-  MAKEDATE(a,b) is a date function that creates a date value 
-  from a year and day value.
 
-  NOTES:
-    As arguments are integers, we can't know if the year is a 2 digit or 4 digit year.
-    In this case we treat all years < 100 as 2 digit years. Ie, this is not safe
-    for dates between 0000-01-01 and 0099-12-31
-*/
-
-String *Item_func_makedate::val_str(String *str)
+bool Item_datetime_typecast::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME l_time;
-  long daynr=  (long) args[1]->val_int();
-  long year= (long) args[0]->val_int();
-  long days;
+  if (get_arg0_date(ltime, fuzzy_date & ~TIME_TIME_ONLY))
+    return 1;
 
-  if (args[0]->null_value || args[1]->null_value ||
-      year < 0 || year > 9999 || daynr <= 0)
-    goto err;
+  if (decimals < TIME_SECOND_PART_DIGITS)
+    ltime->second_part= sec_part_truncate(ltime->second_part, decimals);
 
-  if (year < 100)
-    year= year_2000_handling(year);
 
-  days= calc_daynr(year,1,1) + daynr - 1;
-  /* Day number from year 0 to 9999-12-31 */
-  if (days >= 0 && days <= MAX_DAY_NUMBER)
+  /*
+    ltime is a valid MYSQL_TYPE_TIME value (according to fuzzy_date),
+    but not every valid TIME value is a valid DATETIME value!
+  */
+  if (ltime->time_type == MYSQL_TIMESTAMP_TIME)
   {
-    null_value=0;
-    get_date_from_daynr(days,&l_time.year,&l_time.month,&l_time.day);
-    if (str->alloc(MAX_DATE_STRING_REP_LENGTH))
-      goto err;
-    make_date((DATE_TIME_FORMAT *) 0, &l_time, str);
-    return str;
+    if (ltime->neg)
+    {
+      ErrConvTime str(ltime);
+      make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                                   &str, MYSQL_TIMESTAMP_DATETIME, 0);
+      return (null_value= 1);
+    }
+    
+    uint day= ltime->hour/24;
+    ltime->hour %= 24;
+    ltime->month= day / 31;
+    ltime->day= day % 31;
   }
 
-err:
-  null_value=1;
+  ltime->time_type= MYSQL_TIMESTAMP_DATETIME;
   return 0;
 }
 
 
-/*
+/**
   MAKEDATE(a,b) is a date function that creates a date value 
   from a year and day value.
 
   NOTES:
-    As arguments are integers, we can't know if the year is a 2 digit or 4 digit year.
-    In this case we treat all years < 100 as 2 digit years. Ie, this is not safe
-    for dates between 0000-01-01 and 0099-12-31
+    As arguments are integers, we can't know if the year is a 2 digit
+    or 4 digit year.  In this case we treat all years < 100 as 2 digit
+    years, i.e. this is not safe for dates between 0000-01-01 and
+    0099-12-31.
 */
 
-longlong Item_func_makedate::val_int()
+bool Item_func_makedate::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME l_time;
   long daynr=  (long) args[1]->val_int();
   long year= (long) args[0]->val_int();
   long days;
@@ -2856,25 +2511,23 @@ longlong Item_func_makedate::val_int()
 
   days= calc_daynr(year,1,1) + daynr - 1;
   /* Day number from year 0 to 9999-12-31 */
-  if (days >= 0 && days < MAX_DAY_NUMBER)
+  if (days >= 0 && days <= MAX_DAY_NUMBER)
   {
-    null_value=0;
-    get_date_from_daynr(days,&l_time.year,&l_time.month,&l_time.day);
-    return (longlong) (l_time.year * 10000L + l_time.month * 100 + l_time.day);
+    bzero(ltime, sizeof(*ltime));
+    ltime->time_type= MYSQL_TIMESTAMP_DATE;
+    get_date_from_daynr(days, &ltime->year, &ltime->month, &ltime->day);
+    return (null_value= 0);
   }
 
 err:
-  null_value= 1;
-  return 0;
+  return (null_value= 1);
 }
 
 
 void Item_func_add_time::fix_length_and_dec()
 {
   enum_field_types arg0_field_type;
-  decimals=0;
-  fix_length_and_charset_datetime(MAX_DATETIME_FULL_WIDTH);
-  maybe_null= 1;
+  decimals= max(args[0]->decimals, args[1]->decimals);
 
   /*
     The field type for the result of an Item_func_add_time function is defined
@@ -2894,6 +2547,7 @@ void Item_func_add_time::fix_length_and_dec()
     cached_field_type= MYSQL_TYPE_DATETIME;
   else if (arg0_field_type == MYSQL_TYPE_TIME)
     cached_field_type= MYSQL_TYPE_TIME;
+  Item_temporal_func::fix_length_and_dec();
 }
 
 /**
@@ -2906,104 +2560,79 @@ void Item_func_add_time::fix_length_and_dec()
   Result: Time value or datetime value
 */
 
-MYSQL_TIME *Item_func_add_time::val_datetime(MYSQL_TIME *time,
-                                             date_time_format_types *format)
+bool Item_func_add_time::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   DBUG_ASSERT(fixed == 1);
   MYSQL_TIME l_time1, l_time2;
   bool is_time= 0;
   long days, microseconds;
   longlong seconds;
-  int l_sign= sign;
+  int l_sign= sign, was_cut= 0;
+  uint dec= decimals;
 
-  null_value=0;
   if (is_date)                        // TIMESTAMP function
   {
     if (get_arg0_date(&l_time1, TIME_FUZZY_DATE) || 
         args[1]->get_time(&l_time2) ||
         l_time1.time_type == MYSQL_TIMESTAMP_TIME || 
         l_time2.time_type != MYSQL_TIMESTAMP_TIME)
-      goto null_date;
+      return (null_value= 1);
   }
   else                                // ADDTIME function
   {
     if (args[0]->get_time(&l_time1) || 
         args[1]->get_time(&l_time2) ||
         l_time2.time_type == MYSQL_TIMESTAMP_DATETIME)
-      goto null_date;
+      return (null_value= 1);
     is_time= (l_time1.time_type == MYSQL_TIMESTAMP_TIME);
   }
   if (l_time1.neg != l_time2.neg)
     l_sign= -l_sign;
   
-  bzero((char *)time, sizeof(MYSQL_TIME));
+  bzero(ltime, sizeof(*ltime));
   
-  time->neg= calc_time_diff(&l_time1, &l_time2, -l_sign,
-                            &seconds, &microseconds);
+  ltime->neg= calc_time_diff(&l_time1, &l_time2, -l_sign,
+			      &seconds, &microseconds);
 
   /*
     If first argument was negative and diff between arguments
     is non-zero we need to swap sign to get proper result.
   */
   if (l_time1.neg && (seconds || microseconds))
-    time->neg= 1 - time->neg;         // Swap sign of result
+    ltime->neg= 1-ltime->neg;         // Swap sign of result
 
-  if (!is_time && time->neg)
-    goto null_date;
+  if (!is_time && ltime->neg)
+    return (null_value= 1);
 
   days= (long)(seconds/86400L);
 
-  calc_time_from_sec(time, (long)(seconds%86400L), microseconds);
+  calc_time_from_sec(ltime, (long)(seconds%86400L), microseconds);
+
+  ltime->time_type= is_time ? MYSQL_TIMESTAMP_TIME : MYSQL_TIMESTAMP_DATETIME;
+
+  if (cached_field_type == MYSQL_TYPE_STRING &&
+      (l_time1.second_part || l_time2.second_part))
+    dec= TIME_SECOND_PART_DIGITS;
 
   if (!is_time)
   {
-    get_date_from_daynr(days, &time->year, &time->month, &time->day);
-    *format= l_time1.second_part || l_time2.second_part ?
-             DATE_TIME_MICROSECOND : DATE_TIME;
-    if (time->day)
-      return time;
-    goto null_date;
+    get_date_from_daynr(days,&ltime->year,&ltime->month,&ltime->day);
+    if (!ltime->day)
+      return (null_value= 1);
+    return (null_value= 0);
   }
-  *format= l_time1.second_part || l_time2.second_part ?
-           TIME_MICROSECOND : TIME_ONLY;
-  time->hour+= days*24;
-  return time;
-
-null_date:
-  null_value=1;
-  return 0;
-}
-
-
-String *Item_func_add_time::val_str(String *str)
-{
-  MYSQL_TIME ltime;
-  date_time_format_types format;
-
-  val_datetime(&ltime, &format);
-
-  if (null_value)
-    return 0;
-
-  if (!make_datetime_with_warn(format, &ltime, str))
-    return str;
-
-  null_value= 1;
-  return 0;
-}
-
-
-longlong Item_func_add_time::val_int()
-{
-  MYSQL_TIME ltime;
-  date_time_format_types format;
+  
+  ltime->hour+= days*24;
 
-  val_datetime(&ltime, &format);
+  MYSQL_TIME copy= *ltime;
+  ErrConvTime str(&copy);
 
-  if (null_value)
-    return 0;
+  check_time_range(ltime, decimals, &was_cut);
+  if (was_cut)
+    make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                                 &str, MYSQL_TIMESTAMP_TIME, NullS);
 
-  return TIME_to_ulonglong_datetime(&ltime);
+  return (null_value= 0);
 }
 
 
@@ -3036,19 +2665,23 @@ void Item_func_add_time::print(String *str, enum_query_type query_type)
   Result: Time value
 */
 
-String *Item_func_timediff::val_str(String *str)
+bool Item_func_timediff::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   DBUG_ASSERT(fixed == 1);
   longlong seconds;
   long microseconds;
-  int l_sign= 1;
-  MYSQL_TIME l_time1 ,l_time2, l_time3;
+  int l_sign= 1, was_cut= 0;
+  MYSQL_TIME l_time1,l_time2,l_time3;
+  ErrConvTime str(&l_time3);
+
+  /* this may be true in, for example, date_add(timediff(...), ...) */
+  if (fuzzy_date & TIME_NO_ZERO_IN_DATE)
+    return (null_value= 1);
 
-  null_value= 0;  
   if (args[0]->get_time(&l_time1) ||
       args[1]->get_time(&l_time2) ||
       l_time1.time_type != l_time2.time_type)
-    goto null_date;
+    return (null_value= 1);
 
   if (l_time1.neg != l_time2.neg)
     l_sign= -l_sign;
@@ -3066,16 +2699,27 @@ String *Item_func_timediff::val_str(String *str)
   if (l_time1.neg && (seconds || microseconds))
     l_time3.neg= 1-l_time3.neg;         // Swap sign of result
 
+  /*
+    seconds is a longlong; when cast to long it may become a small number
+    even if the original seconds value was too large and invalid.
+    As a workaround we limit seconds to a large, still invalid, long number
+    ("invalid" means larger than the maximum TIME value in seconds).
+  */
+  set_if_smaller(seconds, INT_MAX32);
+
   calc_time_from_sec(&l_time3, (long) seconds, microseconds);
 
-  if (!make_datetime_with_warn(l_time1.second_part || l_time2.second_part ?
-                               TIME_MICROSECOND : TIME_ONLY,
-                               &l_time3, str))
-    return str;
+  if ((fuzzy_date & TIME_NO_ZERO_DATE) && (seconds == 0) &&
+      (microseconds == 0))
+    return (null_value= 1);
 
-null_date:
-  null_value=1;
-  return 0;
+  *ltime= l_time3;
+  check_time_range(ltime, decimals, &was_cut);
+
+  if (was_cut)
+    make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                                 &str, MYSQL_TIMESTAMP_TIME, NullS);
+  return (null_value= 0);
 }
 
 /**
@@ -3084,26 +2728,21 @@ null_date:
   Result: Time value
 */
 
-String *Item_func_maketime::val_str(String *str)
+bool Item_func_maketime::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
   bool overflow= 0;
 
   longlong hour=   args[0]->val_int();
   longlong minute= args[1]->val_int();
   longlong second= args[2]->val_int();
 
-  if ((null_value=(args[0]->null_value || 
-                   args[1]->null_value ||
-                   args[2]->null_value ||
-                   minute < 0 || minute > 59 ||
-                   second < 0 || second > 59 ||
-                   str->alloc(MAX_DATE_STRING_REP_LENGTH))))
-    return 0;
+  if (args[0]->null_value || args[1]->null_value || args[2]->null_value ||
+       minute < 0 || minute > 59 || second < 0 || second > 59)
+    return (null_value= 1);
 
-  bzero((char *)&ltime, sizeof(ltime));
-  ltime.neg= 0;
+  bzero(ltime, sizeof(*ltime));
+  ltime->time_type= MYSQL_TIMESTAMP_TIME;
 
   /* Check for integer overflows */
   if (hour < 0)
@@ -3111,37 +2750,31 @@ String *Item_func_maketime::val_str(String *str)
     if (args[0]->unsigned_flag)
       overflow= 1;
     else
-      ltime.neg= 1;
+      ltime->neg= 1;
   }
-  if (-hour > UINT_MAX || hour > UINT_MAX)
+  if (-hour > TIME_MAX_HOUR || hour > TIME_MAX_HOUR)
     overflow= 1;
 
   if (!overflow)
   {
-    ltime.hour=   (uint) ((hour < 0 ? -hour : hour));
-    ltime.minute= (uint) minute;
-    ltime.second= (uint) second;
+    ltime->hour=   (uint) ((hour < 0 ? -hour : hour));
+    ltime->minute= (uint) minute;
+    ltime->second= (uint) second;
   }
   else
   {
-    ltime.hour= TIME_MAX_HOUR;
-    ltime.minute= TIME_MAX_MINUTE;
-    ltime.second= TIME_MAX_SECOND;
+    ltime->hour= TIME_MAX_HOUR;
+    ltime->minute= TIME_MAX_MINUTE;
+    ltime->second= TIME_MAX_SECOND;
     char buf[28];
     char *ptr= longlong10_to_str(hour, buf, args[0]->unsigned_flag ? 10 : -10);
-    int len = (int)(ptr - buf) +
-      sprintf(ptr, ":%02u:%02u", (uint) minute, (uint) second);
+    int len = (int)(ptr - buf) + sprintf(ptr, ":%02u:%02u", (uint)minute, (uint)second);
     make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                                  buf, len, MYSQL_TIMESTAMP_TIME,
                                  NullS);
   }
 
-  if (make_time_with_warn((DATE_TIME_FORMAT *) 0, &ltime, str))
-  {
-    null_value= 1;
-    return 0;
-  }
-  return str;
+  return (null_value= 0);
 }
 
 
@@ -3157,7 +2790,7 @@ longlong Item_func_microsecond::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   MYSQL_TIME ltime;
-  if (!get_arg0_time(&ltime))
+  if (!get_arg0_date(&ltime, TIME_TIME_ONLY))
     return ltime.second_part;
   return 0;
 }
@@ -3172,8 +2805,8 @@ longlong Item_func_timestamp_diff::val_int()
   int neg= 1;
 
   null_value= 0;  
-  if (args[0]->get_date(&ltime1, TIME_NO_ZERO_DATE) ||
-      args[1]->get_date(&ltime2, TIME_NO_ZERO_DATE))
+  if (args[0]->get_date(&ltime1, TIME_NO_ZERO_DATE | TIME_NO_ZERO_IN_DATE) ||
+      args[1]->get_date(&ltime2, TIME_NO_ZERO_DATE | TIME_NO_ZERO_IN_DATE))
     goto null_date;
 
   if (calc_time_diff(&ltime2,&ltime1, 1,
@@ -3423,9 +3056,9 @@ get_date_time_result_type(const char *format, uint length)
           have all types of date-time components and can end our search.
         */
 	return DATE_TIME_MICROSECOND;
+      }
     }
   }
-  }
 
   /* We don't have all three types of date-time components */
   if (frac_second_used)
@@ -3442,42 +3075,39 @@ get_date_time_result_type(const char *format, uint length)
 
 void Item_func_str_to_date::fix_length_and_dec()
 {
-  maybe_null= 1;
-  decimals=0;
-  cached_format_type= DATE_TIME;
   cached_field_type= MYSQL_TYPE_DATETIME;
-  max_length= MAX_DATETIME_FULL_WIDTH*MY_CHARSET_BIN_MB_MAXLEN;
-  cached_timestamp_type= MYSQL_TIMESTAMP_NONE;
-  sql_mode= (current_thd->variables.sql_mode &
-             (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE));
+  decimals= NOT_FIXED_DEC;
   if ((const_item= args[1]->const_item()))
   {
     char format_buff[64];
     String format_str(format_buff, sizeof(format_buff), &my_charset_bin);
     String *format= args[1]->val_str(&format_str);
+    decimals= 0;
     if (!args[1]->null_value)
     {
-      cached_format_type= get_date_time_result_type(format->ptr(),
-                                                    format->length());
+      date_time_format_types cached_format_type=
+        get_date_time_result_type(format->ptr(), format->length());
       switch (cached_format_type) {
       case DATE_ONLY:
-        cached_timestamp_type= MYSQL_TIMESTAMP_DATE;
         cached_field_type= MYSQL_TYPE_DATE; 
-        max_length= MAX_DATE_WIDTH * MY_CHARSET_BIN_MB_MAXLEN;
         break;
-      case TIME_ONLY:
       case TIME_MICROSECOND:
-        cached_timestamp_type= MYSQL_TIMESTAMP_TIME;
+        decimals= 6;
+        /* fall through */
+      case TIME_ONLY:
         cached_field_type= MYSQL_TYPE_TIME; 
-        max_length= MAX_TIME_WIDTH * MY_CHARSET_BIN_MB_MAXLEN;
         break;
-      default:
-        cached_timestamp_type= MYSQL_TIMESTAMP_DATETIME;
+      case DATE_TIME_MICROSECOND:
+        decimals= 6;
+        /* fall through */
+      case DATE_TIME:
         cached_field_type= MYSQL_TYPE_DATETIME; 
         break;
       }
     }
   }
+  cached_timestamp_type= mysql_type_to_time_type(cached_field_type);
+  Item_temporal_func::fix_length_and_dec();
 }
 
 
@@ -3486,22 +3116,20 @@ bool Item_func_str_to_date::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
   DATE_TIME_FORMAT date_time_format;
   char val_buff[64], format_buff[64];
   String val_string(val_buff, sizeof(val_buff), &my_charset_bin), *val;
-  String format_str(format_buff, sizeof(format_buff), &my_charset_bin), *format;
+  String format_str(format_buff, sizeof(format_buff), &my_charset_bin),
+    *format;
 
   val=    args[0]->val_str(&val_string);
   format= args[1]->val_str(&format_str);
   if (args[0]->null_value || args[1]->null_value)
-    goto null_date;
+    return (null_value=1);
 
-  null_value= 0;
-  bzero((char*) ltime, sizeof(*ltime));
   date_time_format.format.str=    (char*) format->ptr();
   date_time_format.format.length= format->length();
   if (extract_date_time(&date_time_format, val->ptr(), val->length(),
-			ltime, cached_timestamp_type, 0, "datetime") ||
-      ((fuzzy_date & TIME_NO_ZERO_DATE) &&
-       (ltime->year == 0 || ltime->month == 0 || ltime->day == 0)))
-    goto null_date;
+			ltime, cached_timestamp_type, 0, "datetime",
+                        fuzzy_date))
+    return (null_value=1);
   if (cached_timestamp_type == MYSQL_TIMESTAMP_TIME && ltime->day)
   {
     /*
@@ -3512,57 +3140,7 @@ bool Item_func_str_to_date::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
     ltime->hour+= ltime->day*24;
     ltime->day= 0;
   }
-  return 0;
-
-null_date:
-  if (val && (fuzzy_date & TIME_NO_ZERO_DATE))
-  {
-    char buff[128];
-    strmake(buff, val->ptr(), min(val->length(), sizeof(buff)-1));
-    push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                        ER_WRONG_VALUE_FOR_TYPE, ER(ER_WRONG_VALUE_FOR_TYPE),
-                        "datetime", buff, "str_to_date");
-  }
-  return (null_value=1);
-}
-
-
-String *Item_func_str_to_date::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-
-  if (Item_func_str_to_date::get_date(&ltime, TIME_FUZZY_DATE | sql_mode))
-    return 0;
-
-  if (!make_datetime((const_item ? cached_format_type :
-		     (ltime.second_part ? DATE_TIME_MICROSECOND : DATE_TIME)),
-		     &ltime, str))
-    return str;
-  return 0;
-}
-
-
-longlong Item_func_str_to_date::val_int()
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-
-  if (Item_func_str_to_date::get_date(&ltime, TIME_FUZZY_DATE | sql_mode))
-    return 0;
-
-  if (const_item)
-  {
-    switch (cached_field_type) {
-    case MYSQL_TYPE_DATE:
-      return TIME_to_ulonglong_date(&ltime);
-    case MYSQL_TYPE_TIME:
-      return TIME_to_ulonglong_time(&ltime);
-    default:
-      return TIME_to_ulonglong_datetime(&ltime);
-    }
-  }
-  return TIME_to_ulonglong_datetime(&ltime);
+  return (null_value= 0);
 }
 
 
@@ -3570,11 +3148,7 @@ bool Item_func_last_day::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   if (get_arg0_date(ltime, fuzzy_date & ~TIME_FUZZY_DATE) ||
       (ltime->month == 0))
-  {
-    null_value= 1;
-    return 1;
-  }
-  null_value= 0;
+    return (null_value=1);
   uint month_idx= ltime->month-1;
   ltime->day= days_in_month[month_idx];
   if ( month_idx == 1 && calc_days_in_year(ltime->year) == 366)
@@ -3582,5 +3156,5 @@ bool Item_func_last_day::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
   ltime->hour= ltime->minute= ltime->second= 0;
   ltime->second_part= 0;
   ltime->time_type= MYSQL_TIMESTAMP_DATE;
-  return 0;
+  return (null_value= 0);
 }
diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h
index e01b86191b8..239f7e92bba 100644
--- a/sql/item_timefunc.h
+++ b/sql/item_timefunc.h
@@ -1,7 +1,7 @@
 #ifndef ITEM_TIMEFUNC_INCLUDED
 #define ITEM_TIMEFUNC_INCLUDED
-
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -30,8 +30,20 @@ enum date_time_format_types
   TIME_ONLY= 0, TIME_MICROSECOND, DATE_ONLY, DATE_TIME, DATE_TIME_MICROSECOND
 };
 
-bool get_interval_value(Item *args,interval_type int_type,
-			       String *str_value, INTERVAL *interval);
+static inline enum enum_mysql_timestamp_type
+mysql_type_to_time_type(enum enum_field_types mysql_type)
+{
+  switch(mysql_type) {
+  case MYSQL_TYPE_TIME: return MYSQL_TIMESTAMP_TIME;
+  case MYSQL_TYPE_TIMESTAMP:
+  case MYSQL_TYPE_DATETIME: return MYSQL_TIMESTAMP_DATETIME;
+  case MYSQL_TYPE_NEWDATE:
+  case MYSQL_TYPE_DATE: return MYSQL_TIMESTAMP_DATE;
+  default: return MYSQL_TIMESTAMP_ERROR;
+  }
+}
+
+bool get_interval_value(Item *args,interval_type int_type, INTERVAL *interval);
 
 class Item_func_period_add :public Item_int_func
 {
@@ -75,6 +87,7 @@ public:
   enum_monotonicity_info get_monotonicity_info() const;
   longlong val_int_endpoint(bool left_endp, bool *incl_endp);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_date_args();
@@ -128,6 +141,7 @@ public:
     maybe_null=1; 
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_date_args();
@@ -159,6 +173,7 @@ public:
     maybe_null= 1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_date_args();
@@ -175,6 +190,7 @@ public:
   String *val_str(String *str);
   void fix_length_and_dec();
   bool check_partition_func_processor(uchar *int_arg) {return TRUE;}
+  bool check_vcol_func_processor(uchar *int_arg) {return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_date_args();
@@ -195,6 +211,7 @@ public:
     maybe_null= 1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_date_args();
@@ -215,6 +232,7 @@ public:
     maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_time_args();
@@ -235,6 +253,7 @@ public:
     maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_time_args();
@@ -255,6 +274,7 @@ public:
      maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_date_args();
@@ -275,6 +295,7 @@ public:
     maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_time_args();
@@ -309,6 +330,7 @@ public:
     maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_date_args();
@@ -331,6 +353,7 @@ public:
     maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_date_args();
@@ -364,6 +387,7 @@ public:
     maybe_null= 1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_date_args();
@@ -380,20 +404,43 @@ class Item_func_dayname :public Item_func_weekday
   enum Item_result result_type () const { return STRING_RESULT; }
   void fix_length_and_dec();
   bool check_partition_func_processor(uchar *int_arg) {return TRUE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
-class Item_func_unix_timestamp :public Item_int_func
+class Item_func_seconds_hybrid: public Item_func_numhybrid
 {
-  String value;
 public:
-  Item_func_unix_timestamp() :Item_int_func() {}
-  Item_func_unix_timestamp(Item *a) :Item_int_func(a) {}
-  longlong val_int();
+  Item_func_seconds_hybrid() :Item_func_numhybrid() {}
+  Item_func_seconds_hybrid(Item *a) :Item_func_numhybrid(a) {}
+  void fix_num_length_and_dec()
+  {
+    if (arg_count)
+      decimals= args[0]->decimals;
+    set_if_smaller(decimals, TIME_SECOND_PART_DIGITS);
+    max_length=17 + (decimals ? decimals + 1 : 0);
+  }
+  void find_num_type() { hybrid_type= decimals ? DECIMAL_RESULT : INT_RESULT; }
+  double real_op() { DBUG_ASSERT(0); return 0; }
+  String *str_op(String *str) { DBUG_ASSERT(0); return 0; }
+};
+
+
+class Item_func_unix_timestamp :public Item_func_seconds_hybrid
+{
+  bool get_timestamp_value(my_time_t *seconds, ulong *second_part);
+public:
+  Item_func_unix_timestamp() :Item_func_seconds_hybrid() {}
+  Item_func_unix_timestamp(Item *a) :Item_func_seconds_hybrid(a) {}
   const char *func_name() const { return "unix_timestamp"; }
   enum_monotonicity_info get_monotonicity_info() const;
   longlong val_int_endpoint(bool left_endp, bool *incl_endp);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  void fix_num_length_and_dec()
+  {
+    maybe_null= false;
+    Item_func_seconds_hybrid::fix_num_length_and_dec();
+  }
   /*
     UNIX_TIMESTAMP() depends on the current timezone
     (and thus may not be used as a partitioning function)
@@ -403,161 +450,117 @@ public:
   {
     return !has_timestamp_args();
   }
-  void fix_length_and_dec()
+  bool check_vcol_func_processor(uchar *int_arg) 
   {
-    decimals=0;
-    max_length=10*MY_CHARSET_BIN_MB_MAXLEN;
+    /*
+      TODO: Allow UNIX_TIMESTAMP called with an argument to be a part
+      of the expression for a virtual column
+    */
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
   }
+  longlong int_op();
+  my_decimal *decimal_op(my_decimal* buf);
 };
 
 
-class Item_func_time_to_sec :public Item_int_func
+class Item_func_time_to_sec :public Item_func_seconds_hybrid
 {
 public:
-  Item_func_time_to_sec(Item *item) :Item_int_func(item) {}
-  longlong val_int();
+  Item_func_time_to_sec(Item *item) :Item_func_seconds_hybrid(item) {}
   const char *func_name() const { return "time_to_sec"; }
-  void fix_length_and_dec()
+  void fix_num_length_and_dec()
   {
-    maybe_null= TRUE;
-    decimals=0;
-    max_length=10*MY_CHARSET_BIN_MB_MAXLEN;
+    maybe_null= true;
+    Item_func_seconds_hybrid::fix_num_length_and_dec();
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_time_args();
   }
+  longlong int_op();
+  my_decimal *decimal_op(my_decimal* buf);
 };
 
 
-/*
-  This can't be a Item_str_func, because the val_real() functions are special
-*/
-
-class Item_date :public Item_func
+class Item_temporal_func: public Item_func
 {
+  ulonglong sql_mode;
 public:
-  Item_date() :Item_func() {}
-  Item_date(Item *a) :Item_func(a) {}
+  Item_temporal_func() :Item_func() {}
+  Item_temporal_func(Item *a) :Item_func(a) {}
+  Item_temporal_func(Item *a, Item *b) :Item_func(a,b) {}
+  Item_temporal_func(Item *a, Item *b, Item *c) :Item_func(a,b,c) {}
   enum Item_result result_type () const { return STRING_RESULT; }
-  enum_field_types field_type() const { return MYSQL_TYPE_DATE; }
   CHARSET_INFO *charset_for_protocol(void) const { return &my_charset_bin; }
+  enum_field_types field_type() const { return MYSQL_TYPE_DATETIME; }
   String *val_str(String *str);
   longlong val_int();
-  double val_real() { return val_real_from_decimal(); }
-  const char *func_name() const { return "date"; }
-  void fix_length_and_dec()
-  { 
-    decimals= 0;
-    fix_length_and_charset_datetime(MAX_DATE_WIDTH);
-  }
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
-  bool result_as_longlong() { return TRUE; }
+  double val_real();
+  bool get_date(MYSQL_TIME *res, uint fuzzy_date) { DBUG_ASSERT(0); return 1; }
   my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    return  val_decimal_from_date(decimal_value);
-  }
+  { return  val_decimal_from_date(decimal_value); }
+  Field *tmp_table_field(TABLE *table)
+  { return tmp_table_field_from_field_type(table, 0); }
   int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_date_in_field(field);
-  }
+  { return save_date_in_field(field); }
+  void fix_length_and_dec();
 };
 
 
-class Item_date_func :public Item_str_func
+class Item_datefunc :public Item_temporal_func
 {
 public:
-  Item_date_func() :Item_str_func() {}
-  Item_date_func(Item *a) :Item_str_func(a) {}
-  Item_date_func(Item *a,Item *b) :Item_str_func(a,b) {}
-  Item_date_func(Item *a,Item *b, Item *c) :Item_str_func(a,b,c) {}
-  enum_field_types field_type() const { return MYSQL_TYPE_DATETIME; }
-  CHARSET_INFO *charset_for_protocol(void) const { return &my_charset_bin; }
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
-  bool result_as_longlong() { return TRUE; }
-  double val_real() { return (double) val_int(); }
-  my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    return  val_decimal_from_date(decimal_value);
-  }
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_date_in_field(field);
-  }
+  Item_datefunc() :Item_temporal_func() { }
+  Item_datefunc(Item *a) :Item_temporal_func(a) { }
+  enum_field_types field_type() const { return MYSQL_TYPE_DATE; }
 };
 
 
-class Item_str_timefunc :public Item_str_func
+class Item_timefunc :public Item_temporal_func
 {
 public:
-  Item_str_timefunc() :Item_str_func() {}
-  Item_str_timefunc(Item *a) :Item_str_func(a) {}
-  Item_str_timefunc(Item *a,Item *b) :Item_str_func(a,b) {}
-  Item_str_timefunc(Item *a, Item *b, Item *c) :Item_str_func(a, b ,c) {}
+  Item_timefunc() :Item_temporal_func() {}
+  Item_timefunc(Item *a) :Item_temporal_func(a) {}
+  Item_timefunc(Item *a,Item *b) :Item_temporal_func(a,b) {}
+  Item_timefunc(Item *a, Item *b, Item *c) :Item_temporal_func(a, b ,c) {}
   enum_field_types field_type() const { return MYSQL_TYPE_TIME; }
-  CHARSET_INFO *charset_for_protocol(void) const { return &my_charset_bin; }
-  void fix_length_and_dec()
-  {
-    decimals= DATETIME_DEC;
-    fix_length_and_charset_datetime(MAX_TIME_WIDTH);
-  }
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
-  double val_real() { return val_real_from_decimal(); }
-  my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    return  val_decimal_from_time(decimal_value);
-  }
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_time_in_field(field);
-  }
-  longlong val_int() { return val_int_from_decimal(); }
-  bool result_as_longlong() { return TRUE; }
 };
 
 
 /* Abstract CURTIME function. Children should define what time zone is used */
 
-class Item_func_curtime :public Item_str_timefunc
+class Item_func_curtime :public Item_timefunc
 {
-  longlong value;
-  char buff[9*2+32];
-  uint buff_length;
+  MYSQL_TIME ltime;
 public:
-  Item_func_curtime() :Item_str_timefunc() {}
-  Item_func_curtime(Item *a) :Item_str_timefunc(a) {}
-  double val_real() { DBUG_ASSERT(fixed == 1); return (double) value; }
-  longlong val_int() { DBUG_ASSERT(fixed == 1); return value; }
-  String *val_str(String *str);
-  void fix_length_and_dec();
+  Item_func_curtime(uint dec) :Item_timefunc() { decimals= dec; }
+  bool fix_fields(THD *, Item **);
+  void fix_length_and_dec()
+  {
+    store_now_in_TIME(&ltime);
+    Item_timefunc::fix_length_and_dec();
+    maybe_null= false;
+  }
+  bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   /* 
     Abstract method that defines which time zone is used for conversion.
     Converts time current time in my_time_t representation to broken-down
     MYSQL_TIME representation using UTC-SYSTEM or per-thread time zone.
   */
   virtual void store_now_in_TIME(MYSQL_TIME *now_time)=0;
-  bool result_as_longlong() { return TRUE; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
 class Item_func_curtime_local :public Item_func_curtime
 {
 public:
-  Item_func_curtime_local() :Item_func_curtime() {}
-  Item_func_curtime_local(Item *a) :Item_func_curtime(a) {}
+  Item_func_curtime_local(uint dec) :Item_func_curtime(dec) {}
   const char *func_name() const { return "curtime"; }
   virtual void store_now_in_TIME(MYSQL_TIME *now_time);
 };
@@ -566,8 +569,7 @@ public:
 class Item_func_curtime_utc :public Item_func_curtime
 {
 public:
-  Item_func_curtime_utc() :Item_func_curtime() {}
-  Item_func_curtime_utc(Item *a) :Item_func_curtime(a) {}
+  Item_func_curtime_utc(uint dec) :Item_func_curtime(dec) {}
   const char *func_name() const { return "utc_time"; }
   virtual void store_now_in_TIME(MYSQL_TIME *now_time);
 };
@@ -575,17 +577,18 @@ public:
 
 /* Abstract CURDATE function. See also Item_func_curtime. */
 
-class Item_func_curdate :public Item_date
+class Item_func_curdate :public Item_datefunc
 {
-  longlong value;
   MYSQL_TIME ltime;
 public:
-  Item_func_curdate() :Item_date() {}
-  longlong val_int() { DBUG_ASSERT(fixed == 1); return (value) ; }
-  String *val_str(String *str);
+  Item_func_curdate() :Item_datefunc() {}
   void fix_length_and_dec();
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   virtual void store_now_in_TIME(MYSQL_TIME *now_time)=0;
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -609,31 +612,32 @@ public:
 
 /* Abstract CURRENT_TIMESTAMP function. See also Item_func_curtime */
 
-class Item_func_now :public Item_date_func
+
+class Item_func_now :public Item_temporal_func
 {
-protected:
-  longlong value;
-  char buff[20*2+32];	// +32 to make my_snprintf_{8bit|ucs2} happy
-  uint buff_length;
   MYSQL_TIME ltime;
 public:
-  Item_func_now() :Item_date_func() {}
-  Item_func_now(Item *a) :Item_date_func(a) {}
-  enum Item_result result_type () const { return STRING_RESULT; }
-  longlong val_int() { DBUG_ASSERT(fixed == 1); return value; }
-  int save_in_field(Field *to, bool no_conversions);
-  String *val_str(String *str);
-  void fix_length_and_dec();
+  Item_func_now(uint dec) :Item_temporal_func() { decimals= dec; }
+  bool fix_fields(THD *, Item **);
+  void fix_length_and_dec()
+  {
+    store_now_in_TIME(&ltime);
+    Item_temporal_func::fix_length_and_dec();
+    maybe_null= false;
+  }
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   virtual void store_now_in_TIME(MYSQL_TIME *now_time)=0;
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
 class Item_func_now_local :public Item_func_now
 {
 public:
-  Item_func_now_local() :Item_func_now() {}
-  Item_func_now_local(Item *a) :Item_func_now(a) {}
+  Item_func_now_local(uint dec) :Item_func_now(dec) {}
   const char *func_name() const { return "now"; }
   virtual void store_now_in_TIME(MYSQL_TIME *now_time);
   virtual enum Functype functype() const { return NOW_FUNC; }
@@ -643,8 +647,7 @@ public:
 class Item_func_now_utc :public Item_func_now
 {
 public:
-  Item_func_now_utc() :Item_func_now() {}
-  Item_func_now_utc(Item *a) :Item_func_now(a) {}
+  Item_func_now_utc(uint dec) :Item_func_now(dec) {}
   const char *func_name() const { return "utc_timestamp"; }
   virtual void store_now_in_TIME(MYSQL_TIME *now_time);
 };
@@ -657,32 +660,28 @@ public:
 class Item_func_sysdate_local :public Item_func_now
 {
 public:
-  Item_func_sysdate_local() :Item_func_now() {}
-  Item_func_sysdate_local(Item *a) :Item_func_now(a) {}
+  Item_func_sysdate_local(uint dec) :Item_func_now(dec) {}
   bool const_item() const { return 0; }
   const char *func_name() const { return "sysdate"; }
   void store_now_in_TIME(MYSQL_TIME *now_time);
-  double val_real();
-  longlong val_int();
-  int save_in_field(Field *to, bool no_conversions);
-  String *val_str(String *str);
-  void fix_length_and_dec();
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   void update_used_tables()
   {
     Item_func_now::update_used_tables();
+    maybe_null= false;
     used_tables_cache|= RAND_TABLE_BIT;
   }
 };
 
 
-class Item_func_from_days :public Item_date
+class Item_func_from_days :public Item_datefunc
 {
 public:
-  Item_func_from_days(Item *a) :Item_date(a) {}
+  Item_func_from_days(Item *a) :Item_datefunc(a) {}
   const char *func_name() const { return "from_days"; }
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return has_date_args() || has_time_args();
@@ -707,13 +706,11 @@ public:
 };
 
 
-class Item_func_from_unixtime :public Item_date_func
+class Item_func_from_unixtime :public Item_temporal_func
 {
   THD *thd;
  public:
-  Item_func_from_unixtime(Item *a) :Item_date_func(a) {}
-  longlong val_int();
-  String *val_str(String *str);
+  Item_func_from_unixtime(Item *a) :Item_temporal_func(a) {}
   const char *func_name() const { return "from_unixtime"; }
   void fix_length_and_dec();
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
@@ -734,7 +731,7 @@ class Time_zone;
   tables can be used during this function calculation for loading time zone
   descriptions.
 */
-class Item_func_convert_tz :public Item_date_func
+class Item_func_convert_tz :public Item_temporal_func
 {
   /*
     If time zone parameters are constants we are caching objects that
@@ -746,9 +743,7 @@ class Item_func_convert_tz :public Item_date_func
   Time_zone *from_tz, *to_tz;
  public:
   Item_func_convert_tz(Item *a, Item *b, Item *c):
-    Item_date_func(a, b, c), from_tz_cached(0), to_tz_cached(0) {}
-  longlong val_int();
-  String *val_str(String *str);
+    Item_temporal_func(a, b, c), from_tz_cached(0), to_tz_cached(0) {}
   const char *func_name() const { return "convert_tz"; }
   void fix_length_and_dec();
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
@@ -756,61 +751,34 @@ class Item_func_convert_tz :public Item_date_func
 };
 
 
-class Item_func_sec_to_time :public Item_str_timefunc
+class Item_func_sec_to_time :public Item_timefunc
 {
 public:
-  Item_func_sec_to_time(Item *item) :Item_str_timefunc(item) {}
-  double val_real()
-  {
-    DBUG_ASSERT(fixed == 1);
-    return (double) Item_func_sec_to_time::val_int();
-  }
-  longlong val_int();
-  String *val_str(String *);
+  Item_func_sec_to_time(Item *item) :Item_timefunc(item) {}
+  bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   void fix_length_and_dec()
-  { 
-    Item_str_timefunc::fix_length_and_dec();
-    maybe_null=1;
+  {
+    decimals= args[0]->decimals;
+    Item_timefunc::fix_length_and_dec();
   }
   const char *func_name() const { return "sec_to_time"; }
-  bool result_as_longlong() { return TRUE; }
 };
 
 
-class Item_date_add_interval :public Item_date_func
+class Item_date_add_interval :public Item_temporal_func
 {
-  String value;
   enum_field_types cached_field_type;
-  String ascii_buf;
 public:
   const interval_type int_type; // keep it public
   const bool date_sub_interval; // keep it public
   Item_date_add_interval(Item *a,Item *b,interval_type type_arg,bool neg_arg)
-    :Item_date_func(a,b),int_type(type_arg), date_sub_interval(neg_arg) {}
-  String *val_str_ascii(String *str);
-  String *val_str(String *str)
-  {
-    return val_str_from_val_str_ascii(str, &ascii_buf);
-  }
+    :Item_temporal_func(a,b),int_type(type_arg), date_sub_interval(neg_arg) {}
   const char *func_name() const { return "date_add_interval"; }
   void fix_length_and_dec();
   enum_field_types field_type() const { return cached_field_type; }
-  CHARSET_INFO *charset_for_protocol(void) const
-  {
-    /*
-      DATE_ADD() can return DATE, DATETIME or VARCHAR depending on arguments.
-      Send using "binary" when DATE or DATETIME,
-      or using collation.collation when VARCHAR
-      (which was fixed from @collation_connection in fix_length_and_dec).
-    */
-    DBUG_ASSERT(fixed == 1);
-    return cached_field_type == MYSQL_TYPE_STRING ?
-                                collation.collation : &my_charset_bin;
-  }
-  longlong val_int();
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   bool eq(const Item *item, bool binary_cmp) const;
-  virtual void print(String *str, enum_query_type query_type);
+  void print(String *str, enum_query_type query_type);
 };
 
 
@@ -826,8 +794,9 @@ class Item_extract :public Item_int_func
   const char *func_name() const { return "extract"; }
   void fix_length_and_dec();
   bool eq(const Item *item, bool binary_cmp) const;
-  virtual void print(String *str, enum_query_type query_type);
+  void print(String *str, enum_query_type query_type);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     switch (int_type) {
@@ -867,169 +836,84 @@ class Item_extract :public Item_int_func
 };
 
 
-class Item_typecast :public Item_str_func
-{
-public:
-  Item_typecast(Item *a) :Item_str_func(a) {}
-  String *val_str(String *a)
-  {
-    DBUG_ASSERT(fixed == 1);
-    String *tmp=args[0]->val_str(a);
-    null_value=args[0]->null_value;
-    if (tmp)
-      tmp->set_charset(collation.collation);
-    return tmp;
-  }
-  void fix_length_and_dec()
-  {
-    collation.set(&my_charset_bin);
-    max_length=args[0]->max_length;
-  }
-  virtual const char* cast_type() const= 0;
-  virtual void print(String *str, enum_query_type query_type);
-};
-
-
-class Item_typecast_maybe_null :public Item_typecast
-{
-public:
-  Item_typecast_maybe_null(Item *a) :Item_typecast(a) { maybe_null= 1; }
-};
-
-
-class Item_char_typecast :public Item_typecast
+class Item_char_typecast :public Item_str_func
 {
-  int cast_length;
+  uint cast_length;
   CHARSET_INFO *cast_cs, *from_cs;
   bool charset_conversion;
   String tmp_value;
 public:
-  Item_char_typecast(Item *a, int length_arg, CHARSET_INFO *cs_arg)
-    :Item_typecast(a), cast_length(length_arg), cast_cs(cs_arg) {}
+  Item_char_typecast(Item *a, uint length_arg, CHARSET_INFO *cs_arg)
+    :Item_str_func(a), cast_length(length_arg), cast_cs(cs_arg) {}
   enum Functype functype() const { return CHAR_TYPECAST_FUNC; }
   bool eq(const Item *item, bool binary_cmp) const;
   const char *func_name() const { return "cast_as_char"; }
-  const char* cast_type() const { return "char"; };
   String *val_str(String *a);
   void fix_length_and_dec();
-  virtual void print(String *str, enum_query_type query_type);
+  void print(String *str, enum_query_type query_type);
 };
 
 
-class Item_date_typecast :public Item_typecast_maybe_null
+class Item_temporal_typecast: public Item_temporal_func
 {
 public:
-  Item_date_typecast(Item *a) :Item_typecast_maybe_null(a) {}
+  Item_temporal_typecast(Item *a) :Item_temporal_func(a) {}
+  virtual const char *cast_type() const = 0;
+  void print(String *str, enum_query_type query_type);
+  void fix_length_and_dec()
+  {
+    if (decimals == NOT_FIXED_DEC)
+      decimals= args[0]->decimals;
+    Item_temporal_func::fix_length_and_dec();
+  }
+};
+
+class Item_date_typecast :public Item_temporal_typecast
+{
+public:
+  Item_date_typecast(Item *a) :Item_temporal_typecast(a) {}
   const char *func_name() const { return "cast_as_date"; }
-  String *val_str(String *str);
   bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
-  bool get_time(MYSQL_TIME *ltime);
   const char *cast_type() const { return "date"; }
   enum_field_types field_type() const { return MYSQL_TYPE_DATE; }
-  CHARSET_INFO *charset_for_protocol(void) const { return &my_charset_bin; }
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
-  void fix_length_and_dec() { fix_length_and_charset_datetime(10); }
-  bool result_as_longlong() { return TRUE; }
-  longlong val_int();
-  double val_real() { return (double) val_int(); }
-  my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    return  val_decimal_from_date(decimal_value);
-  }
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_date_in_field(field);
-  }
 };
 
 
-class Item_time_typecast :public Item_typecast_maybe_null
+class Item_time_typecast :public Item_temporal_typecast
 {
 public:
-  Item_time_typecast(Item *a) :Item_typecast_maybe_null(a) {}
+  Item_time_typecast(Item *a, uint dec_arg)
+    :Item_temporal_typecast(a) { decimals= dec_arg; }
   const char *func_name() const { return "cast_as_time"; }
-  String *val_str(String *str);
-  bool get_time(MYSQL_TIME *ltime);
+  bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
   const char *cast_type() const { return "time"; }
   enum_field_types field_type() const { return MYSQL_TYPE_TIME; }
-  CHARSET_INFO *charset_for_protocol(void) const { return &my_charset_bin; }
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
-  bool result_as_longlong() { return TRUE; }
-  longlong val_int();
-  double val_real() { return val_real_from_decimal(); }
-  my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    return  val_decimal_from_time(decimal_value);
-  }
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_time_in_field(field);
-  }
-  void fix_length_and_dec()
-  { fix_length_and_charset_datetime(args[0]->max_char_length()); }
 };
 
 
-class Item_datetime_typecast :public Item_typecast_maybe_null
+class Item_datetime_typecast :public Item_temporal_typecast
 {
 public:
-  Item_datetime_typecast(Item *a) :Item_typecast_maybe_null(a) {}
+  Item_datetime_typecast(Item *a, uint dec_arg)
+    :Item_temporal_typecast(a) { decimals= dec_arg; }
   const char *func_name() const { return "cast_as_datetime"; }
-  String *val_str(String *str);
   const char *cast_type() const { return "datetime"; }
   enum_field_types field_type() const { return MYSQL_TYPE_DATETIME; }
-  CHARSET_INFO *charset_for_protocol(void) const { return &my_charset_bin; }
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
-  void fix_length_and_dec()
-  {
-    fix_length_and_charset_datetime(MAX_DATETIME_FULL_WIDTH);
-    decimals= DATETIME_DEC;
-  }
-  bool result_as_longlong() { return TRUE; }
-  longlong val_int();
-  double val_real() { return val_real_from_decimal(); }
-  double val() { return (double) val_int(); }
-  my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    return  val_decimal_from_date(decimal_value);
-  }
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_date_in_field(field);
-  }
+  bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
 };
 
-class Item_func_makedate :public Item_date_func
+
+class Item_func_makedate :public Item_temporal_func
 {
 public:
-  Item_func_makedate(Item *a,Item *b) :Item_date_func(a,b) {}
-  String *val_str(String *str);
+  Item_func_makedate(Item *a,Item *b) :Item_temporal_func(a,b) {}
   const char *func_name() const { return "makedate"; }
   enum_field_types field_type() const { return MYSQL_TYPE_DATE; }
-  CHARSET_INFO *charset_for_protocol(void) const { return &my_charset_bin; }
-  void fix_length_and_dec()
-  { 
-    decimals=0;
-    fix_length_and_charset_datetime(MAX_DATE_WIDTH);
-    maybe_null= 1;
-  }
-  longlong val_int();
+  bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
 };
 
 
-class Item_func_add_time :public Item_str_func
+class Item_func_add_time :public Item_temporal_func
 {
   const bool is_date;
   int sign;
@@ -1037,66 +921,39 @@ class Item_func_add_time :public Item_str_func
 
 public:
   Item_func_add_time(Item *a, Item *b, bool type_arg, bool neg_arg)
-    :Item_str_func(a, b), is_date(type_arg) { sign= neg_arg ? -1 : 1; }
-  String *val_str(String *str);
+    :Item_temporal_func(a, b), is_date(type_arg) { sign= neg_arg ? -1 : 1; }
   enum_field_types field_type() const { return cached_field_type; }
   void fix_length_and_dec();
-  CHARSET_INFO *charset_for_protocol(void) const { return &my_charset_bin; }
-
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
-  virtual void print(String *str, enum_query_type query_type);
+  bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
+  void print(String *str, enum_query_type query_type);
   const char *func_name() const { return "add_time"; }
-  double val_real() { return val_real_from_decimal(); }
-  my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    if (cached_field_type == MYSQL_TYPE_TIME)
-      return  val_decimal_from_time(decimal_value);
-    if (cached_field_type == MYSQL_TYPE_DATETIME)
-      return  val_decimal_from_date(decimal_value);
-    return Item_str_func::val_decimal(decimal_value);
-  }
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    if (cached_field_type == MYSQL_TYPE_TIME)
-      return save_time_in_field(field);
-    if (cached_field_type == MYSQL_TYPE_DATETIME)
-      return save_date_in_field(field);
-    return Item_str_func::save_in_field(field, no_conversions);
-  }
-  longlong val_int();
-  MYSQL_TIME *val_datetime(MYSQL_TIME *time, date_time_format_types *format);
 };
 
-class Item_func_timediff :public Item_str_timefunc
+class Item_func_timediff :public Item_timefunc
 {
 public:
   Item_func_timediff(Item *a, Item *b)
-    :Item_str_timefunc(a, b) {}
-  String *val_str(String *str);
+    :Item_timefunc(a, b) {}
   const char *func_name() const { return "timediff"; }
   void fix_length_and_dec()
   {
-    Item_str_timefunc::fix_length_and_dec();
-    maybe_null= 1;
+    decimals= max(args[0]->decimals, args[1]->decimals);
+    Item_timefunc::fix_length_and_dec();
   }
+  bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
 };
 
-class Item_func_maketime :public Item_str_timefunc
+class Item_func_maketime :public Item_timefunc
 {
 public:
   Item_func_maketime(Item *a, Item *b, Item *c)
-    :Item_str_timefunc(a, b, c) 
-  {
-    maybe_null= TRUE;
-  }
-  String *val_str(String *str);
+    :Item_timefunc(a, b, c) 
+  {}
   const char *func_name() const { return "maketime"; }
+  bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
 };
 
+
 class Item_func_microsecond :public Item_int_func
 {
 public:
@@ -1109,6 +966,7 @@ public:
     maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
   bool check_valid_arguments_processor(uchar *int_arg)
   {
     return !has_time_args();
@@ -1157,42 +1015,28 @@ public:
 };
 
 
-class Item_func_str_to_date :public Item_str_func
+class Item_func_str_to_date :public Item_temporal_func
 {
   enum_field_types cached_field_type;
-  date_time_format_types cached_format_type;
   timestamp_type cached_timestamp_type;
   bool const_item;
-  ulonglong sql_mode;
 public:
   Item_func_str_to_date(Item *a, Item *b)
-    :Item_str_func(a, b), const_item(false)
+    :Item_temporal_func(a, b), const_item(false)
   {}
-  String *val_str(String *str);
   bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
   const char *func_name() const { return "str_to_date"; }
   enum_field_types field_type() const { return cached_field_type; }
   void fix_length_and_dec();
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 1);
-  }
-  longlong val_int();
-  bool result_as_longlong() { return TRUE; }
 };
 
 
-class Item_func_last_day :public Item_date
+class Item_func_last_day :public Item_datefunc
 {
 public:
-  Item_func_last_day(Item *a) :Item_date(a) {}
+  Item_func_last_day(Item *a) :Item_datefunc(a) {}
   const char *func_name() const { return "last_day"; }
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
-  void fix_length_and_dec()
-  { 
-    Item_date::fix_length_and_dec();
-    maybe_null= 1;
-  }
 };
 
 
diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc
index 79773095515..b93a83c05a9 100644
--- a/sql/item_xmlfunc.cc
+++ b/sql/item_xmlfunc.cc
@@ -228,6 +228,11 @@ public:
     collation.collation= pxml->charset();
   }
   const char *func_name() const { return "nodeset"; }
+  bool check_vcol_func_processor(uchar *int_arg)
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
+
 };
 
 
@@ -534,6 +539,10 @@ public:
   enum Type type() const { return XPATH_NODESET_CMP; };
   const char *func_name() const { return "xpath_nodeset_to_const_comparator"; }
   bool is_bool_func() { return 1; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 
   longlong val_int()
   {
@@ -2057,8 +2066,8 @@ static int my_xpath_parse_OrExpr(MY_XPATH *xpath)
     Item *prev= xpath->item;
     if (!my_xpath_parse_AndExpr(xpath))
     {
-      return 0;
       xpath->error= 1;
+      return 0;
     }
     xpath->item= new Item_cond_or(nodeset2bool(xpath, prev),
                                   nodeset2bool(xpath, xpath->item));
diff --git a/sql/item_xmlfunc.h b/sql/item_xmlfunc.h
index adad6b8e87c..800cf6ed760 100644
--- a/sql/item_xmlfunc.h
+++ b/sql/item_xmlfunc.h
@@ -44,6 +44,10 @@ public:
   }
   void fix_length_and_dec();
   String *parse_xml(String *raw_xml, String *parsed_xml_buf);
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
diff --git a/sql/key.cc b/sql/key.cc
index 9d0f86f2565..dd7818119c8 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -106,30 +106,46 @@ int find_ref_key(KEY *key, uint key_count, uchar *record, Field *field,
   @param from_record full record to be copied from
   @param key_info    descriptor of the index
   @param key_length  specifies length of all keyparts that will be copied
+  @param with_zerofill  skipped bytes in the key buffer to be filled with 0
 */
 
 void key_copy(uchar *to_key, uchar *from_record, KEY *key_info,
-              uint key_length)
+              uint key_length, bool with_zerofill)
 {
   uint length;
   KEY_PART_INFO *key_part;
 
   if (key_length == 0)
     key_length= key_info->key_length;
-  for (key_part= key_info->key_part; (int) key_length > 0; key_part++)
+  for (key_part= key_info->key_part;
+       (int) key_length > 0;
+       key_part++, to_key+= length, key_length-= length)
   {
     if (key_part->null_bit)
     {
       *to_key++= test(from_record[key_part->null_offset] &
 		   key_part->null_bit);
       key_length--;
+      if (to_key[-1])
+      {
+        /*
+          Don't copy data for null values
+          The -1 below is to subtract the null byte which is already handled
+        */
+        length= min(key_length, (uint) key_part->store_length-1);
+        if (with_zerofill)
+          bzero((char*) to_key, length);
+        continue;
+      }
     }
     if (key_part->key_part_flag & HA_BLOB_PART ||
         key_part->key_part_flag & HA_VAR_LENGTH_PART)
     {
       key_length-= HA_KEY_BLOB_LENGTH;
       length= min(key_length, key_part->length);
-      key_part->field->get_key_image(to_key, length, Field::itRAW);
+      uint bytes= key_part->field->get_key_image(to_key, length, Field::itRAW);
+      if (with_zerofill && bytes < length)
+        bzero((char*) to_key + bytes, length - bytes);
       to_key+= HA_KEY_BLOB_LENGTH;
     }
     else
@@ -141,8 +157,6 @@ void key_copy(uchar *to_key, uchar *from_record, KEY *key_info,
       if (bytes < length)
         cs->cset->fill(cs, (char*) to_key + bytes, length - bytes, ' ');
     }
-    to_key+= length;
-    key_length-= length;
   }
 }
 
@@ -169,16 +183,28 @@ void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
   {
     key_length= key_info->key_length;
   }
-  for (key_part= key_info->key_part ; (int) key_length > 0 ; key_part++)
+  for (key_part= key_info->key_part ;
+       (int) key_length > 0 ;
+       key_part++, from_key+= length, key_length-= length)
   {
     uchar used_uneven_bits= 0;
     if (key_part->null_bit)
     {
-      if (*from_key++)
+      bool null_value; 
+      if ((null_value= *from_key++))
 	to_record[key_part->null_offset]|= key_part->null_bit;
       else
 	to_record[key_part->null_offset]&= ~key_part->null_bit;
       key_length--;
+      if (null_value)
+      {
+        /*
+          Don't copy data for null values
+          The -1 below is to subtract the null byte which is already handled
+        */
+        length= min(key_length, (uint) key_part->store_length-1);
+        continue;
+      }
     }
     if (key_part->type == HA_KEYTYPE_BIT)
     {
@@ -232,8 +258,6 @@ void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
       memcpy(to_record + key_part->offset, from_key + used_uneven_bits
              , (size_t) length - used_uneven_bits);
     }
-    from_key+= length;
-    key_length-= length;
   }
 }
 
@@ -281,8 +305,7 @@ bool key_cmp_if_same(TABLE *table,const uchar *key,uint idx,uint key_length)
       key++;
       store_length--;
     }
-    if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART |
-                                   HA_BIT_PART))
+    if (!(key_part->key_part_flag & HA_CAN_MEMCMP))
     {
       if (key_part->field->key_cmp(key, key_part->length))
 	return 1;
@@ -579,3 +602,287 @@ next_loop:
   } while (key_info); /* no more keys to test */
   DBUG_RETURN(0);
 }
+
+
+/**
+  Compare two key tuples.
+
+  @brief
+    Compare two key values in KeyTupleFormat, key part by key part.
+    A NULL key part sorts before any non-NULL value; two NULLs are equal.
+  @param part          First KEY_PART_INFO describing the tuple's key parts
+  @param key1          First key to compare
+  @param key2          Second key to compare
+  @param tuple_length  Length of key1 (and key2, they are the same) in bytes.
+
+  @return
+    @retval  0  key1 == key2
+    @retval <0  key1 < key2 (-1 for NULL vs. non-NULL, else field->key_cmp())
+    @retval >0  key1 > key2 (+1 for non-NULL vs. NULL, else field->key_cmp())
+*/
+
+int key_tuple_cmp(KEY_PART_INFO *part, uchar *key1, uchar *key2, 
+                  uint tuple_length)
+{
+  uchar *key1_end= key1 + tuple_length;
+  int len;
+  int res;
+  LINT_INIT(len);
+  for (;key1 < key1_end; key1 += len, key2 += len, part++)
+  {
+    len= part->store_length;  /* bytes used by this part, NULL byte included */
+    if (part->null_bit)
+    {
+      if (*key1) /* key1 is NULL */
+      {
+        if (!*key2) /* key1 (NULL) < key2 (not NULL) */
+          return -1;
+        continue;   /* both NULL: equal for this part, go to the next one */
+      }
+      else if (*key2) /* key1 (not NULL) > key2 (NULL) */
+        return 1;
+      /* Step over the NULL bytes for the key_cmp() call; len shrinks to match */
+      key1++;
+      key2++;
+      len--;
+    }
+    if ((res= part->field->key_cmp(key1, key2)))
+      return res;   /* first differing key part decides the result */
+  }
+  return 0;
+}
+
+
+/**
+  Get hash value for the key from a key buffer
+
+  @param  key_info        the key descriptor
+  @param  used_key_parts  number of key parts used for the key
+  @param  key             pointer to the buffer with the key value
+
+  @details
+  When hashing we should take special care only of:
+  1. NULLs (and keyparts which can be null so one byte reserved for it);
+  2. Strings for which we have to take into account their collations
+  and the values of their lengths in the prefixes.
+
+  @return  hash value calculated for the key
+*/
+
+ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key)
+{
+  ulong nr=1, nr2=4;
+  KEY_PART_INFO *key_part= key_info->key_part;
+  KEY_PART_INFO *end_key_part= key_part + used_key_parts;
+
+  for (; key_part < end_key_part; key_part++)
+  {
+    uchar *pos= (uchar*)key;       /* Start of the current key part */
+    CHARSET_INFO *cs;
+    uint length, pack_length;
+    bool is_string= TRUE;
+    LINT_INIT(cs);
+    LINT_INIT(length);
+    LINT_INIT(pack_length);
+
+    key+= key_part->length;        /* 'key' is moved on to the next key part */
+    if (key_part->null_bit)
+    {
+      key++;                       /* Skip null byte */
+      if (*pos)                    /* Found null */
+      {
+        nr^= (nr << 1) | 1;        /* Mix a NULL marker into the hash */
+        /* Add key pack length to key for VARCHAR segments */
+        switch (key_part->type) {
+        case HA_KEYTYPE_VARTEXT1:
+        case HA_KEYTYPE_VARBINARY1:
+        case HA_KEYTYPE_VARTEXT2:
+        case HA_KEYTYPE_VARBINARY2:
+          key+= 2;
+          break;
+        default:
+          ;
+        }
+    continue;                      /* Value bytes of a NULL part are not hashed */
+      }
+      pos++;                       /* Skip null byte */
+    }
+    /* If it is string set parameters of the string */
+    switch (key_part->type) {
+    case HA_KEYTYPE_TEXT:
+      cs= key_part->field->charset();
+      length= key_part->length;
+      pack_length= 0;
+      break;
+    case HA_KEYTYPE_BINARY :
+      cs= &my_charset_bin;
+      length= key_part->length;
+      pack_length= 0;
+      break;
+    case HA_KEYTYPE_VARTEXT1:
+    case HA_KEYTYPE_VARTEXT2:
+      cs= key_part->field->charset();
+      length= uint2korr(pos);      /* Length is read from a 2-byte prefix */
+      pack_length= 2;
+      break;
+    case HA_KEYTYPE_VARBINARY1:
+    case HA_KEYTYPE_VARBINARY2:
+      cs= &my_charset_bin;
+      length= uint2korr(pos);      /* Length is read from a 2-byte prefix */
+      pack_length= 2;
+      break;
+    default:
+      is_string= FALSE;
+    }
+
+    if (is_string)
+    {
+      if (cs->mbmaxlen > 1)        /* Multi-byte: trim length via my_charpos() */
+      {
+        uint char_length= my_charpos(cs, pos + pack_length,
+                                     pos + pack_length + length,
+                                     length / cs->mbmaxlen);
+        set_if_smaller(length, char_length);
+      }
+      cs->coll->hash_sort(cs, pos+pack_length, length, &nr, &nr2);
+      key+= pack_length;           /* Also step over the length prefix */
+    }
+    else
+    {
+      for (; pos < (uchar*)key ; pos++)   /* Non-string: hash each value byte */
+      {
+        nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) *pos)) + (nr << 8);
+        nr2+=3;
+      }
+    }
+  }
+  DBUG_PRINT("exit", ("hash: %lx", nr));
+  return(nr);
+}
+
+
+/**
+  Check whether two keys in the key buffers are equal
+
+  @param key_info        the key descriptor
+  @param used_key_parts  number of key parts used for the keys
+  @param key1            pointer to the buffer with the first key
+  @param key2            pointer to the buffer with the second key
+
+  @details See details of key_hashnr().
+
+  @retval TRUE  keys in the buffers are NOT equal
+  @retval FALSE keys in the buffers are equal
+*/
+
+bool key_buf_cmp(KEY *key_info, uint used_key_parts,
+                 const uchar *key1, const uchar *key2)
+{
+  KEY_PART_INFO *key_part= key_info->key_part;
+  KEY_PART_INFO *end_key_part= key_part + used_key_parts;
+
+  for (; key_part < end_key_part; key_part++)
+  {
+    uchar *pos1= (uchar*)key1;                  /* Start of this part, key 1 */
+    uchar *pos2= (uchar*)key2;                  /* Start of this part, key 2 */
+    CHARSET_INFO *cs;
+    uint length1, length2, pack_length;
+    bool is_string= TRUE;
+    LINT_INIT(cs);
+    LINT_INIT(length1);
+    LINT_INIT(length2);
+    LINT_INIT(pack_length);
+
+    key1+= key_part->length;                    /* Move on to the next part */
+    key2+= key_part->length;
+    if (key_part->null_bit)
+    {
+      key1++; key2++;                           /* Skip null byte */
+      if (*pos1 && *pos2)                       /* Both are null */
+      {
+        /* Add key pack length to key for VARCHAR segments */
+        switch (key_part->type) {
+        case HA_KEYTYPE_VARTEXT1:
+        case HA_KEYTYPE_VARBINARY1:
+        case HA_KEYTYPE_VARTEXT2:
+        case HA_KEYTYPE_VARBINARY2:
+          key1+= 2; key2+= 2;
+          break;
+        default:
+          ;
+        }
+        continue;                               /* Two NULLs count as equal */
+      }
+      if (*pos1 != *pos2)                       /* Exactly one is null */
+        return TRUE;
+      pos1++; pos2++;                           /* Skip null byte */
+    }
+
+    /* If it is string set parameters of the string */
+    switch (key_part->type) {
+    case HA_KEYTYPE_TEXT:
+      cs= key_part->field->charset();
+      length1= length2= key_part->length;
+      pack_length= 0;
+      break;
+    case HA_KEYTYPE_BINARY :
+      cs= &my_charset_bin;
+      length1= length2= key_part->length;
+      pack_length= 0;
+      break;
+    case HA_KEYTYPE_VARTEXT1:
+    case HA_KEYTYPE_VARTEXT2:
+      cs= key_part->field->charset();
+      length1= uint2korr(pos1);                 /* 2-byte length prefix */
+      length2= uint2korr(pos2);
+      pack_length= 2;
+      break;
+    case HA_KEYTYPE_VARBINARY1:
+    case HA_KEYTYPE_VARBINARY2:
+      cs= &my_charset_bin;
+      length1= uint2korr(pos1);                 /* 2-byte length prefix */
+      length2= uint2korr(pos2);
+      pack_length= 2;
+      break;
+    default:
+      is_string= FALSE;
+    }
+
+    if (is_string)
+    {
+      /*
+        Compare the strings taking into account length in characters
+        and collation
+      */
+      uint byte_len1= length1, byte_len2= length2;  /* Untrimmed byte lengths */
+      if (cs->mbmaxlen > 1)
+      {
+        uint char_length1= my_charpos(cs, pos1 + pack_length,
+                                      pos1 + pack_length + length1,
+                                      length1 / cs->mbmaxlen);
+        uint char_length2= my_charpos(cs, pos2 + pack_length,
+                                      pos2 + pack_length + length2,
+                                      length2 / cs->mbmaxlen);
+        set_if_smaller(length1, char_length1);
+        set_if_smaller(length2, char_length2);
+      }
+      if (length1 != length2 ||   /* NOTE(review): trimmed lengths tested here, */
+          cs->coll->strnncollsp(cs,            /* untrimmed ones passed below */
+                                pos1 + pack_length, byte_len1,
+                                pos2 + pack_length, byte_len2,
+                                1))            /* - confirm this is intended */
+        return TRUE;
+      key1+= pack_length; key2+= pack_length;  /* Step over the length prefix */
+    }
+    else
+    {
+      /* it is OK to compare non-string byte per byte */
+      for (; pos1 < (uchar*)key1 ; pos1++, pos2++)
+      {
+        if (pos1[0] != pos2[0])
+          return TRUE;
+      }
+    }
+  }
+  return FALSE;
+}
diff --git a/sql/key.h b/sql/key.h
index 0c8a4b2db27..8bf6f88fa04 100644
--- a/sql/key.h
+++ b/sql/key.h
@@ -27,13 +27,18 @@ typedef struct st_key_part_info KEY_PART_INFO;
 
 int find_ref_key(KEY *key, uint key_count, uchar *record, Field *field,
                  uint *key_length, uint *keypart);
-void key_copy(uchar *to_key, uchar *from_record, KEY *key_info, uint key_length);
+void key_copy(uchar *to_key, uchar *from_record, KEY *key_info, uint key_length,
+              bool with_zerofill= FALSE);
 void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
                  uint key_length);
 bool key_cmp_if_same(TABLE *form,const uchar *key,uint index,uint key_length);
 void key_unpack(String *to,TABLE *form,uint index);
 bool is_key_used(TABLE *table, uint idx, const MY_BITMAP *fields);
 int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length);
+ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key);
+bool key_buf_cmp(KEY *key_info, uint used_key_parts,
+                 const uchar *key1, const uchar *key2);
 extern "C" int key_rec_cmp(void *key_info, uchar *a, uchar *b);
+int key_tuple_cmp(KEY_PART_INFO *part, uchar *key1, uchar *key2, uint tuple_length);
 
 #endif /* KEY_INCLUDED */
diff --git a/sql/keycaches.cc b/sql/keycaches.cc
index 5535920d6c2..26a39808c56 100644
--- a/sql/keycaches.cc
+++ b/sql/keycaches.cc
@@ -118,6 +118,7 @@ KEY_CACHE *create_key_cache(const char *name, uint length)
       key_cache->param_block_size=     dflt_key_cache_var.param_block_size;
       key_cache->param_division_limit= dflt_key_cache_var.param_division_limit;
       key_cache->param_age_threshold=  dflt_key_cache_var.param_age_threshold;
+      key_cache->param_partitions=     dflt_key_cache_var.param_partitions;
     }
   }
   DBUG_RETURN(key_cache);
@@ -144,17 +145,18 @@ void free_key_cache(const char *name, KEY_CACHE *key_cache)
 }
 
 
-bool process_key_caches(process_key_cache_t func)
+bool process_key_caches(process_key_cache_t func, void *param)
 {
   I_List_iterator<NAMED_ILINK> it(key_caches);
   NAMED_ILINK *element;
+  int res= 0;
 
   while ((element= it++))
   {
     KEY_CACHE *key_cache= (KEY_CACHE *) element->data;
-    func(element->name, key_cache);
+    res |= func(element->name, key_cache, param);
   }
-  return 0;
+  return res != 0;
 }
 
 #ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION
diff --git a/sql/keycaches.h b/sql/keycaches.h
index 10ff27d62c3..04d3f6145e7 100644
--- a/sql/keycaches.h
+++ b/sql/keycaches.h
@@ -21,7 +21,7 @@
 
 extern "C"
 {
-  typedef int (*process_key_cache_t) (const char *, KEY_CACHE *);
+  typedef int (*process_key_cache_t) (const char *, KEY_CACHE *, void *);
 }
 
 class NAMED_ILINK;
@@ -40,6 +40,6 @@ KEY_CACHE *create_key_cache(const char *name, uint length);
 KEY_CACHE *get_key_cache(LEX_STRING *cache_name);
 KEY_CACHE *get_or_create_key_cache(const char *name, uint length);
 void free_key_cache(const char *name, KEY_CACHE *key_cache);
-bool process_key_caches(process_key_cache_t func);
+bool process_key_caches(process_key_cache_t func, void *param);
 
 #endif /* KEYCACHES_INCLUDED */
diff --git a/sql/lex.h b/sql/lex.h
index 4a49858ded7..ea6e9fd9707 100644
--- a/sql/lex.h
+++ b/sql/lex.h
@@ -65,6 +65,7 @@ static SYMBOL symbols[] = {
   { "ALL",		SYM(ALL)},
   { "ALGORITHM",	SYM(ALGORITHM_SYM)},
   { "ALTER",		SYM(ALTER)},
+  { "ALWAYS",           SYM(ALWAYS_SYM)},
   { "ANALYZE",		SYM(ANALYZE_SYM)},
   { "AND",		SYM(AND_SYM)},
   { "ANY",              SYM(ANY_SYM)},
@@ -107,10 +108,12 @@ static SYMBOL symbols[] = {
   { "CHARACTER",	SYM(CHAR_SYM)},
   { "CHARSET",		SYM(CHARSET)},
   { "CHECK",		SYM(CHECK_SYM)},
+  { "CHECKPOINT",	SYM(CHECKPOINT_SYM)},
   { "CHECKSUM",		SYM(CHECKSUM_SYM)},
   { "CIPHER",		SYM(CIPHER_SYM)},
   { "CLASS_ORIGIN",     SYM(CLASS_ORIGIN_SYM)},
   { "CLIENT",		SYM(CLIENT_SYM)},
+  { "CLIENT_STATISTICS", SYM(CLIENT_STATS_SYM)},
   { "CLOSE",		SYM(CLOSE_SYM)},
   { "COALESCE",		SYM(COALESCE)},
   { "CODE",             SYM(CODE_SYM)},
@@ -119,6 +122,12 @@ static SYMBOL symbols[] = {
   { "COLUMN",		SYM(COLUMN_SYM)},
   { "COLUMN_NAME",      SYM(COLUMN_NAME_SYM)},
   { "COLUMNS",		SYM(COLUMNS)},
+  { "COLUMN_ADD",       SYM(COLUMN_ADD_SYM)},
+  { "COLUMN_CREATE",    SYM(COLUMN_CREATE_SYM)},
+  { "COLUMN_DELETE",    SYM(COLUMN_DELETE_SYM)},
+  { "COLUMN_EXISTS",    SYM(COLUMN_EXISTS_SYM)},
+  { "COLUMN_GET",       SYM(COLUMN_GET_SYM)},
+  { "COLUMN_LIST",      SYM(COLUMN_LIST_SYM)},
   { "COMMENT",		SYM(COMMENT_SYM)},
   { "COMMIT",		SYM(COMMIT_SYM)},
   { "COMMITTED",	SYM(COMMITTED_SYM)},
@@ -231,6 +240,7 @@ static SYMBOL symbols[] = {
   { "FULLTEXT",		SYM(FULLTEXT_SYM)},
   { "FUNCTION",		SYM(FUNCTION_SYM)},
   { "GENERAL",          SYM(GENERAL)},
+  { "GENERATED",        SYM(GENERATED_SYM)},
   { "GEOMETRY",		SYM(GEOMETRY_SYM)},
   { "GEOMETRYCOLLECTION",SYM(GEOMETRYCOLLECTION)},
   { "GET_FORMAT",       SYM(GET_FORMAT)},
@@ -257,6 +267,7 @@ static SYMBOL symbols[] = {
   { "IN",		SYM(IN_SYM)},
   { "INDEX",		SYM(INDEX_SYM)},
   { "INDEXES",		SYM(INDEXES)},
+  { "INDEX_STATISTICS",	SYM(INDEX_STATS_SYM)},
   { "INFILE",		SYM(INFILE)},
   { "INITIAL_SIZE",	SYM(INITIAL_SIZE_SYM)},
   { "INNER",		SYM(INNER_SYM)},
@@ -386,6 +397,7 @@ static SYMBOL symbols[] = {
   { "ON",		SYM(ON)},
   { "ONE",              SYM(ONE_SYM)},
   { "ONE_SHOT",		SYM(ONE_SHOT_SYM)},
+  { "ONLINE",		SYM(ONLINE_SYM)},
   { "OPEN",		SYM(OPEN_SYM)},
   { "OPTIMIZE",		SYM(OPTIMIZE)},
   { "OPTIONS",		SYM(OPTIONS_SYM)},
@@ -398,13 +410,16 @@ static SYMBOL symbols[] = {
   { "OUTFILE",		SYM(OUTFILE)},
   { "OWNER",		SYM(OWNER_SYM)},
   { "PACK_KEYS",	SYM(PACK_KEYS_SYM)},
-  { "PARSER",           SYM(PARSER_SYM)},
   { "PAGE",	        SYM(PAGE_SYM)},
+  { "PAGE_CHECKSUM",	SYM(PAGE_CHECKSUM_SYM)},
+  { "PARSER",           SYM(PARSER_SYM)},
+  { "PARSE_VCOL_EXPR",  SYM(PARSE_VCOL_EXPR_SYM)},
   { "PARTIAL",		SYM(PARTIAL)},
   { "PARTITION",        SYM(PARTITION_SYM)},
   { "PARTITIONING",     SYM(PARTITIONING_SYM)},
   { "PARTITIONS",       SYM(PARTITIONS_SYM)},
   { "PASSWORD",		SYM(PASSWORD)},
+  { "PERSISTENT",	SYM(PERSISTENT_SYM)},
   { "PHASE",            SYM(PHASE_SYM)},
   { "PLUGIN",           SYM(PLUGIN_SYM)},
   { "PLUGINS",          SYM(PLUGINS_SYM)},
@@ -546,6 +561,7 @@ static SYMBOL symbols[] = {
   { "TABLE_NAME",       SYM(TABLE_NAME_SYM)},
   { "TABLES",		SYM(TABLES)},
   { "TABLESPACE",	        SYM(TABLESPACE)},
+  { "TABLE_STATISTICS",	SYM(TABLE_STATS_SYM)},
   { "TABLE_CHECKSUM",	SYM(TABLE_CHECKSUM_SYM)},
   { "TEMPORARY",	SYM(TEMPORARY)},
   { "TEMPTABLE",	SYM(TEMPTABLE_SYM)},
@@ -563,6 +579,7 @@ static SYMBOL symbols[] = {
   { "TO",		SYM(TO_SYM)},
   { "TRAILING",		SYM(TRAILING)},
   { "TRANSACTION",	SYM(TRANSACTION_SYM)},
+  { "TRANSACTIONAL",	SYM(TRANSACTIONAL_SYM)},
   { "TRIGGER",          SYM(TRIGGER_SYM)},
   { "TRIGGERS",         SYM(TRIGGERS_SYM)},
   { "TRUE",		SYM(TRUE_SYM)},
@@ -588,6 +605,7 @@ static SYMBOL symbols[] = {
   { "USE",		SYM(USE_SYM)},
   { "USER",		SYM(USER)},
   { "USER_RESOURCES",	SYM(RESOURCES)},
+  { "USER_STATISTICS",	SYM(USER_STATS_SYM)},
   { "USE_FRM",		SYM(USE_FRM)},
   { "USING",		SYM(USING)},
   { "UTC_DATE",         SYM(UTC_DATE_SYM)},
@@ -600,13 +618,15 @@ static SYMBOL symbols[] = {
   { "VARCHARACTER",	SYM(VARCHAR)},
   { "VARIABLES",	SYM(VARIABLES)},
   { "VARYING",		SYM(VARYING)},
+  { "VIA",              SYM(VIA_SYM)},
+  { "VIEW",		SYM(VIEW_SYM)},
+  { "VIRTUAL",          SYM(VIRTUAL_SYM)},
   { "WAIT",		SYM(WAIT_SYM)},
   { "WARNINGS",		SYM(WARNINGS)},
   { "WEEK",		SYM(WEEK_SYM)},
   { "WHEN",		SYM(WHEN_SYM)},
   { "WHERE",		SYM(WHERE)},
   { "WHILE",            SYM(WHILE_SYM)},
-  { "VIEW",		SYM(VIEW_SYM)},
   { "WITH",		SYM(WITH)},
   { "WORK",		SYM(WORK_SYM)},
   { "WRAPPER",		SYM(WRAPPER_SYM)},
diff --git a/sql/lock.cc b/sql/lock.cc
index 65a51bfc0fb..631e5018cf3 100644
--- a/sql/lock.cc
+++ b/sql/lock.cc
@@ -34,8 +34,8 @@
       This is followed by a call to thr_multi_lock() for all tables.
 
   - When statement is done, we call mysql_unlock_tables().
-    This will call thr_multi_unlock() followed by
-    table_handler->external_lock(thd, F_UNLCK) for each table.
+    table_handler->external_lock(thd, F_UNLCK) followed by
+    thr_multi_unlock() for each table.
 
   - Note that mysql_unlock_tables() may be called several times as
     MySQL in some cases can free some tables earlier than others.
@@ -90,12 +90,6 @@
 
 extern HASH open_cache;
 
-/* flags for get_lock_data */
-#define GET_LOCK_UNLOCK         1
-#define GET_LOCK_STORE_LOCKS    2
-
-static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
-                                 uint flags);
 static int lock_external(THD *thd, TABLE **table,uint count);
 static int unlock_external(THD *thd, TABLE **table,uint count);
 static void print_lock_error(int error, const char *);
@@ -112,6 +106,7 @@ static int thr_lock_errno_to_mysql[]=
   @param flags Lock flags
   @return 0 if all the check passed, non zero if a check failed.
 */
+
 static int
 lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags)
 {
@@ -161,7 +156,7 @@ lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags)
 
       if (t->db_stat & HA_READ_ONLY)
       {
-        my_error(ER_OPEN_AS_READONLY, MYF(0), t->alias);
+        my_error(ER_OPEN_AS_READONLY, MYF(0), t->alias.c_ptr_safe());
         DBUG_RETURN(1);
       }
     }
@@ -217,7 +212,10 @@ lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags)
 /**
   Reset lock type in lock data
 
-  @param mysql_lock Lock structures to reset.
+  @param mysql_lock             Lock structures to reset.
+  @param unlock			If set, then set lock type to TL_UNLOCK,
+  				otherwise set to original lock type from
+				get_store_lock().
 
   @note After a locking error we want to quit the locking of the table(s).
         The test case in the bug report for Bug #18544 has the following
@@ -235,7 +233,7 @@ lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags)
 */
 
 
-static void reset_lock_data(MYSQL_LOCK *sql_lock)
+void reset_lock_data(MYSQL_LOCK *sql_lock, bool unlock)
 {
   THR_LOCK_DATA **ldata, **ldata_end;
   DBUG_ENTER("reset_lock_data");
@@ -244,30 +242,12 @@ static void reset_lock_data(MYSQL_LOCK *sql_lock)
   for (ldata= sql_lock->locks, ldata_end= ldata + sql_lock->lock_count;
        ldata < ldata_end;
        ldata++)
-  {
-    /* Reset lock type. */
-    (*ldata)->type= TL_UNLOCK;
-  }
+    (*ldata)->type= unlock ? TL_UNLOCK : (*ldata)->org_type;
   DBUG_VOID_RETURN;
 }
 
 
 /**
-  Reset lock type in lock data and free.
-
-  @param mysql_lock Lock structures to reset.
-
-*/
-
-static void reset_lock_data_and_free(MYSQL_LOCK **mysql_lock)
-{
-  reset_lock_data(*mysql_lock);
-  my_free(*mysql_lock);
-  *mysql_lock= 0;
-}
-
-
-/**
    Lock tables.
 
    @param thd          The current thread.
@@ -283,12 +263,8 @@ static void reset_lock_data_and_free(MYSQL_LOCK **mysql_lock)
 
 MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, uint flags)
 {
-  int rc;
   MYSQL_LOCK *sql_lock;
-  ulong timeout= (flags & MYSQL_LOCK_IGNORE_TIMEOUT) ?
-    LONG_TIMEOUT : thd->variables.lock_wait_timeout;
-
-  DBUG_ENTER("mysql_lock_tables");
+  DBUG_ENTER("mysql_lock_tables(tables)");
 
   if (lock_tables_check(thd, tables, count, flags))
     DBUG_RETURN(NULL);
@@ -296,15 +272,43 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, uint flags)
   if (! (sql_lock= get_lock_data(thd, tables, count, GET_LOCK_STORE_LOCKS)))
     DBUG_RETURN(NULL);
 
+  if (mysql_lock_tables(thd, sql_lock, flags))
+  {
+    /* Clear the lock type of all lock data to avoid reusage. */
+    reset_lock_data(sql_lock, 1);
+    my_free(sql_lock);
+    sql_lock= 0;
+  }
+  DBUG_RETURN(sql_lock);
+}
+
+/**
+   Lock tables based on a MYSQL_LOCK structure.
+
+   mysql_lock_tables()
+
+   @param thd			The current thread.
+   @param sql_lock		Tables that should be locked
+   @param flags			See mysql_lock_tables() above
+
+   @retval 0  ok
+   @retval 1  error
+*/
+
+bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags)
+{
+  int rc= 1;
+  ulong timeout= (flags & MYSQL_LOCK_IGNORE_TIMEOUT) ?
+    LONG_TIMEOUT : thd->variables.lock_wait_timeout;
+
+  DBUG_ENTER("mysql_lock_tables(sql_lock)");
+
   thd_proc_info(thd, "System lock");
-  DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info));
   if (sql_lock->table_count && lock_external(thd, sql_lock->table,
                                              sql_lock->table_count))
-  {
-    /* Clear the lock type of all lock data to avoid reusage. */
-    reset_lock_data_and_free(&sql_lock);
     goto end;
-  }
+
+  thd_proc_info(thd, "Table lock");
 
   /* Copy the lock data array. thr_multi_lock() reorders its contents. */
   memcpy(sql_lock->locks + sql_lock->lock_count, sql_lock->locks,
@@ -314,29 +318,24 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, uint flags)
                                                    sql_lock->lock_count,
                                                    sql_lock->lock_count,
                                                    &thd->lock_info, timeout)];
-  if (rc)
-  {
-    if (sql_lock->table_count)
-      (void) unlock_external(thd, sql_lock->table, sql_lock->table_count);
-    reset_lock_data_and_free(&sql_lock);
-    if (! thd->killed)
-      my_error(rc, MYF(0));
-  }
+  if (rc && sql_lock->table_count)
+    (void) unlock_external(thd, sql_lock->table, sql_lock->table_count);
+
 end:
   thd_proc_info(thd, 0);
 
   if (thd->killed)
   {
     thd->send_kill_message();
-    if (sql_lock)
-    {
-      mysql_unlock_tables(thd, sql_lock);
-      sql_lock= 0;
-    }
+    if (!rc)
+      mysql_unlock_tables(thd, sql_lock, 0);
+    rc= 1;
   }
+  else if (rc)
+    my_error(rc, MYF(0));
 
   thd->set_time_after_lock();
-  DBUG_RETURN(sql_lock);
+  DBUG_RETURN(rc);
 }
 
 
@@ -377,14 +376,15 @@ static int lock_external(THD *thd, TABLE **tables, uint count)
 }
 
 
-void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock)
+void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock, bool free_lock)
 {
   DBUG_ENTER("mysql_unlock_tables");
-  if (sql_lock->lock_count)
-    thr_multi_unlock(sql_lock->locks,sql_lock->lock_count);
   if (sql_lock->table_count)
-    (void) unlock_external(thd,sql_lock->table,sql_lock->table_count);
-  my_free(sql_lock);
+    unlock_external(thd, sql_lock->table, sql_lock->table_count);
+  if (sql_lock->lock_count)
+    thr_multi_unlock(sql_lock->locks, sql_lock->lock_count, 0);
+  if (free_lock)
+    my_free(sql_lock);
   DBUG_VOID_RETURN;
 }
 
@@ -411,25 +411,8 @@ void mysql_unlock_read_tables(THD *thd, MYSQL_LOCK *sql_lock)
   uint i,found;
   DBUG_ENTER("mysql_unlock_read_tables");
 
-  /* Move all write locks first */
-  THR_LOCK_DATA **lock=sql_lock->locks;
-  for (i=found=0 ; i < sql_lock->lock_count ; i++)
-  {
-    if (sql_lock->locks[i]->type > TL_WRITE_ALLOW_WRITE)
-    {
-      swap_variables(THR_LOCK_DATA *, *lock, sql_lock->locks[i]);
-      lock++;
-      found++;
-    }
-  }
-  /* unlock the read locked tables */
-  if (i != found)
-  {
-    thr_multi_unlock(lock,i-found);
-    sql_lock->lock_count= found;
-  }
+  /* Call external lock for all tables to be unlocked */
 
-  /* Then do the same for the external locks */
   /* Move all write locked tables first */
   TABLE **table=sql_lock->table;
   for (i=found=0 ; i < sql_lock->table_count ; i++)
@@ -448,6 +431,27 @@ void mysql_unlock_read_tables(THD *thd, MYSQL_LOCK *sql_lock)
     (void) unlock_external(thd,table,i-found);
     sql_lock->table_count=found;
   }
+
+  /* Call thr_unlock() for all tables to be unlocked */
+
+  /* Move all write locks first */
+  THR_LOCK_DATA **lock=sql_lock->locks;
+  for (i=found=0 ; i < sql_lock->lock_count ; i++)
+  {
+    if (sql_lock->locks[i]->type >= TL_WRITE_ALLOW_WRITE)
+    {
+      swap_variables(THR_LOCK_DATA *, *lock, sql_lock->locks[i]);
+      lock++;
+      found++;
+    }
+  }
+  /* unlock the read locked tables */
+  if (i != found)
+  {
+    thr_multi_unlock(lock, i-found, 0);
+    sql_lock->lock_count= found;
+  }
+
   /* Fix the lock positions in TABLE */
   table= sql_lock->table;
   found= 0;
@@ -583,21 +587,36 @@ bool mysql_lock_abort_for_thread(THD *thd, TABLE *table)
 }
 
 
+/**
+  Merge two thr_lock:s
+  mysql_lock_merge()
+
+  @param a	Original locks
+  @param b	New locks
+
+  @return	New lock structure that contains a and b, or 0 if allocation fails
+
+  @note
+  a and b are freed with my_free()
+*/
+
 MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b)
 {
   MYSQL_LOCK *sql_lock;
   TABLE **table, **end_table;
   DBUG_ENTER("mysql_lock_merge");
+  DBUG_PRINT("enter", ("a->lock_count: %u  b->lock_count: %u",
+                       a->lock_count, b->lock_count));
 
   if (!(sql_lock= (MYSQL_LOCK*)
 	my_malloc(sizeof(*sql_lock)+
-		  sizeof(THR_LOCK_DATA*)*(a->lock_count+b->lock_count)+
+		  sizeof(THR_LOCK_DATA*)*((a->lock_count+b->lock_count)*2) +
 		  sizeof(TABLE*)*(a->table_count+b->table_count),MYF(MY_WME))))
     DBUG_RETURN(0);				// Fatal error
   sql_lock->lock_count=a->lock_count+b->lock_count;
   sql_lock->table_count=a->table_count+b->table_count;
   sql_lock->locks=(THR_LOCK_DATA**) (sql_lock+1);
-  sql_lock->table=(TABLE**) (sql_lock->locks+sql_lock->lock_count);
+  sql_lock->table=(TABLE**) (sql_lock->locks+sql_lock->lock_count*2);
   memcpy(sql_lock->locks,a->locks,a->lock_count*sizeof(*a->locks));
   memcpy(sql_lock->locks+a->lock_count,b->locks,
 	 b->lock_count*sizeof(*b->locks));
@@ -618,11 +637,21 @@ MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b)
     (*table)->lock_data_start+= a->lock_count;
   }
 
+  /*
+    Ensure that locks of the same tables share same data structures if we
+    reopen a table that is already open. This can happen for example with
+    MERGE tables.
+  */
+
+  /* Copy the lock data array. thr_merge_lock() reorders its content */
+  memcpy(sql_lock->locks + sql_lock->lock_count, sql_lock->locks,
+         sql_lock->lock_count * sizeof(*sql_lock->locks));
+  thr_merge_locks(sql_lock->locks + sql_lock->lock_count,
+                  a->lock_count, b->lock_count);
+
   /* Delete old, not needed locks */
   my_free(a);
   my_free(b);
-
-  thr_lock_merge_status(sql_lock->locks, sql_lock->lock_count);
   DBUG_RETURN(sql_lock);
 }
 
@@ -662,12 +691,11 @@ static int unlock_external(THD *thd, TABLE **table,uint count)
            - GET_LOCK_STORE_LOCKS : Store lock info in TABLE
 */
 
-static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
-                                 uint flags)
+MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, uint flags)
 {
   uint i,tables,lock_count;
   MYSQL_LOCK *sql_lock;
-  THR_LOCK_DATA **locks, **locks_buf, **locks_start;
+  THR_LOCK_DATA **locks, **locks_buf;
   TABLE **to, **table_buf;
   DBUG_ENTER("get_lock_data");
 
@@ -687,7 +715,7 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
 
   /*
     Allocating twice the number of pointers for lock data for use in
-    thr_mulit_lock(). This function reorders the lock data, but cannot
+    thr_multi_lock(). This function reorders the lock data, but cannot
     update the table values. So the second part of the array is copied
     from the first part immediately before calling thr_multi_lock().
   */
@@ -705,7 +733,7 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
   {
     TABLE *table;
     enum thr_lock_type lock_type;
-    THR_LOCK_DATA **org_locks = locks;
+    THR_LOCK_DATA **locks_start;
 
     if ((table=table_ptr[i])->s->tmp_table == NON_TRANSACTIONAL_TMP_TABLE)
       continue;
@@ -723,8 +751,14 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
     }
     *to++= table;
     if (locks)
-      for ( ; org_locks != locks ; org_locks++)
-	(*org_locks)->debug_print_param= (void *) table;
+    {
+      for ( ; locks_start != locks ; locks_start++)
+      {
+	(*locks_start)->debug_print_param= (void *) table;
+	(*locks_start)->lock->name=         table->alias.c_ptr();
+	(*locks_start)->org_type=           (*locks_start)->type;
+      }
+    }
   }
   /*
     We do not use 'tables', because there are cases where store_lock()
diff --git a/sql/lock.h b/sql/lock.h
index e0a3382fe42..a4833cdc38e 100644
--- a/sql/lock.h
+++ b/sql/lock.h
@@ -27,7 +27,8 @@ typedef struct st_mysql_lock MYSQL_LOCK;
 
 
 MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **table, uint count, uint flags);
-void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock);
+bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags);
+void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock, bool free_lock= 1);
 void mysql_unlock_read_tables(THD *thd, MYSQL_LOCK *sql_lock);
 void mysql_unlock_some_tables(THD *thd, TABLE **table,uint count);
 void mysql_lock_remove(THD *thd, MYSQL_LOCK *locked,TABLE *table);
@@ -40,4 +41,11 @@ bool lock_schema_name(THD *thd, const char *db);
 bool lock_object_name(THD *thd, MDL_key::enum_mdl_namespace mdl_type,
                       const char *db, const char *name);
 
+/* flags for get_lock_data */
+#define GET_LOCK_UNLOCK         1
+#define GET_LOCK_STORE_LOCKS    2
+
+MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, uint flags);
+void reset_lock_data(MYSQL_LOCK *sql_lock, bool unlock);
+
 #endif /* LOCK_INCLUDED */
diff --git a/sql/log.cc b/sql/log.cc
index c6b41447d6a..c60cca3a2e9 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2010-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -38,6 +39,7 @@
 #include "rpl_filter.h"
 #include "rpl_rli.h"
 #include "sql_audit.h"
+#include "log_slow.h"
 
 #include <my_dir.h>
 #include <stdarg.h>
@@ -49,6 +51,7 @@
 
 #include "sql_plugin.h"
 #include "rpl_handler.h"
+#include "debug_sync.h"
 
 /* max size of the log message */
 #define MAX_LOG_BUFFER_SIZE 1024
@@ -70,6 +73,38 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
 static int binlog_commit(handlerton *hton, THD *thd, bool all);
 static int binlog_rollback(handlerton *hton, THD *thd, bool all);
 static int binlog_prepare(handlerton *hton, THD *thd, bool all);
+static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);
+
+static LEX_STRING const write_error_msg=
+    { C_STRING_WITH_LEN("error writing to the binary log") };
+
+static my_bool opt_optimize_thread_scheduling= TRUE;
+ulong binlog_checksum_options;
+#ifndef DBUG_OFF
+static ulong opt_binlog_dbug_fsync_sleep= 0;
+#endif
+
+mysql_mutex_t LOCK_prepare_ordered;
+mysql_mutex_t LOCK_commit_ordered;
+
+static ulonglong binlog_status_var_num_commits;
+static ulonglong binlog_status_var_num_group_commits;
+static char binlog_snapshot_file[FN_REFLEN];
+static ulonglong binlog_snapshot_position;
+
+static SHOW_VAR binlog_status_vars_detail[]=
+{
+  {"commits",
+    (char *)&binlog_status_var_num_commits, SHOW_LONGLONG},
+  {"group_commits",
+    (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG},
+  {"snapshot_file",
+    (char *)&binlog_snapshot_file, SHOW_CHAR},
+  {"snapshot_position",
+   (char *)&binlog_snapshot_position, SHOW_LONGLONG},
+  {NullS, NullS, SHOW_LONG}
+};
+
 
 /**
    purge logs, master and slave sides both, related error code
@@ -147,59 +182,27 @@ sql_print_message_func sql_print_message_handlers[3] =
   sql_print_error
 };
 
-/**
-  Create the name of the log specified.
-
-  This method forms a new path + file name for the
-  log specified in @c name.
 
-  @param[IN] buff    Location for building new string.
-  @param[IN] name    Name of the log file.
-  @param[IN] log_ext The extension for the log (e.g. .log).
-
-  @returns Pointer to new string containing the name.
+/**
+  Create the name of the log file
+  
+  @param[OUT] out    Receives a pointer to the newly allocated name
+  @param[IN] log_ext The extension for the file (e.g. .log)
+  @param[IN] once    TRUE: use my_once_strdup(); FALSE: use my_strdup()
 */
-char *make_log_name(char *buff, const char *name, const char* log_ext)
-{
-  strmake(buff, name, FN_REFLEN-5);
-  return fn_format(buff, buff, mysql_real_data_home, log_ext,
-                   MYF(MY_UNPACK_FILENAME|MY_REPLACE_EXT));
-}
-
-/*
-  Helper class to hold a mutex for the duration of the
-  block.
-
-  Eliminates the need for explicit unlocking of mutexes on, e.g.,
-  error returns.  On passing a null pointer, the sentry will not do
-  anything.
- */
-class Mutex_sentry
+void make_default_log_name(char **out, const char* log_ext, bool once)
 {
-public:
-  Mutex_sentry(mysql_mutex_t *mutex)
-    : m_mutex(mutex)
+  char buff[FN_REFLEN+10];
+  fn_format(buff, opt_log_basename, "", log_ext, MYF(MY_REPLACE_EXT));
+  if (once)
+    *out= my_once_strdup(buff, MYF(MY_WME));
+  else
   {
-    if (m_mutex)
-      mysql_mutex_lock(mutex);
+    my_free(*out);
+    *out= my_strdup(buff, MYF(MY_WME));
   }
+}
 
-  ~Mutex_sentry()
-  {
-    if (m_mutex)
-      mysql_mutex_unlock(m_mutex);
-#ifndef DBUG_OFF
-    m_mutex= 0;
-#endif
-  }
-
-private:
-  mysql_mutex_t *m_mutex;
-
-  // It's not allowed to copy this object in any way
-  Mutex_sentry(Mutex_sentry const&);
-  void operator=(Mutex_sentry const&);
-};
 
 /*
   Helper classes to store non-transactional and transactional data
@@ -421,6 +424,7 @@ public:
                     ulong *param_ptr_binlog_stmt_cache_disk_use,
                     ulong *param_ptr_binlog_cache_use,
                     ulong *param_ptr_binlog_cache_disk_use)
+    : last_commit_pos_offset(0), using_xa(FALSE), xa_xid(0)
   {
      stmt_cache.set_binlog_cache_info(param_max_binlog_stmt_cache_size,
                                       param_ptr_binlog_stmt_cache_use,
@@ -428,11 +432,20 @@ public:
      trx_cache.set_binlog_cache_info(param_max_binlog_cache_size,
                                      param_ptr_binlog_cache_use,
                                      param_ptr_binlog_cache_disk_use);
+     last_commit_pos_file[0]= 0;
   }
 
-  void reset_cache(binlog_cache_data* cache_data)
+  void reset(bool do_stmt, bool do_trx)
   {
-    cache_data->reset();
+    if (do_stmt)
+      stmt_cache.reset();
+    if (do_trx)
+    {
+      trx_cache.reset();
+      using_xa= FALSE;
+      last_commit_pos_file[0]= 0;
+      last_commit_pos_offset= 0;
+    }
   }
 
   binlog_cache_data* get_binlog_cache_data(bool is_transactional)
@@ -449,6 +462,23 @@ public:
 
   binlog_cache_data trx_cache;
 
+  /*
+    Binlog position for current transaction.
+    For START TRANSACTION WITH CONSISTENT SNAPSHOT, this is the binlog
+    position corresponding to the snapshot taken. During (and after) commit,
+    this is set to the binlog position corresponding to just after the
+    commit (so storage engines can store it in their transaction log).
+  */
+  char last_commit_pos_file[FN_REFLEN];
+  my_off_t last_commit_pos_offset;
+
+  /*
+    Flag set true if this transaction is committed with log_xid() as part of
+    XA, false if not.
+  */
+  bool using_xa;
+  my_xid xa_xid;
+
 private:
 
   binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
@@ -551,7 +581,7 @@ void Log_to_csv_event_handler::cleanup()
 */
 
 bool Log_to_csv_event_handler::
-  log_general(THD *thd, time_t event_time, const char *user_host,
+  log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
               uint user_host_len, int thread_id,
               const char *command_type, uint command_type_len,
               const char *sql_text, uint sql_text_len,
@@ -568,6 +598,7 @@ bool Log_to_csv_event_handler::
   Open_tables_backup open_tables_backup;
   ulonglong save_thd_options;
   bool save_time_zone_used;
+  DBUG_ENTER("log_general");
 
   /*
     CSV uses TIME_to_timestamp() internally if table needs to be repaired
@@ -603,7 +634,7 @@ bool Log_to_csv_event_handler::
   need_close= TRUE;
 
   if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
-      table->file->ha_rnd_init(0))
+      table->file->ha_rnd_init_with_error(0))
     goto err;
 
   need_rnd_end= TRUE;
@@ -627,8 +658,8 @@ bool Log_to_csv_event_handler::
 
   DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
 
-  ((Field_timestamp*) table->field[0])->store_timestamp((my_time_t)
-                                                        event_time);
+  ((Field_timestamp*) table->field[0])->store_TIME(
+                  hrtime_to_my_time(event_time), hrtime_sec_part(event_time));
 
   /* do a write */
   if (table->field[1]->store(user_host, user_host_len, client_cs) ||
@@ -681,7 +712,7 @@ err:
 
   thd->variables.option_bits= save_thd_options;
   thd->time_zone_used= save_time_zone_used;
-  return result;
+  DBUG_RETURN(result);
 }
 
 
@@ -692,7 +723,6 @@ err:
     log_slow()
     thd               THD of the query
     current_time      current timestamp
-    query_start_arg   command start timestamp
     user_host         the pointer to the string with user@host info
     user_host_len     length of the user_host string. this is computed once
                       and passed to all general log event handlers
@@ -715,7 +745,7 @@ err:
 */
 
 bool Log_to_csv_event_handler::
-  log_slow(THD *thd, time_t current_time, time_t query_start_arg,
+  log_slow(THD *thd, my_hrtime_t current_time,
            const char *user_host, uint user_host_len,
            ulonglong query_utime, ulonglong lock_utime, bool is_command,
            const char *sql_text, uint sql_text_len)
@@ -729,6 +759,11 @@ bool Log_to_csv_event_handler::
   Open_tables_backup open_tables_backup;
   CHARSET_INFO *client_cs= thd->variables.character_set_client;
   bool save_time_zone_used;
+  long query_time= (long) min(query_utime/1000000, TIME_MAX_VALUE_SECONDS);
+  long lock_time=  (long) min(lock_utime/1000000, TIME_MAX_VALUE_SECONDS);
+  long query_time_micro= (long) (query_utime % 1000000);
+  long lock_time_micro=  (long) (lock_utime % 1000000);
+
   DBUG_ENTER("Log_to_csv_event_handler::log_slow");
 
   thd->push_internal_handler(& error_handler);
@@ -749,7 +784,7 @@ bool Log_to_csv_event_handler::
   need_close= TRUE;
 
   if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
-      table->file->ha_rnd_init(0))
+      table->file->ha_rnd_init_with_error(0))
     goto err;
 
   need_rnd_end= TRUE;
@@ -765,45 +800,34 @@ bool Log_to_csv_event_handler::
 
   /* store the time and user values */
   DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
-  ((Field_timestamp*) table->field[0])->store_timestamp((my_time_t)
-                                                        current_time);
+  ((Field_timestamp*) table->field[0])->store_TIME(
+             hrtime_to_my_time(current_time), hrtime_sec_part(current_time));
   if (table->field[1]->store(user_host, user_host_len, client_cs))
     goto err;
 
-  if (query_start_arg)
-  {
-    longlong query_time= (longlong) (query_utime/1000000);
-    longlong lock_time=  (longlong) (lock_utime/1000000);
-    /*
-      A TIME field can not hold the full longlong range; query_time or
-      lock_time may be truncated without warning here, if greater than
-      839 hours (~35 days)
-    */
-    MYSQL_TIME t;
-    t.neg= 0;
+  /*
+    A TIME field can not hold the full longlong range; query_time or
+    lock_time may be truncated without warning here, if greater than
+    839 hours (~35 days)
+  */
+  MYSQL_TIME t;
+  t.neg= 0;
+
+  /* fill in query_time field */
+  calc_time_from_sec(&t, query_time, query_time_micro);
+  if (table->field[2]->store_time(&t))
+    goto err;
+  /* lock_time */
+  calc_time_from_sec(&t, lock_time, lock_time_micro);
+  if (table->field[3]->store_time(&t))
+    goto err;
+  /* rows_sent */
+  if (table->field[4]->store((longlong) thd->sent_row_count, TRUE))
+    goto err;
+  /* rows_examined */
+  if (table->field[5]->store((longlong) thd->examined_row_count, TRUE))
+    goto err;
 
-    /* fill in query_time field */
-    calc_time_from_sec(&t, (long) min(query_time, (longlong) TIME_MAX_VALUE_SECONDS), 0);
-    if (table->field[2]->store_time(&t, MYSQL_TIMESTAMP_TIME))
-      goto err;
-    /* lock_time */
-    calc_time_from_sec(&t, (long) min(lock_time, (longlong) TIME_MAX_VALUE_SECONDS), 0);
-    if (table->field[3]->store_time(&t, MYSQL_TIMESTAMP_TIME))
-      goto err;
-    /* rows_sent */
-    if (table->field[4]->store((longlong) thd->sent_row_count, TRUE))
-      goto err;
-    /* rows_examined */
-    if (table->field[5]->store((longlong) thd->examined_row_count, TRUE))
-      goto err;
-  }
-  else
-  {
-    table->field[2]->set_null();
-    table->field[3]->set_null();
-    table->field[4]->set_null();
-    table->field[5]->set_null();
-  }
   /* fill database field */
   if (thd->db)
   {
@@ -935,14 +959,14 @@ void Log_to_file_event_handler::init_pthread_objects()
 /** Wrapper around MYSQL_LOG::write() for slow log. */
 
 bool Log_to_file_event_handler::
-  log_slow(THD *thd, time_t current_time, time_t query_start_arg,
+  log_slow(THD *thd, my_hrtime_t current_time,
            const char *user_host, uint user_host_len,
            ulonglong query_utime, ulonglong lock_utime, bool is_command,
            const char *sql_text, uint sql_text_len)
 {
   Silence_log_table_errors error_handler;
   thd->push_internal_handler(&error_handler);
-  bool retval= mysql_slow_log.write(thd, current_time, query_start_arg,
+  bool retval= mysql_slow_log.write(thd, hrtime_to_my_time(current_time),
                                     user_host, user_host_len,
                                     query_utime, lock_utime, is_command,
                                     sql_text, sql_text_len);
@@ -957,7 +981,7 @@ bool Log_to_file_event_handler::
 */
 
 bool Log_to_file_event_handler::
-  log_general(THD *thd, time_t event_time, const char *user_host,
+  log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
               uint user_host_len, int thread_id,
               const char *command_type, uint command_type_len,
               const char *sql_text, uint sql_text_len,
@@ -965,7 +989,8 @@ bool Log_to_file_event_handler::
 {
   Silence_log_table_errors error_handler;
   thd->push_internal_handler(&error_handler);
-  bool retval= mysql_log.write(event_time, user_host, user_host_len,
+  bool retval= mysql_log.write(hrtime_to_time(event_time), user_host,
+                               user_host_len,
                                thread_id, command_type, command_type_len,
                                sql_text, sql_text_len);
   thd->pop_internal_handler();
@@ -1100,8 +1125,6 @@ void LOGGER::init_log_tables()
 
 bool LOGGER::flush_logs(THD *thd)
 {
-  int rc= 0;
-
   /*
     Now we lock logger, as nobody should be able to use logging routines while
     log tables are closed
@@ -1113,7 +1136,7 @@ bool LOGGER::flush_logs(THD *thd)
 
   /* end of log flush */
   logger.unlock();
-  return rc;
+  return 0;
 }
 
 
@@ -1200,8 +1223,6 @@ bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
 
   if (*slow_log_handler_list)
   {
-    time_t current_time;
-
     /* do not log slow queries from replication threads */
     if (thd->slave_thread && !opt_log_slow_slave_statements)
       return 0;
@@ -1216,21 +1237,17 @@ bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
     /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
     user_host_len= (strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
                              sctx->priv_user ? sctx->priv_user : "", "[",
-                             sctx->user ? sctx->user : "", "] @ ",
+                             sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), "] @ ",
                              sctx->host ? sctx->host : "", " [",
                              sctx->ip ? sctx->ip : "", "]", NullS) -
                     user_host_buff);
 
-    current_time= my_time_possible_from_micro(current_utime);
-    if (thd->start_utime)
-    {
-      query_utime= (current_utime - thd->start_utime);
-      lock_utime=  (thd->utime_after_lock - thd->start_utime);
-    }
-    else
-    {
-      query_utime= lock_utime= 0;
-    }
+    DBUG_ASSERT(thd->start_utime);
+    DBUG_ASSERT(thd->start_time);
+    query_utime= (current_utime - thd->start_utime);
+    lock_utime=  (thd->utime_after_lock - thd->start_utime);
+    my_hrtime_t current_time= { hrtime_from_time(thd->start_time) +
+                                thd->start_time_sec_part + query_utime };
 
     if (!query)
     {
@@ -1239,8 +1256,19 @@ bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
       query_length= command_name[thd->command].length;
     }
 
+    if (!query_length) 
+    {
+      /*
+        Not a real query; Reset counts for slow query logging
+        (QQ: Wonder if this is really needed)
+      */
+      thd->sent_row_count= thd->examined_row_count= 0;
+      thd->query_plan_flags= QPLAN_INIT;
+      thd->query_plan_fsort_passes= 0;
+    }
+
     for (current_handler= slow_log_handler_list; *current_handler ;)
-      error= (*current_handler++)->log_slow(thd, current_time, thd->start_time,
+      error= (*current_handler++)->log_slow(thd, current_time,
                                             user_host_buff, user_host_len,
                                             query_utime, lock_utime, is_command,
                                             query, query_length) || error;
@@ -1257,7 +1285,7 @@ bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
   Log_event_handler **current_handler= general_log_handler_list;
   char user_host_buff[MAX_USER_HOST_SIZE + 1];
   uint user_host_len= 0;
-  time_t current_time;
+  my_hrtime_t current_time;
 
   DBUG_ASSERT(thd);
 
@@ -1269,9 +1297,9 @@ bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
   }
   user_host_len= make_user_name(thd, user_host_buff);
 
-  current_time= my_time(0);
+  current_time= my_hrtime();
 
-  mysql_audit_general_log(thd, current_time,
+  mysql_audit_general_log(thd, hrtime_to_time(current_time),
                           user_host_buff, user_host_len,
                           command_name[(uint) command].str,
                           command_name[(uint) command].length,
@@ -1436,6 +1464,7 @@ void LOGGER::deactivate_log_handler(THD *thd, uint log_type)
 {
   my_bool *tmp_opt= 0;
   MYSQL_LOG *file_log;
+  LINT_INIT(file_log);
 
   switch (log_type) {
   case QUERY_LOG_SLOW:
@@ -1575,6 +1604,7 @@ int binlog_init(void *p)
   binlog_hton->commit= binlog_commit;
   binlog_hton->rollback= binlog_rollback;
   binlog_hton->prepare= binlog_prepare;
+  binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
   binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
   return 0;
 }
@@ -1590,48 +1620,69 @@ static int binlog_close_connection(handlerton *hton, THD *thd)
   return 0;
 }
 
-/**
+/*
   This function flushes a cache upon commit/rollback.
 
-  @param thd                The thread whose transaction should be flushed
-  @param cache_data         Pointer to the cache
-  @param end_ev             The end event either commit/rollback
-  @param is_transactional   The type of the cache: transactional or
-                            non-transactional
+  SYNOPSIS
+    binlog_flush_cache()
 
-  @return
-    nonzero if an error pops up when flushing the cache.
-*/
-static inline int
-binlog_flush_cache(THD *thd, binlog_cache_data* cache_data, Log_event *end_evt,
-                   bool is_transactional)
+    thd        The thread whose transaction should be ended
+    cache_mngr Pointer to the binlog_cache_mngr to use
+    all        True if the entire transaction should be ended, false if
+               only the statement transaction should be ended.
+    end_ev     The end event to use (COMMIT, ROLLBACK, or commit XID)
+    using_stmt True if the statement cache should be flushed
+    using_trx  True if the transaction cache should be flushed
+
+  DESCRIPTION
+
+    End the current transaction or statement. The transaction can be either
+    a real transaction or a statement transaction.
+
+    This can be to commit a transaction, with a COMMIT query event or an XA
+    commit XID event. But it can also be to rollback a transaction with a
+    ROLLBACK query event, used for rolling back transactions which also
+    contain updates to non-transactional tables. Or it can be a flush of
+    a statement cache.
+ */
+static int
+binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
+                   Log_event *end_ev, bool all, bool using_stmt,
+                   bool using_trx)
 {
-  DBUG_ENTER("binlog_flush_cache");
   int error= 0;
+  DBUG_ENTER("binlog_flush_cache");
 
-  if (!cache_data->empty())
+  if ((using_stmt && !cache_mngr->stmt_cache.empty()) ||
+      (using_trx && !cache_mngr->trx_cache.empty()))
   {
-    if (thd->binlog_flush_pending_rows_event(TRUE, is_transactional))
+    if (using_stmt && thd->binlog_flush_pending_rows_event(TRUE, FALSE))
       DBUG_RETURN(1);
+    if (using_trx && thd->binlog_flush_pending_rows_event(TRUE, TRUE))
+      DBUG_RETURN(1);
+
     /*
       Doing a commit or a rollback including non-transactional tables,
       i.e., ending a transaction where we might write the transaction
       cache to the binary log.
 
       We can always end the statement when ending a transaction since
-      transactions are not allowed inside stored functions. If they
+      transactions are not allowed inside stored functions.  If they
       were, we would have to ensure that we're not ending a statement
       inside a stored function.
     */
-    error= mysql_bin_log.write(thd, &cache_data->cache_log, end_evt,
-                               cache_data->has_incident());
+    error= mysql_bin_log.write_transaction_to_binlog(thd, cache_mngr,
+                                                     end_ev, all,
+                                                     using_stmt, using_trx);
   }
-  cache_data->reset();
+  cache_mngr->reset(using_stmt, using_trx);
 
-  DBUG_ASSERT(cache_data->empty());
+  DBUG_ASSERT((!using_stmt || cache_mngr->stmt_cache.empty()) &&
+              (!using_trx || cache_mngr->trx_cache.empty()));
   DBUG_RETURN(error);
 }
 
+
 /**
   This function flushes the stmt-cache upon commit.
 
@@ -1642,13 +1693,12 @@ binlog_flush_cache(THD *thd, binlog_cache_data* cache_data, Log_event *end_evt,
     nonzero if an error pops up when flushing the cache.
 */
 static inline int
-binlog_commit_flush_stmt_cache(THD *thd,
+binlog_commit_flush_stmt_cache(THD *thd, bool all,
                                binlog_cache_mngr *cache_mngr)
 {
   Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
-                          FALSE, FALSE, TRUE, 0);
-  return (binlog_flush_cache(thd, &cache_mngr->stmt_cache, &end_evt,
-                             FALSE));
+                          FALSE, TRUE, TRUE, 0);
+  return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE));
 }
 
 /**
@@ -1661,12 +1711,11 @@ binlog_commit_flush_stmt_cache(THD *thd,
     nonzero if an error pops up when flushing the cache.
 */
 static inline int
-binlog_commit_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr)
+binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr)
 {
   Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
-                          TRUE, FALSE, TRUE, 0);
-  return (binlog_flush_cache(thd, &cache_mngr->trx_cache, &end_evt,
-                             TRUE));
+                          TRUE, TRUE, TRUE, 0);
+  return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
 }
 
 /**
@@ -1679,12 +1728,12 @@ binlog_commit_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr)
     nonzero if an error pops up when flushing the cache.
 */
 static inline int
-binlog_rollback_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr)
+binlog_rollback_flush_trx_cache(THD *thd, bool all,
+                                binlog_cache_mngr *cache_mngr)
 {
   Query_log_event end_evt(thd, STRING_WITH_LEN("ROLLBACK"),
-                          TRUE, FALSE, TRUE, 0);
-  return (binlog_flush_cache(thd, &cache_mngr->trx_cache, &end_evt,
-                             TRUE));
+                          TRUE, TRUE, TRUE, 0);
+  return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
 }
 
 /**
@@ -1698,12 +1747,26 @@ binlog_rollback_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr)
     nonzero if an error pops up when flushing the cache.
 */
 static inline int
-binlog_commit_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr,
-                              my_xid xid)
+binlog_commit_flush_xid_caches(THD *thd, binlog_cache_mngr *cache_mngr,
+                               bool all, my_xid xid)
 {
-  Xid_log_event end_evt(thd, xid);
-  return (binlog_flush_cache(thd, &cache_mngr->trx_cache, &end_evt,
-                             TRUE));
+  if (xid)
+  {
+    Xid_log_event end_evt(thd, xid, TRUE);
+    return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
+  }
+  else
+  {
+    /*
+      Empty xid occurs in XA COMMIT ... ONE PHASE.
+      In this case, we do not have a MySQL xid for the transaction, and the
+      external XA transaction coordinator will have to handle recovery if
+      needed. So we end the transaction with a plain COMMIT query event.
+    */
+    Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
+                            TRUE, TRUE, TRUE, 0);
+    return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
+  }
 }
 
 /**
@@ -1742,11 +1805,11 @@ binlog_truncate_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all)
   if (ending_trans(thd, all))
   {
     if (cache_mngr->trx_cache.has_incident())
-      error= mysql_bin_log.write_incident(thd, TRUE);
+      error= mysql_bin_log.write_incident(thd);
 
     thd->clear_binlog_table_maps();
 
-    cache_mngr->reset_cache(&cache_mngr->trx_cache);
+    cache_mngr->reset(false, true);
   }
   /*
     If rolling back a statement in a transaction, we truncate the
@@ -1765,7 +1828,7 @@ static int binlog_prepare(handlerton *hton, THD *thd, bool all)
     do nothing.
     just pretend we can do 2pc, so that MySQL won't
     switch to 1pc.
-    real work will be done in MYSQL_BIN_LOG::log_xid()
+    real work will be done in MYSQL_BIN_LOG::log_and_order()
   */
   return 0;
 }
@@ -1798,7 +1861,7 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all)
 
   if (!cache_mngr->stmt_cache.empty())
   {
-    error= binlog_commit_flush_stmt_cache(thd, cache_mngr);
+    error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
   }
 
   if (cache_mngr->trx_cache.empty())
@@ -1806,7 +1869,7 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all)
     /*
       we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
     */
-    cache_mngr->reset_cache(&cache_mngr->trx_cache);
+    cache_mngr->reset(false, true);
     DBUG_RETURN(error);
   }
 
@@ -1817,7 +1880,7 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all)
     Otherwise, we accumulate the changes.
   */
   if (!error && ending_trans(thd, all))
-    error= binlog_commit_flush_trx_cache(thd, cache_mngr);
+    error= binlog_commit_flush_trx_cache(thd, all, cache_mngr);
 
   /*
     This is part of the stmt rollback.
@@ -1856,12 +1919,12 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
   */
   if (cache_mngr->stmt_cache.has_incident())
   {
-    error= mysql_bin_log.write_incident(thd, TRUE);
-    cache_mngr->reset_cache(&cache_mngr->stmt_cache);
+    error= mysql_bin_log.write_incident(thd);
+    cache_mngr->reset(true, false);
   }
   else if (!cache_mngr->stmt_cache.empty())
   {
-    error= binlog_commit_flush_stmt_cache(thd, cache_mngr);
+    error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
   }
 
   if (cache_mngr->trx_cache.empty())
@@ -1869,7 +1932,7 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
     /*
       we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
     */
-    cache_mngr->reset_cache(&cache_mngr->trx_cache);
+    cache_mngr->reset(false, true);
     DBUG_RETURN(error);
   }
 
@@ -1909,7 +1972,7 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
          (trans_has_updated_non_trans_table(thd) &&
           ending_single_stmt_trans(thd,all) &&
           thd->variables.binlog_format == BINLOG_FORMAT_MIXED)))
-      error= binlog_rollback_flush_trx_cache(thd, cache_mngr);
+      error= binlog_rollback_flush_trx_cache(thd, all, cache_mngr);
     /*
       Truncate the cache if:
         . aborting a single or multi-statement transaction or;
@@ -1987,6 +2050,7 @@ bool MYSQL_BIN_LOG::check_write_error(THD *thd)
   DBUG_RETURN(checked);
 }
 
+
 /**
   @note
   How do we handle this (unlikely but legal) case:
@@ -2025,7 +2089,7 @@ static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
       log_query.append("`"))
     DBUG_RETURN(1);
   int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
-  Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
+  Query_log_event qinfo(thd, log_query.ptr(), log_query.length(),
                         TRUE, FALSE, TRUE, errcode);
   DBUG_RETURN(mysql_bin_log.write(&qinfo));
 }
@@ -2049,7 +2113,7 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
         log_query.append("`"))
       DBUG_RETURN(1);
     int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
-    Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
+    Query_log_event qinfo(thd, log_query.ptr(), log_query.length(),
                           TRUE, FALSE, TRUE, errcode);
     DBUG_RETURN(mysql_bin_log.write(&qinfo));
   }
@@ -2060,17 +2124,17 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
 
 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
 {
-  char magic[4];
+  uchar magic[4];
   DBUG_ASSERT(my_b_tell(log) == 0);
 
-  if (my_b_read(log, (uchar*) magic, sizeof(magic)))
+  if (my_b_read(log, magic, sizeof(magic)))
   {
     *errmsg = "I/O error reading the header from the binary log";
     sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
 		    log->error);
     return 1;
   }
-  if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
+  if (bcmp(magic, BINLOG_MAGIC, sizeof(magic)))
   {
     *errmsg = "Binlog has bad magic number;  It's not a binary log file that can be used by this version of MySQL";
     return 1;
@@ -2177,6 +2241,7 @@ static int find_uniq_filename(char *name)
   char			*start, *end;
   int                   error= 0;
   DBUG_ENTER("find_uniq_filename");
+  LINT_INIT(number);
 
   length= dirname_part(buff, name, &buf_length);
   start=  name + length;
@@ -2635,7 +2700,6 @@ err:
 
     thd               THD of the query
     current_time      current timestamp
-    query_start_arg   command start timestamp
     user_host         the pointer to the string with user@host info
     user_host_len     length of the user_host string. this is computed once
                       and passed to all general log event handlers
@@ -2657,7 +2721,7 @@ err:
 */
 
 bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
-                            time_t query_start_arg, const char *user_host,
+                            const char *user_host,
                             uint user_host_len, ulonglong query_utime,
                             ulonglong lock_utime, bool is_command,
                             const char *sql_text, uint sql_text_len)
@@ -2707,19 +2771,39 @@ bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
       if (my_b_write(&log_file, (uchar*) "\n", 1))
         tmp_errno= errno;
     }
+    
     /* For slow query log */
     sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
     sprintf(lock_time_buff,  "%.6f", ulonglong2double(lock_utime)/1000000.0);
     if (my_b_printf(&log_file,
-                    "# Query_time: %s  Lock_time: %s"
-                    " Rows_sent: %lu  Rows_examined: %lu\n",
+                    "# Thread_id: %lu  Schema: %s  QC_hit: %s\n" \
+                    "# Query_time: %s  Lock_time: %s  Rows_sent: %lu  Rows_examined: %lu\n",
+                    (ulong) thd->thread_id, (thd->db ? thd->db : ""),
+                    ((thd->query_plan_flags & QPLAN_QC) ? "Yes" : "No"),
                     query_time_buff, lock_time_buff,
                     (ulong) thd->sent_row_count,
-                    (ulong) thd->examined_row_count) == (uint) -1)
+                    (ulong) thd->examined_row_count) == (size_t) -1)
       tmp_errno= errno;
+     if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN) &&
+         (thd->query_plan_flags &
+          (QPLAN_FULL_SCAN | QPLAN_FULL_JOIN | QPLAN_TMP_TABLE |
+           QPLAN_TMP_DISK | QPLAN_FILESORT | QPLAN_FILESORT_DISK)) &&
+         my_b_printf(&log_file,
+                     "# Full_scan: %s  Full_join: %s  "
+                     "Tmp_table: %s  Tmp_table_on_disk: %s\n"
+                     "# Filesort: %s  Filesort_on_disk: %s  Merge_passes: %lu\n",
+                     ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"),
+                     ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"),
+                     ((thd->query_plan_flags & QPLAN_TMP_TABLE) ? "Yes" : "No"),
+                     ((thd->query_plan_flags & QPLAN_TMP_DISK) ? "Yes" : "No"),
+                     ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"),
+                     ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ?
+                      "Yes" : "No"),
+                     thd->query_plan_fsort_passes) == (size_t) -1)
+       tmp_errno= errno;
     if (thd->db && strcmp(thd->db, db))
     {						// Database changed
-      if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1)
+      if (my_b_printf(&log_file,"use %s;\n",thd->db) == (size_t) -1)
         tmp_errno= errno;
       strmov(db,thd->db);
     }
@@ -2814,8 +2898,12 @@ const char *MYSQL_LOG::generate_name(const char *log_name,
 MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
   :bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
    need_start_event(TRUE),
+   group_commit_queue(0), group_commit_queue_busy(FALSE),
+   num_commits(0), num_group_commits(0),
    sync_period_ptr(sync_period),
    is_relay_log(0), signal_cnt(0),
+   checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
+   relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
    description_event_for_exec(0), description_event_for_queue(0)
 {
   /*
@@ -2863,7 +2951,9 @@ void MYSQL_BIN_LOG::init_pthread_objects()
 {
   MYSQL_LOG::init_pthread_objects();
   mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
+  mysql_mutex_setflags(&LOCK_index, MYF_NO_DEADLOCK_DETECTION);
   mysql_cond_init(m_key_update_cond, &update_cond, 0);
+  mysql_cond_init(m_key_COND_queue_busy, &COND_queue_busy, 0);
 }
 
 
@@ -3029,7 +3119,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
 	an extension for the binary log files.
 	In this case we write a standard header to it.
       */
-      if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
+      if (my_b_safe_write(&log_file, BINLOG_MAGIC,
 			  BIN_LOG_HEADER_SIZE))
         goto err;
       bytes_written+= BIN_LOG_HEADER_SIZE;
@@ -3048,7 +3138,19 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
         as we won't be able to reset it later
       */
       if (io_cache_type == WRITE_CACHE)
-        s.flags|= LOG_EVENT_BINLOG_IN_USE_F;
+        s.flags |= LOG_EVENT_BINLOG_IN_USE_F;
+      s.checksum_alg= is_relay_log ?
+        /* relay-log */
+        /* inherit master's checksum algorithm if one has been received */
+        (relay_log_checksum_alg= 
+         (relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF) ?
+         relay_log_checksum_alg :
+         /* otherwise use slave's local preference of RL events verification */
+         (opt_slave_sql_verify_checksum == 0) ?
+         (uint8) BINLOG_CHECKSUM_ALG_OFF : (uint8) binlog_checksum_options):
+        /* binlog */
+        (uint8) binlog_checksum_options;
+      DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
       if (!s.is_valid())
         goto err;
       s.dont_set_created= null_created_arg;
@@ -3090,6 +3192,11 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
     if (flush_io_cache(&log_file) ||
         mysql_file_sync(log_file.file, MYF(MY_WME)))
       goto err;
+    mysql_mutex_lock(&LOCK_commit_ordered);
+    strmake(last_commit_pos_file, log_file_name,
+            sizeof(last_commit_pos_file)-1);
+    last_commit_pos_offset= my_b_tell(&log_file);
+    mysql_mutex_unlock(&LOCK_commit_ordered);
 
     if (write_file_name_to_index_file)
     {
@@ -3137,11 +3244,7 @@ To turn it on again: fix the cause, \
 shutdown the MySQL server and restart it.", name, errno);
   if (file >= 0)
     mysql_file_close(file, MYF(0));
-  end_io_cache(&log_file);
-  end_io_cache(&index_file);
-  my_free(name);
-  name= NULL;
-  log_state= LOG_CLOSED;
+  close(LOG_CLOSE_INDEX);
   DBUG_RETURN(1);
 }
 
@@ -3374,6 +3477,13 @@ bool MYSQL_BIN_LOG::reset_logs(THD* thd)
   ha_reset_logs(thd);
 
   /*
+    We need to get both locks to be sure that no one is trying to
+    write to the index log file.
+  */
+  mysql_mutex_lock(&LOCK_log);
+  mysql_mutex_lock(&LOCK_index);
+
+  /*
     The following mutex is needed to ensure that no threads call
     'delete thd' as we would then risk missing a 'rollback' from this
     thread. If the transaction involved MyISAM tables, it should go
@@ -3381,13 +3491,6 @@ bool MYSQL_BIN_LOG::reset_logs(THD* thd)
   */
   mysql_mutex_lock(&LOCK_thread_count);
 
-  /*
-    We need to get both locks to be sure that no one is trying to
-    write to the index log file.
-  */
-  mysql_mutex_lock(&LOCK_log);
-  mysql_mutex_lock(&LOCK_index);
-
   /* Save variables so that we can reopen the log */
   save_name=name;
   name=0;					// Protect against free
@@ -3798,8 +3901,7 @@ int MYSQL_BIN_LOG::close_purge_index_file()
 
 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
 {
-  DBUG_ENTER("MYSQL_BIN_LOG::is_inited_purge_index_file");
-  DBUG_RETURN (my_b_inited(&purge_index_file));
+  return my_b_inited(&purge_index_file);
 }
 
 int MYSQL_BIN_LOG::sync_purge_index_file()
@@ -3835,13 +3937,12 @@ int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
 int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *decrease_log_space,
                                      bool need_mutex)
 {
+  DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");
   MY_STAT s;
   int error= 0;
   LOG_INFO log_info;
   LOG_INFO check_log_info;
 
-  DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");
-
   DBUG_ASSERT(my_b_inited(&purge_index_file));
 
   if ((error=reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0, 0)))
@@ -4230,8 +4331,15 @@ int MYSQL_BIN_LOG::new_file_impl(bool need_lock)
         We log the whole file name for log file as the user may decide
         to change base names at some point.
       */
-      Rotate_log_event r(new_name+dirname_length(new_name),
-                         0, LOG_EVENT_OFFSET, is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
+      Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
+                         is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
+      /* 
+         The current relay-log's closing Rotate event must have checksum
+         value computed with an algorithm of the last relay-logged FD event.
+      */
+      if (is_relay_log)
+        r.checksum_alg= relay_log_checksum_alg;
+      DBUG_ASSERT(!is_relay_log || relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
       if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event", (error=close_on_error=TRUE), FALSE) ||
          (error= r.write(&log_file)))
       {
@@ -4252,7 +4360,12 @@ int MYSQL_BIN_LOG::new_file_impl(bool need_lock)
   old_name=name;
   name=0;				// Don't free name
   close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX);
-
+  if (log_type == LOG_BIN && checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF)
+  {
+    DBUG_ASSERT(!is_relay_log);
+    DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
+    binlog_checksum_options= checksum_alg_reset;
+  }
   /*
      Note that at this point, log_state != LOG_CLOSED (important for is_open()).
   */
@@ -4394,6 +4507,10 @@ bool MYSQL_BIN_LOG::flush_and_sync(bool *synced)
     err= mysql_file_sync(fd, MYF(MY_WME));
     if (synced)
       *synced= 1;
+#ifndef DBUG_OFF
+    if (opt_binlog_dbug_fsync_sleep > 0)
+      my_sleep(opt_binlog_dbug_fsync_sleep);
+#endif
   }
   return err;
 }
@@ -4654,12 +4771,34 @@ void THD::binlog_set_stmt_begin() {
   cache_mngr->trx_cache.set_prev_position(pos);
 }
 
+static int
+binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
+{
+  int err= 0;                   /* never modified below: always returns 0 */
+  DBUG_ENTER("binlog_start_consistent_snapshot");
+  /* Copy the binlog's last commit position into this THD's cache manager. */
+  thd->binlog_setup_trx_data(); /* presumably creates cache_mngr - confirm */
+  binlog_cache_mngr *const cache_mngr=
+    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
+
+  /* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */
+  strmake(cache_mngr->last_commit_pos_file, mysql_bin_log.last_commit_pos_file,
+          sizeof(cache_mngr->last_commit_pos_file)-1);
+  cache_mngr->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset;
+
+  trans_register_ha(thd, TRUE, hton);
+
+  DBUG_RETURN(err);
+}
 
 /**
   This function writes a table map to the binary log. 
   Note that in order to keep the signature uniform with related methods,
   we use a redundant parameter to indicate whether a transactional table
   was changed or not.
+
+  If with_annotate != NULL and
+  *with_annotate = TRUE write also Annotate_rows before the table map.
  
   @param table             a pointer to the table.
   @param is_transactional  @c true indicates a transactional table,
@@ -4667,7 +4806,8 @@ void THD::binlog_set_stmt_begin() {
   @return
     nonzero if an error pops up when writing the table map event.
 */
-int THD::binlog_write_table_map(TABLE *table, bool is_transactional)
+int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
+                                my_bool *with_annotate)
 {
   int error;
   DBUG_ENTER("THD::binlog_write_table_map");
@@ -4690,6 +4830,14 @@ int THD::binlog_write_table_map(TABLE *table, bool is_transactional)
 
   IO_CACHE *file=
     cache_mngr->get_binlog_cache_log(use_trans_cache(this, is_transactional));
+  if (with_annotate && *with_annotate)
+  {
+    Annotate_rows_log_event anno(current_thd, is_transactional);
+    /* Annotate event should be written not more than once */
+    *with_annotate= 0;
+    if ((error= anno.write(file)))
+      DBUG_RETURN(error);
+  }
   if ((error= the_event.write(file)))
     DBUG_RETURN(error);
 
@@ -4844,10 +4992,12 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
 }
 
 /**
-  Write an event to the binary log.
+  Write an event to the binary log. If with_annotate != NULL and
+  *with_annotate = TRUE write also Annotate_rows before the event
+  (this should happen only if the event is a Table_map).
 */
 
-bool MYSQL_BIN_LOG::write(Log_event *event_info)
+bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
 {
   THD *thd= event_info->thd;
   bool error= 1;
@@ -4885,6 +5035,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
   */
   if (likely(is_open()))
   {
+    my_off_t UNINIT_VAR(my_org_b_tell);
 #ifdef HAVE_REPLICATION
     /*
       In the future we need to add to the following if tests like
@@ -4892,7 +5043,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
       binlog_[wild_]{do|ignore}_table?" (WL#1049)"
     */
     const char *local_db= event_info->get_db();
-    if ((thd && !(thd->variables.option_bits & OPTION_BIN_LOG)) ||
+    if ((!(thd->variables.option_bits & OPTION_BIN_LOG)) ||
 	(thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
          thd->lex->sql_command != SQLCOM_SAVEPOINT &&
          !binlog_filter->db_ok(local_db)))
@@ -4904,6 +5055,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
     if (event_info->use_direct_logging())
     {
       file= &log_file;
+      my_org_b_tell= my_b_tell(file);
       mysql_mutex_lock(&LOCK_log);
     }
     else
@@ -4933,10 +5085,22 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
        of the SQL command. If row-based binlogging, Insert_id, Rand
        and other kind of "setting context" events are not needed.
     */
+
+    if (with_annotate && *with_annotate)
+    {
+      DBUG_ASSERT(event_info->get_type_code() == TABLE_MAP_EVENT);
+      Annotate_rows_log_event anno(thd, event_info->cache_type);
+      /* Annotate event should be written not more than once */
+      *with_annotate= 0;
+      if (anno.write(file))
+        goto err;
+    }
+
     if (thd)
     {
       if (!thd->is_current_stmt_binlog_format_row())
       {
+
         if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
         {
           Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
@@ -4998,12 +5162,18 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
 err:
     if (event_info->use_direct_logging())
     {
+      my_off_t offset= my_b_tell(file);
+
       if (!error)
       {
         bool synced;
+
         if ((error= flush_and_sync(&synced)))
           goto unlock;
 
+        status_var_add(thd->status_var.binlog_bytes_written,
+                       offset - my_org_b_tell);
+
         if ((error= RUN_HOOK(binlog_storage, after_flush,
                  (thd, log_file_name, file->pos_in_file, synced))))
         {
@@ -5013,7 +5183,15 @@ err:
         signal_update();
         rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
       }
+
 unlock:
+      /*
+        Take mutex to protect against a reader seeing partial writes of 64-bit
+        offset on 32-bit CPUs.
+      */
+      mysql_mutex_lock(&LOCK_commit_ordered);
+      last_commit_pos_offset= offset;
+      mysql_mutex_unlock(&LOCK_commit_ordered);
       mysql_mutex_unlock(&LOCK_log);
     }
 
@@ -5130,12 +5308,14 @@ int MYSQL_BIN_LOG::rotate_and_purge(uint flags)
          We give it a shot and try to write an incident event anyway
          to the current log. 
       */
-      if (!write_incident(current_thd, FALSE))
+      if (!write_incident_already_locked(current_thd))
         flush_and_sync(0);
 
 #ifdef HAVE_REPLICATION
     check_purge= true;
 #endif
+    if (flags & RP_BINLOG_CHECKSUM_ALG_CHANGE)
+      checksum_alg_reset= BINLOG_CHECKSUM_ALG_UNDEF; // done
   }
   if (!(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED))
     mysql_mutex_unlock(&LOCK_log);
@@ -5164,29 +5344,67 @@ uint MYSQL_BIN_LOG::next_file_id()
 }
 
 
+/**
+  Fold into the running checksum the part of an event that is present in
+  the current buffer; that part must contain at least the common header.
+
+  @param    buf       the pointer to trans cache's buffer
+  @param    off       the offset of the beginning of the event in the buffer
+  @param    event_len length of the event without the checksum
+  @param    length    the current size of the buffer
+
+  @param    crc       [in-out] the checksum, updated with my_checksum()
+
+  The event itself is not modified here; the caller increases the stored
+  event length by @c BINLOG_CHECKSUM_LEN.
+  @return 0 if the whole event lies inside the buffer, otherwise the number
+          of event bytes (excluding the checksum part) not yet processed.
+*/
+  static ulong fix_log_event_crc(uchar *buf, uint off, uint event_len,
+                                 uint length, ha_checksum *crc)
+{
+  ulong ret;
+  uchar *event_begin= buf + off;
+  /* ret: number of event bytes that lie beyond the end of the buffer */
+  ret= length >= off + event_len ? 0 : off + event_len - length;
+  *crc= my_checksum(*crc, event_begin, event_len - ret); 
+  return ret;
+}
+
 /*
   Write the contents of a cache to the binary log.
 
   SYNOPSIS
     write_cache()
+    thd      Current thread
     cache    Cache to write to the binary log
-    lock_log True if the LOCK_log mutex should be aquired, false otherwise
-    sync_log True if the log should be flushed and synced
 
   DESCRIPTION
     Write the contents of the cache to the binary log. The cache will
     be reset as a READ_CACHE to be able to read the contents from it.
- */
 
-int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
-{
-  Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
+    When checksums are enabled (per @c binlog_checksum_options), a checksum
+    is appended to each event read from the trans cache, and the length and
+    end_log_pos of each event are fixed up before it is written to the binlog.
+*/
 
+int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
+{
+  mysql_mutex_assert_owner(&LOCK_log);
   if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
     return ER_ERROR_ON_WRITE;
   uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
+  ulong remains= 0; // part of unprocessed yet netto length of the event
   long val;
+  ulong end_log_pos_inc= 0; // each event processed adds BINLOG_CHECKSUM_LEN to it
   uchar header[LOG_EVENT_HEADER_LEN];
+  ha_checksum crc= 0, crc_0= 0; // assignments to keep compiler happy
+  my_bool do_checksum= (binlog_checksum_options != BINLOG_CHECKSUM_ALG_OFF);
+  uchar buf[BINLOG_CHECKSUM_LEN];
+
+  // while there is just one alg the following must hold:
+  DBUG_ASSERT(!do_checksum ||
+              binlog_checksum_options == BINLOG_CHECKSUM_ALG_CRC32);
 
   /*
     The events in the buffer have incorrect end_log_pos data
@@ -5204,6 +5422,8 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
 
   group= (uint)my_b_tell(&log_file);
   hdr_offs= carry= 0;
+  if (do_checksum)
+    crc= crc_0= my_checksum(0L, NULL, 0);
 
   do
   {
@@ -5216,25 +5436,44 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
       DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);
 
       /* assemble both halves */
-      memcpy(&header[carry], (char *)cache->read_pos, LOG_EVENT_HEADER_LEN - carry);
+      memcpy(&header[carry], (char *)cache->read_pos,
+             LOG_EVENT_HEADER_LEN - carry);
 
       /* fix end_log_pos */
-      val= uint4korr(&header[LOG_POS_OFFSET]) + group;
+      val= uint4korr(&header[LOG_POS_OFFSET]) + group +
+        (end_log_pos_inc+= (do_checksum ? BINLOG_CHECKSUM_LEN : 0));
       int4store(&header[LOG_POS_OFFSET], val);
 
+      if (do_checksum)
+      {
+        ulong len= uint4korr(&header[EVENT_LEN_OFFSET]);
+        /* fix len */
+        int4store(&header[EVENT_LEN_OFFSET], len + BINLOG_CHECKSUM_LEN);
+      }
+
       /* write the first half of the split header */
       if (my_b_write(&log_file, header, carry))
         return ER_ERROR_ON_WRITE;
+      status_var_add(thd->status_var.binlog_bytes_written, carry);
 
       /*
         copy fixed second half of header to cache so the correct
         version will be written later.
       */
-      memcpy((char *)cache->read_pos, &header[carry], LOG_EVENT_HEADER_LEN - carry);
+      memcpy((char *)cache->read_pos, &header[carry],
+             LOG_EVENT_HEADER_LEN - carry);
 
       /* next event header at ... */
-      hdr_offs = uint4korr(&header[EVENT_LEN_OFFSET]) - carry;
+      hdr_offs= uint4korr(&header[EVENT_LEN_OFFSET]) - carry -
+        (do_checksum ? BINLOG_CHECKSUM_LEN : 0);
 
+      if (do_checksum)
+      {
+        DBUG_ASSERT(crc == crc_0 && remains == 0);
+        crc= my_checksum(crc, header, carry);
+        remains= uint4korr(header + EVENT_LEN_OFFSET) - carry -
+          BINLOG_CHECKSUM_LEN;
+      }
       carry= 0;
     }
 
@@ -5249,6 +5488,25 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
         very next iteration, just "eventually").
       */
 
+      /* crc-calc the whole buffer */
+      if (do_checksum && hdr_offs >= length)
+      {
+
+        DBUG_ASSERT(remains != 0 && crc != crc_0);
+
+        crc= my_checksum(crc, cache->read_pos, length); 
+        remains -= length;
+        if (my_b_write(&log_file, cache->read_pos, length))
+          return ER_ERROR_ON_WRITE;
+        if (remains == 0)
+        {
+          int4store(buf, crc);
+          if (my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
+            return ER_ERROR_ON_WRITE;
+          crc= crc_0;
+        }
+      }
+
       while (hdr_offs < length)
       {
         /*
@@ -5256,6 +5514,26 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
           we get the rest.
         */
 
+        if (do_checksum)
+        {
+          if (remains != 0)
+          {
+            /*
+              finish off with remains of the last event that crawls
+              from previous into the current buffer
+            */
+            DBUG_ASSERT(crc != crc_0);
+            crc= my_checksum(crc, cache->read_pos, hdr_offs);
+            int4store(buf, crc);
+            remains -= hdr_offs;
+            DBUG_ASSERT(remains == 0);
+            if (my_b_write(&log_file, cache->read_pos, hdr_offs) ||
+                my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
+              return ER_ERROR_ON_WRITE;
+            crc= crc_0;
+          }
+        }
+
         if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
         {
           carry= length - hdr_offs;
@@ -5265,17 +5543,38 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
         else
         {
           /* we've got a full event-header, and it came in one piece */
-
-          uchar *log_pos= (uchar *)cache->read_pos + hdr_offs + LOG_POS_OFFSET;
+          uchar *ev= (uchar *)cache->read_pos + hdr_offs;
+          uint event_len= uint4korr(ev + EVENT_LEN_OFFSET); // netto len
+          uchar *log_pos= ev + LOG_POS_OFFSET;
 
           /* fix end_log_pos */
-          val= uint4korr(log_pos) + group;
+          val= uint4korr(log_pos) + group +
+            (end_log_pos_inc += (do_checksum ? BINLOG_CHECKSUM_LEN : 0));
           int4store(log_pos, val);
 
+	  /* fix CRC */
+	  if (do_checksum)
+          {
+            /* fix length */
+            int4store(ev + EVENT_LEN_OFFSET, event_len + BINLOG_CHECKSUM_LEN);
+            remains= fix_log_event_crc(cache->read_pos, hdr_offs, event_len,
+                                       length, &crc);
+            if (my_b_write(&log_file, ev, 
+                           remains == 0 ? event_len : length - hdr_offs))
+              return ER_ERROR_ON_WRITE;
+            if (remains == 0)
+            {
+              int4store(buf, crc);
+              if (my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
+                return ER_ERROR_ON_WRITE;
+              crc= crc_0; // crc is complete
+            }
+          }
+
           /* next event header at ... */
-          log_pos= (uchar *)cache->read_pos + hdr_offs + EVENT_LEN_OFFSET;
-          hdr_offs += uint4korr(log_pos);
+          hdr_offs += event_len; // incr by the netto len
 
+          DBUG_ASSERT(!do_checksum || remains == 0 || hdr_offs >= length);
         }
       }
 
@@ -5291,15 +5590,19 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
     }
 
     /* Write data to the binary log file */
-    if (my_b_write(&log_file, cache->read_pos, length))
-      return ER_ERROR_ON_WRITE;
+    DBUG_EXECUTE_IF("fail_binlog_write_1",
+                    errno= 28; return ER_ERROR_ON_WRITE;);
+    if (!do_checksum)
+      if (my_b_write(&log_file, cache->read_pos, length))
+        return ER_ERROR_ON_WRITE;
+    status_var_add(thd->status_var.binlog_bytes_written, length);
+
     cache->read_pos=cache->read_end;		// Mark buffer used up
   } while ((length= my_b_fill(cache)));
 
   DBUG_ASSERT(carry == 0);
-
-  if (sync_log)
-    return flush_and_sync(0);
+  DBUG_ASSERT(!do_checksum || remains == 0);
+  DBUG_ASSERT(!do_checksum || crc == crc_0);
 
   return 0;                                     // All OK
 }
@@ -5333,30 +5636,50 @@ int query_error_code(THD *thd, bool not_killed)
   return error;
 }
 
-bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
+
+bool MYSQL_BIN_LOG::write_incident_already_locked(THD *thd)
 {
   uint error= 0;
-  DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
-
-  if (!is_open())
-    DBUG_RETURN(error);
-
-  LEX_STRING const write_error_msg=
-    { C_STRING_WITH_LEN("error writing to the binary log") };
+  DBUG_ENTER("MYSQL_BIN_LOG::write_incident_already_locked");
   Incident incident= INCIDENT_LOST_EVENTS;
   Incident_log_event ev(thd, incident, write_error_msg);
-  if (lock)
-    mysql_mutex_lock(&LOCK_log);
-  error= ev.write(&log_file);
-  if (lock)
+
+  if (likely(is_open()))
+  {
+    error= ev.write(&log_file);
+    status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
+  }
+
+  DBUG_RETURN(error);
+}
+
+
+bool MYSQL_BIN_LOG::write_incident(THD *thd)
+{
+  uint error= 0;
+  my_off_t offset;
+  DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
+
+  mysql_mutex_lock(&LOCK_log);
+  if (likely(is_open()))
   {
-    if (!error && !(error= flush_and_sync(0)))
+    if (!(error= write_incident_already_locked(thd)) &&
+        !(error= flush_and_sync(0)))
     {
       signal_update();
       error= rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
     }
-    mysql_mutex_unlock(&LOCK_log);
+    offset= my_b_tell(&log_file);
+    /*
+      Take mutex to protect against a reader seeing partial writes of 64-bit
+      offset on 32-bit CPUs.
+    */
+    mysql_mutex_lock(&LOCK_commit_ordered);
+    last_commit_pos_offset= offset;
+    mysql_mutex_unlock(&LOCK_commit_ordered);
   }
+  mysql_mutex_unlock(&LOCK_log);
+
   DBUG_RETURN(error);
 }
 
@@ -5384,102 +5707,425 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
     'cache' needs to be reinitialized after this functions returns.
 */
 
-bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event,
-                          bool incident)
+bool
+MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
+                                           binlog_cache_mngr *cache_mngr,
+                                           Log_event *end_ev, bool all,
+                                           bool using_stmt_cache,
+                                           bool using_trx_cache)
+{
+  group_commit_entry entry;
+  DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
+  /* Fill in a group commit entry and return the result of queueing it. */
+  entry.thd= thd;
+  entry.cache_mngr= cache_mngr;
+  entry.error= 0;
+  entry.all= all;
+  entry.using_stmt_cache= using_stmt_cache;
+  entry.using_trx_cache= using_trx_cache;
+
+  /*
+    Log "BEGIN" at the beginning of every transaction.  Here, a transaction is
+    either a BEGIN..COMMIT block or a single statement in autocommit mode.
+
+    Create the necessary events here, where we have the correct THD (and
+    thread context).
+
+    Due to group commit the actual writing to binlog may happen in a different
+    thread.
+  */
+  Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), using_trx_cache, TRUE,
+                        TRUE, 0);
+  entry.begin_event= &qinfo;
+  entry.end_event= end_ev;
+  if (cache_mngr->stmt_cache.has_incident() ||
+      cache_mngr->trx_cache.has_incident())
+  {
+    Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, write_error_msg);
+    entry.incident_event= &inc_ev;  /* block-scoped: call before scope ends */
+    DBUG_RETURN(write_transaction_to_binlog_events(&entry));
+  }
+  else
+  {
+    entry.incident_event= NULL;     /* no incident event to write */
+    DBUG_RETURN(write_transaction_to_binlog_events(&entry));
+  }
+}
+
+bool
+MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
+{
+  /*
+    To facilitate group commit for the binlog, we first queue up ourselves in
+    the group commit queue. Then the first thread to enter the queue waits for
+    the LOCK_log mutex, and commits for everyone in the queue once it gets the
+    lock. Any other threads in the queue just wait for the first one to finish
+    the commit and wake them up.
+  */
+  /* Returns 0 on success; 1 after reporting the error in entry->error. */
+  entry->thd->clear_wakeup_ready();
+  mysql_mutex_lock(&LOCK_prepare_ordered);
+  group_commit_entry *orig_queue= group_commit_queue;
+  entry->next= orig_queue;
+  group_commit_queue= entry;
+  /* orig_queue == NULL here means the queue was empty: we are the leader. */
+  if (entry->cache_mngr->using_xa)
+  {
+    DEBUG_SYNC(entry->thd, "commit_before_prepare_ordered");
+    run_prepare_ordered(entry->thd, entry->all);
+    DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered");
+  }
+  mysql_mutex_unlock(&LOCK_prepare_ordered);
+  DEBUG_SYNC(entry->thd, "commit_after_release_LOCK_prepare_ordered");
+
+  /*
+    The first in the queue handle group commit for all; the others just wait
+    to be signalled when group commit is done.
+  */
+  if (orig_queue != NULL)
+    entry->thd->wait_for_wakeup_ready();
+  else
+    trx_group_commit_leader(entry);
+
+  if (!opt_optimize_thread_scheduling)
+  {
+    /* For the leader, trx_group_commit_leader() already took the lock. */
+    if (orig_queue != NULL)
+      mysql_mutex_lock(&LOCK_commit_ordered);
+
+    DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered");
+    ++num_commits;
+    if (entry->cache_mngr->using_xa && !entry->error)
+      run_commit_ordered(entry->thd, entry->all);
+    /* Wake the next entry, or mark the queue idle if we are the last one. */
+    group_commit_entry *next= entry->next;
+    if (!next)
+    {
+      group_commit_queue_busy= FALSE;
+      mysql_cond_signal(&COND_queue_busy);
+      DEBUG_SYNC(entry->thd, "commit_after_group_run_commit_ordered");
+    }
+    mysql_mutex_unlock(&LOCK_commit_ordered);
+
+    if (next)
+    {
+      next->thd->signal_wakeup_ready();
+    }
+  }
+
+  if (likely(!entry->error))
+    return 0;
+  /* Report the error here, in the thread that owns this entry. */
+  switch (entry->error)
+  {
+  case ER_ERROR_ON_WRITE:
+    my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, entry->commit_errno);
+    break;
+  case ER_ERROR_ON_READ:
+    my_error(ER_ERROR_ON_READ, MYF(ME_NOREFRESH),
+             entry->error_cache->file_name, entry->commit_errno);
+    break;
+  default:
+    /*
+      There are not (and should not be) any errors thrown not covered above.
+      But just in case one is added later without updating the above switch
+      statement, include a catch-all.
+    */
+    my_printf_error(entry->error,
+                    "Error writing transaction to binary log: %d",
+                    MYF(ME_NOREFRESH), entry->error);
+  }
+
+  /*
+    Since we return error, this transaction XID will not be committed, so
+    we need to mark it as not needed for recovery (unlog() is not called
+    for a transaction if log_xid() fails).
+  */
+  if (entry->cache_mngr->using_xa && entry->cache_mngr->xa_xid)
+    mark_xid_done();
+
+  return 1;
+}
+
+/*
+  Do binlog group commit as the lead thread.
+
+  This must be called when this statement/transaction is queued at the start of
+  the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
+  commit all the transactions in the queue (more may have entered while waiting
+  for LOCK_log). After commit is done, all other threads in the queue will be
+  signalled.
+
+ */
+void
+MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
 {
-  DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)");
+  uint xid_count= 0;
+  my_off_t commit_offset;
+  group_commit_entry *current;
+  group_commit_entry *last_in_queue;
+  DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
+  LINT_INIT(commit_offset);
+
+  /*
+    Lock the LOCK_log(), and once we get it, collect any additional writes
+    that queued up while we were waiting.
+  */
   mysql_mutex_lock(&LOCK_log);
+  DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log");
 
+  mysql_mutex_lock(&LOCK_prepare_ordered);
+  current= group_commit_queue;
+  group_commit_queue= NULL;
+  mysql_mutex_unlock(&LOCK_prepare_ordered);
+
+  /* As the queue is in reverse order of entering, reverse it. */
+  group_commit_entry *queue= NULL;
+  last_in_queue= current;
+  while (current)
+  {
+    group_commit_entry *next= current->next;
+    current->next= queue;
+    queue= current;
+    current= next;
+  }
+  DBUG_ASSERT(leader == queue /* the leader should be first in queue */);
+
+  /* Now we have in queue the list of transactions to be committed in order. */
   DBUG_ASSERT(is_open());
   if (likely(is_open()))                       // Should always be true
   {
     /*
-      We only bother to write to the binary log if there is anything
-      to write.
-     */
-    if (my_b_tell(cache) > 0)
+      Commit every transaction in the queue.
+
+      Note that we are doing this in a different thread than the one running
+      the transaction! So we are limited in the operations we can do. In
+      particular, we cannot call my_error() on behalf of a transaction, as
+      that obtains the THD from thread local storage. Instead, we must set
+      current->error and let the thread do the error reporting itself once
+      we wake it up.
+    */
+    for (current= queue; current != NULL; current= current->next)
     {
+      binlog_cache_mngr *cache_mngr= current->cache_mngr;
+
       /*
-        Log "BEGIN" at the beginning of every transaction.  Here, a
-        transaction is either a BEGIN..COMMIT block or a single
-        statement in autocommit mode.
+        We already checked before that at least one cache is non-empty; if both
+        are empty we would have skipped calling into here.
       */
-      Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE, TRUE, 0);
-      if (qinfo.write(&log_file))
-        goto err;
-      DBUG_EXECUTE_IF("crash_before_writing_xid",
-                      {
-                        if ((write_error= write_cache(cache, false, true)))
-                          DBUG_PRINT("info", ("error writing binlog cache: %d",
-                                               write_error));
-                        DBUG_PRINT("info", ("crashing before writing xid"));
-                        DBUG_SUICIDE();
-                      });
-
-      if ((write_error= write_cache(cache, false, false)))
-        goto err;
+      DBUG_ASSERT(!cache_mngr->stmt_cache.empty() || !cache_mngr->trx_cache.empty());
 
-      if (commit_event && commit_event->write(&log_file))
-        goto err;
+      current->error= write_transaction_or_stmt(current);
 
-      if (incident && write_incident(thd, FALSE))
-        goto err;
+      strmake(cache_mngr->last_commit_pos_file, log_file_name,
+              sizeof(cache_mngr->last_commit_pos_file)-1);
+      commit_offset= my_b_write_tell(&log_file);
+      cache_mngr->last_commit_pos_offset= commit_offset;
+      if (cache_mngr->using_xa && cache_mngr->xa_xid)
+        xid_count++;
+    }
 
-      bool synced= 0;
-      if (flush_and_sync(&synced))
-        goto err;
-      DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE(););
-      if (cache->error)				// Error on read
+    bool synced= 0;
+    if (flush_and_sync(&synced))
+    {
+      for (current= queue; current != NULL; current= current->next)
       {
-        sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
-        write_error=1;				// Don't give more errors
-        goto err;
+        if (!current->error)
+        {
+          current->error= ER_ERROR_ON_WRITE;
+          current->commit_errno= errno;
+          current->error_cache= NULL;
+        }
       }
-
-      if (RUN_HOOK(binlog_storage, after_flush,
-                   (thd, log_file_name, log_file.pos_in_file, synced)))
+    }
+    else
+    {
+      bool any_error= false;
+      bool all_error= true;
+      for (current= queue; current != NULL; current= current->next)
       {
-        sql_print_error("Failed to run 'after_flush' hooks");
-        write_error=1;
-        goto err;
+        if (!current->error &&
+            RUN_HOOK(binlog_storage, after_flush,
+                (current->thd, log_file_name, log_file.pos_in_file, synced)))
+        {
+          current->error= ER_ERROR_ON_WRITE;
+          current->commit_errno= -1;
+          current->error_cache= NULL;
+          any_error= true;
+        }
+        else
+          all_error= false;
       }
 
-      signal_update();
+      if (any_error)
+        sql_print_error("Failed to run 'after_flush' hooks");
+      if (!all_error)
+        signal_update();
     }
 
     /*
-      if commit_event is Xid_log_event, increase the number of
-      prepared_xids (it's decreasd in ::unlog()). Binlog cannot be rotated
+      if any commit_events are Xid_log_event, increase the number of
+      prepared_xids (it's decreased in ::unlog()). Binlog cannot be rotated
       if there're prepared xids in it - see the comment in new_file() for
       an explanation.
-      If the commit_event is not Xid_log_event (then it's a Query_log_event)
-      rotate binlog, if necessary.
+      If no Xid_log_events (then it's all Query_log_event) rotate binlog,
+      if necessary.
     */
-    if (commit_event && commit_event->get_type_code() == XID_EVENT)
+    if (xid_count > 0)
     {
-      mysql_mutex_lock(&LOCK_prep_xids);
-      prepared_xids++;
-      mysql_mutex_unlock(&LOCK_prep_xids);
+      mark_xids_active(xid_count);
     }
     else
+    {
       if (rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED))
-        goto err;
+      {
+        /*
+          If we fail to rotate, which thread should get the error?
+          We give the error to the *last* transaction thread; that seems to
+          make the most sense, as it was the last to write to the log.
+        */
+        last_in_queue->error= ER_ERROR_ON_WRITE;
+        last_in_queue->commit_errno= errno;
+      }
+    }
   }
+
+  DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
+  mysql_mutex_lock(&LOCK_commit_ordered);
+  last_commit_pos_offset= commit_offset;
+  /*
+    We cannot unlock LOCK_log until we have locked LOCK_commit_ordered;
+    otherwise scheduling could allow the next group commit to run ahead of us,
+    messing up the order of commit_ordered() calls. But as soon as
+    LOCK_commit_ordered is obtained, we can let the next group commit start.
+  */
   mysql_mutex_unlock(&LOCK_log);
+  DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log");
+  ++num_group_commits;
 
-  DBUG_RETURN(0);
+  if (!opt_optimize_thread_scheduling)
+  {
+    /*
+      If we want to run commit_ordered() each in the transaction's own thread
+      context, then we need to mark the queue reserved; we need to finish all
+      threads in one group commit before the next group commit can be allowed
+      to proceed, and we cannot unlock a simple pthreads mutex in a different
+      thread from the one that locked it.
+    */
 
-err:
-  if (!write_error)
+    while (group_commit_queue_busy)
+      mysql_cond_wait(&COND_queue_busy, &LOCK_commit_ordered);
+    group_commit_queue_busy= TRUE;
+
+    /* Note that we return with LOCK_commit_ordered locked! */
+    DBUG_VOID_RETURN;
+  }
+
+  /*
+    Wakeup each participant waiting for our group commit, first calling the
+    commit_ordered() methods for any transactions doing 2-phase commit.
+  */
+  current= queue;
+  while (current != NULL)
   {
-    write_error= 1;
-    sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
+    group_commit_entry *next;
+
+    DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered");
+    ++num_commits;
+    if (current->cache_mngr->using_xa && !current->error)
+      run_commit_ordered(current->thd, current->all);
+
+    /*
+      Careful not to access current->next after waking up the other thread! As
+      it may change immediately after wakeup.
+    */
+    next= current->next;
+    if (current != leader)                      // Don't wake up ourself
+      current->thd->signal_wakeup_ready();
+    current= next;
   }
-  mysql_mutex_unlock(&LOCK_log);
-  DBUG_RETURN(1);
+  DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered");
+  mysql_mutex_unlock(&LOCK_commit_ordered);
+
+  DBUG_VOID_RETURN;
 }
 
 
+/*
+  Write one group commit entry to the binlog: begin event, statement cache,
+  transaction cache, end event, then the optional incident event.
+  Returns 0, ER_ERROR_ON_WRITE or ER_ERROR_ON_READ; on error commit_errno is
+  errno and error_cache is the failing cache (NULL if an event write failed).
+*/
+int
+MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry)
+{
+  binlog_cache_mngr *mngr= entry->cache_mngr;
+
+  if (entry->begin_event->write(&log_file))
+    goto event_write_error;
+  status_var_add(entry->thd->status_var.binlog_bytes_written,
+                 entry->begin_event->data_written);
+
+  if (entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
+      write_cache(entry->thd, mngr->get_binlog_cache_log(FALSE)))
+  {
+    entry->error_cache= &mngr->stmt_cache.cache_log;
+    entry->commit_errno= errno;
+    return ER_ERROR_ON_WRITE;
+  }
+
+  if (entry->using_trx_cache && !mngr->trx_cache.empty())
+  {
+    DBUG_EXECUTE_IF("crash_before_writing_xid",
+                    {
+                      if ((write_cache(entry->thd,
+                                       mngr->get_binlog_cache_log(TRUE))))
+                        DBUG_PRINT("info", ("error writing binlog cache"));
+                      else
+                        flush_and_sync(0);
+
+                      DBUG_PRINT("info", ("crashing before writing xid"));
+                      DBUG_SUICIDE();
+                    });
+
+    if (write_cache(entry->thd, mngr->get_binlog_cache_log(TRUE)))
+    {
+      entry->error_cache= &mngr->trx_cache.cache_log;
+      entry->commit_errno= errno;
+      return ER_ERROR_ON_WRITE;
+    }
+  }
+
+  if (entry->end_event->write(&log_file))
+    goto event_write_error;
+  status_var_add(entry->thd->status_var.binlog_bytes_written,
+                 entry->end_event->data_written);
+
+  if (entry->incident_event && entry->incident_event->write(&log_file))
+    goto event_write_error;
+
+  if (mngr->get_binlog_cache_log(FALSE)->error) // Error on read
+  {
+    entry->error_cache= &mngr->stmt_cache.cache_log;
+    entry->commit_errno= errno;
+    return ER_ERROR_ON_READ;
+  }
+  if (mngr->get_binlog_cache_log(TRUE)->error)  // Error on read
+  {
+    entry->error_cache= &mngr->trx_cache.cache_log;
+    entry->commit_errno= errno;
+    return ER_ERROR_ON_READ;
+  }
+
+  return 0;
+
+event_write_error:
+  entry->error_cache= NULL;             /* an event, not a cache, failed */
+  entry->commit_errno= errno;
+  return ER_ERROR_ON_WRITE;
+}
+
 /**
   Wait until we get a signal that the relay log has been updated.
 
@@ -5561,6 +6207,11 @@ void MYSQL_BIN_LOG::close(uint exiting)
 	(exiting & LOG_CLOSE_STOP_EVENT))
     {
       Stop_log_event s;
+      // the checksumming rule for relay-log case is similar to Rotate
+        s.checksum_alg= is_relay_log ?
+          (uint8) relay_log_checksum_alg : (uint8) binlog_checksum_options;
+      DBUG_ASSERT(!is_relay_log ||
+                  relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
       s.write(&log_file);
       bytes_written+= s.data_written;
       signal_update();
@@ -5677,9 +6328,23 @@ static bool test_if_number(register const char *str,
 
 void sql_perror(const char *message)
 {
-#ifdef HAVE_STRERROR
+#if defined(_WIN32)
+  char* buf;
+  DWORD dw= GetLastError();
+  if (FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |  FORMAT_MESSAGE_FROM_SYSTEM |
+        FORMAT_MESSAGE_IGNORE_INSERTS,  NULL, dw,
+        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&buf, 0, NULL ) > 0)
+  {
+    sql_print_error("%s: %s",message, buf);
+    LocalFree((HLOCAL)buf);
+  }
+  else
+  {
+    sql_print_error("%s", message);
+  }
+#elif defined(HAVE_STRERROR)
   sql_print_error("%s: %s",message, strerror(errno));
-#else
+#else 
   perror(message);
 #endif
 }
@@ -5884,42 +6549,184 @@ void sql_print_information(const char *format, ...)
 }
 
 
+/* Run prepare_ordered() for each participating handler that implements it. */
+void TC_LOG::run_prepare_ordered(THD *thd, bool all)
+{
+  mysql_mutex_assert_owner(&LOCK_prepare_ordered);
+
+  /* "all" picks transaction.all.ha_list over transaction.stmt.ha_list. */
+  for (Ha_trx_info *trx_info= all ? thd->transaction.all.ha_list
+                                  : thd->transaction.stmt.ha_list;
+       trx_info; trx_info= trx_info->next())
+  {
+    handlerton *engine= trx_info->ht();
+    if (engine->prepare_ordered)
+      engine->prepare_ordered(engine, thd, all);
+  }
+}
+
+
+/* Run commit_ordered() for each participating handler that implements it. */
+void TC_LOG::run_commit_ordered(THD *thd, bool all)
+{
+  mysql_mutex_assert_owner(&LOCK_commit_ordered);
+  for (Ha_trx_info *trx_info= all ? thd->transaction.all.ha_list
+                                  : thd->transaction.stmt.ha_list;
+       trx_info; trx_info= trx_info->next())
+  {
+    handlerton *engine= trx_info->ht();
+    if (engine->commit_ordered)
+    {
+      engine->commit_ordered(engine, thd, all);
+      DEBUG_SYNC(thd, "commit_after_run_commit_ordered");
+    }
+  }
+}
+
+/* Log xid; run prepare_ordered()/commit_ordered() in one consistent order. */
+int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all,
+                               bool need_prepare_ordered,
+                               bool need_commit_ordered)
+{
+  int cookie;                           /* log_one_transaction() result */
+  struct commit_entry entry;            /* our node in commit_ordered_queue */
+  bool is_group_commit_leader;          /* set only if both need_* are true */
+  LINT_INIT(is_group_commit_leader);
+
+  if (need_prepare_ordered)
+  {
+    mysql_mutex_lock(&LOCK_prepare_ordered);
+    run_prepare_ordered(thd, all);
+    if (need_commit_ordered)
+    {
+      /*
+        Must put us in queue so we can run_commit_ordered() in same sequence
+        as we did run_prepare_ordered().
+      */
+      thd->clear_wakeup_ready();
+      entry.thd= thd;
+      commit_entry *previous_queue= commit_ordered_queue;
+      entry.next= previous_queue;               /* push at head of the list */
+      commit_ordered_queue= &entry;
+      is_group_commit_leader= (previous_queue == NULL); /* queue was empty */
+    }
+    mysql_mutex_unlock(&LOCK_prepare_ordered);
+  }
+
+  cookie= 0;                            /* stays 0 when xid is 0 */
+  if (xid)
+    cookie= log_one_transaction(xid);
+
+  if (need_commit_ordered)
+  {
+    if (need_prepare_ordered)
+    {
+      /*
+        We did the run_prepare_ordered() serialised, then ran the log_xid() in
+        parallel. Now we have to do run_commit_ordered() serialised in the
+        same sequence as run_prepare_ordered().
+
+        We do this starting from the head of the queue, each thread doing
+        run_commit_ordered() and signalling the next in queue.
+      */
+      if (is_group_commit_leader)
+      {
+        /* The first in queue starts the ball rolling. */
+        mysql_mutex_lock(&LOCK_prepare_ordered);
+        while (commit_ordered_queue_busy)
+          mysql_cond_wait(&COND_queue_busy, &LOCK_prepare_ordered);
+        commit_entry *queue= commit_ordered_queue;
+        commit_ordered_queue= NULL;
+        /*
+          Mark the queue busy while we bounce it from one thread to the
+          next.
+        */
+        commit_ordered_queue_busy= true;
+        mysql_mutex_unlock(&LOCK_prepare_ordered);
+
+        /* Reverse the list (built newest-first); the leader ends up first. */
+        commit_entry *prev= NULL;
+        while (queue)
+        {
+          commit_entry *next= queue->next;
+          queue->next= prev;
+          prev= queue;
+          queue= next;
+        }
+        DBUG_ASSERT(prev == &entry && prev->thd == thd);
+      }
+      else
+      {
+        /* Not first in queue; just wait until previous thread wakes us up. */
+        thd->wait_for_wakeup_ready();
+      }
+    }
+
+    /* Only run commit_ordered() if log_xid was successful. */
+    if (cookie)
+    {
+      mysql_mutex_lock(&LOCK_commit_ordered);
+      run_commit_ordered(thd, all);
+      mysql_mutex_unlock(&LOCK_commit_ordered);
+    }
+
+    if (need_prepare_ordered)
+    {
+      commit_entry *next= entry.next;   /* successor in the reversed list */
+      if (next)
+      {
+        next->thd->signal_wakeup_ready();
+      }
+      else
+      {
+        mysql_mutex_lock(&LOCK_prepare_ordered);
+        commit_ordered_queue_busy= false;       /* last one frees the queue */
+        mysql_cond_signal(&COND_queue_busy);
+        mysql_mutex_unlock(&LOCK_prepare_ordered);
+      }
+    }
+  }
+
+  return cookie;
+}
+
+
 /********* transaction coordinator log for 2pc - mmap() based solution *******/
 
 /*
-  the log consists of a file, mmapped to a memory.
-  file is divided on pages of tc_log_page_size size.
-  (usable size of the first page is smaller because of log header)
-  there's PAGE control structure for each page
-  each page (or rather PAGE control structure) can be in one of three
-  states - active, syncing, pool.
-  there could be only one page in active or syncing states,
-  but many in pool - pool is fifo queue.
-  usual lifecycle of a page is pool->active->syncing->pool
-  "active" page - is a page where new xid's are logged.
-  the page stays active as long as syncing slot is taken.
-  "syncing" page is being synced to disk. no new xid can be added to it.
-  when the sync is done the page is moved to a pool and an active page
+  the log consists of a file, mapped to memory.
+  file is divided into pages of tc_log_page_size size.
+  (usable size of the first page is smaller because of the log header)
+  there is a PAGE control structure for each page
+  each page (or rather its PAGE control structure) can be in one of
+  the three states - active, syncing, pool.
+  there could be only one page in the active or syncing state,
+  but many in pool - pool is a fifo queue.
+  the usual lifecycle of a page is pool->active->syncing->pool.
+  the "active" page is a page where new xid's are logged.
+  the page stays active as long as the syncing slot is taken.
+  the "syncing" page is being synced to disk. no new xid can be added to it.
+  when the syncing is done the page is moved to a pool and an active page
   becomes "syncing".
 
   the result of such an architecture is a natural "commit grouping" -
   If commits are coming faster than the system can sync, they do not
-  stall. Instead, all commit that came since the last sync are
-  logged to the same page, and they all are synced with the next -
+  stall. Instead, all commits that came since the last sync are
+  logged to the same "active" page, and they all are synced with the next -
   one - sync. Thus, thought individual commits are delayed, throughput
   is not decreasing.
 
-  when a xid is added to an active page, the thread of this xid waits
+  when an xid is added to an active page, the thread of this xid waits
   for a page's condition until the page is synced. when syncing slot
   becomes vacant one of these waiters is awaken to take care of syncing.
   it syncs the page and signals all waiters that the page is synced.
   PAGE::waiters is used to count these waiters, and a page may never
   become active again until waiters==0 (that is all waiters from the
-  previous sync have noticed the sync was completed)
+  previous sync have noticed that the sync was completed)
 
   note, that the page becomes "dirty" and has to be synced only when a
   new xid is added into it. Removing a xid from a page does not make it
-  dirty - we don't sync removals to disk.
+  dirty - we don't sync xid removals to disk.
 */
 
 ulong tc_log_page_waits= 0;
@@ -5928,7 +6735,7 @@ ulong tc_log_page_waits= 0;
 
 #define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)
 
-static const char tc_log_magic[]={(char) 254, 0x23, 0x05, 0x74};
+static const uchar tc_log_magic[]={(uchar) 254, 0x23, 0x05, 0x74};
 
 ulong opt_tc_log_size= TC_LOG_MIN_SIZE;
 ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
@@ -5986,7 +6793,8 @@ int TC_LOG_MMAP::open(const char *opt_name)
   inited=2;
 
   npages=(uint)file_length/tc_log_page_size;
-  DBUG_ASSERT(npages >= 3);             // to guarantee non-empty pool
+  if (npages < 3)             // to guarantee non-empty pool
+    goto err;
   if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
     goto err;
   inited=3;
@@ -5997,9 +6805,9 @@ int TC_LOG_MMAP::open(const char *opt_name)
     pg->state=PS_POOL;
     mysql_mutex_init(key_PAGE_lock, &pg->lock, MY_MUTEX_INIT_FAST);
     mysql_cond_init(key_PAGE_cond, &pg->cond, 0);
-    pg->start=(my_xid *)(data + i*tc_log_page_size);
-    pg->end=(my_xid *)(pg->start + tc_log_page_size);
+    pg->ptr= pg->start=(my_xid *)(data + i*tc_log_page_size);
     pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
+    pg->end=pg->start + pg->size;
   }
   pages[0].size=pages[0].free=
                 (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
@@ -6020,6 +6828,7 @@ int TC_LOG_MMAP::open(const char *opt_name)
   mysql_mutex_init(key_LOCK_pool, &LOCK_pool, MY_MUTEX_INIT_FAST);
   mysql_cond_init(key_COND_active, &COND_active, 0);
   mysql_cond_init(key_COND_pool, &COND_pool, 0);
+  mysql_cond_init(key_COND_queue_busy, &COND_queue_busy, 0);
 
   inited=6;
 
@@ -6027,6 +6836,8 @@ int TC_LOG_MMAP::open(const char *opt_name)
   active=pages;
   pool=pages+1;
   pool_last=pages+npages-1;
+  commit_ordered_queue= NULL;
+  commit_ordered_queue_busy= false;
 
   return 0;
 
@@ -6043,7 +6854,7 @@ err:
     -# if there're waiters - take the one with the most free space.
 
   @todo
-    TODO page merging. try to allocate adjacent page first,
+    page merging. try to allocate adjacent page first,
     so that they can be flushed both in one sync
 */
 
@@ -6052,8 +6863,7 @@ void TC_LOG_MMAP::get_active_from_pool()
   PAGE **p, **best_p=0;
   int best_free;
 
-  if (syncing)
-    mysql_mutex_lock(&LOCK_pool);
+  mysql_mutex_lock(&LOCK_pool);
 
   do
   {
@@ -6073,20 +6883,21 @@ void TC_LOG_MMAP::get_active_from_pool()
   }
   while ((*best_p == 0 || best_free == 0) && overflow());
 
+  mysql_mutex_assert_owner(&LOCK_active);
   active=*best_p;
-  if (active->free == active->size) // we've chosen an empty page
-  {
-    tc_log_cur_pages_used++;
-    set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
-  }
 
   if ((*best_p)->next)              // unlink the page from the pool
     *best_p=(*best_p)->next;
   else
     pool_last=*best_p;
+  mysql_mutex_unlock(&LOCK_pool);
 
-  if (syncing)
-    mysql_mutex_unlock(&LOCK_pool);
+  mysql_mutex_lock(&active->lock);
+  if (active->free == active->size) // we've chosen an empty page
+  {
+    tc_log_cur_pages_used++;
+    set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
+  }
 }
 
 /**
@@ -6132,7 +6943,7 @@ int TC_LOG_MMAP::overflow()
     to the position in memory where xid was logged to.
 */
 
-int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid)
+int TC_LOG_MMAP::log_one_transaction(my_xid xid)
 {
   int err;
   PAGE *p;
@@ -6141,7 +6952,7 @@ int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid)
   mysql_mutex_lock(&LOCK_active);
 
   /*
-    if active page is full - just wait...
+    if the active page is full - just wait...
     frankly speaking, active->free here accessed outside of mutex
     protection, but it's safe, because it only means we may miss an
     unlog() for the active page, and we're not waiting for it here -
@@ -6153,9 +6964,17 @@ int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid)
   /* no active page ? take one from the pool */
   if (active == 0)
     get_active_from_pool();
+  else
+    mysql_mutex_lock(&active->lock);
 
   p=active;
-  mysql_mutex_lock(&p->lock);
+
+  /*
+    p->free is always > 0 here because to decrease it one needs
+    to take p->lock and before it one needs to take LOCK_active.
+    But we checked that active->free > 0 under LOCK_active and
+    haven't released it ever since.
+  */
 
   /* searching for an empty slot */
   while (*p->ptr)
@@ -6169,38 +6988,51 @@ int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid)
   *p->ptr++= xid;
   p->free--;
   p->state= PS_DIRTY;
-
-  /* to sync or not to sync - this is the question */
-  mysql_mutex_unlock(&LOCK_active);
-  mysql_mutex_lock(&LOCK_sync);
   mysql_mutex_unlock(&p->lock);
 
+  mysql_mutex_lock(&LOCK_sync);
   if (syncing)
   {                                          // somebody's syncing. let's wait
+    mysql_mutex_unlock(&LOCK_active);
+    mysql_mutex_lock(&p->lock);
     p->waiters++;
-    /*
-      note - it must be while (), not do ... while () here
-      as p->state may be not PS_DIRTY when we come here
-    */
-    while (p->state == PS_DIRTY && syncing)
+    for (;;)
+    {
+      int not_dirty = p->state != PS_DIRTY;
+      mysql_mutex_unlock(&p->lock);
+      if (not_dirty || !syncing)
+        break;
       mysql_cond_wait(&p->cond, &LOCK_sync);
+      mysql_mutex_lock(&p->lock);
+    }
     p->waiters--;
     err= p->state == PS_ERROR;
     if (p->state != PS_DIRTY)                   // page was synced
     {
-      if (p->waiters == 0)
-        mysql_cond_signal(&COND_pool);       // in case somebody's waiting
       mysql_mutex_unlock(&LOCK_sync);
+      if (p->waiters == 0)
+        mysql_cond_signal(&COND_pool);     // in case somebody's waiting
+      mysql_mutex_unlock(&p->lock);
       goto done;                             // we're done
     }
-  }                                          // page was not synced! do it now
-  DBUG_ASSERT(active == p && syncing == 0);
-  mysql_mutex_lock(&LOCK_active);
-  syncing=p;                                 // place is vacant - take it
-  active=0;                                  // page is not active anymore
-  mysql_cond_broadcast(&COND_active);        // in case somebody's waiting
-  mysql_mutex_unlock(&LOCK_active);
-  mysql_mutex_unlock(&LOCK_sync);
+    DBUG_ASSERT(!syncing);
+    mysql_mutex_unlock(&p->lock);
+    syncing = p;
+    mysql_mutex_unlock(&LOCK_sync);
+
+    mysql_mutex_lock(&LOCK_active);
+    active=0;                                  // page is not active anymore
+    mysql_cond_broadcast(&COND_active);
+    mysql_mutex_unlock(&LOCK_active);
+  }
+  else
+  {
+    syncing = p;                               // place is vacant - take it
+    mysql_mutex_unlock(&LOCK_sync);
+    active = 0;                                // page is not active anymore
+    mysql_cond_broadcast(&COND_active);
+    mysql_mutex_unlock(&LOCK_active);
+  }
   err= sync();
 
 done:
@@ -6217,7 +7049,7 @@ int TC_LOG_MMAP::sync()
     sit down and relax - this can take a while...
     note - no locks are held at this point
   */
-  err= my_msync(fd, syncing->start, 1, MS_SYNC);
+  err= my_msync(fd, syncing->start, syncing->size * sizeof(my_xid), MS_SYNC);
 
   /* page is synced. let's move it to the pool */
   mysql_mutex_lock(&LOCK_pool);
@@ -6225,14 +7057,23 @@ int TC_LOG_MMAP::sync()
   pool_last=syncing;
   syncing->next=0;
   syncing->state= err ? PS_ERROR : PS_POOL;
-  mysql_cond_broadcast(&syncing->cond);      // signal "sync done"
-  mysql_cond_signal(&COND_pool);             // in case somebody's waiting
+  mysql_cond_signal(&COND_pool);           // in case somebody's waiting
   mysql_mutex_unlock(&LOCK_pool);
 
   /* marking 'syncing' slot free */
   mysql_mutex_lock(&LOCK_sync);
+  mysql_cond_broadcast(&syncing->cond);    // signal "sync done"
   syncing=0;
-  mysql_cond_signal(&active->cond);        // wake up a new syncer
+  /*
+    we check the "active" pointer without LOCK_active. Still, it's safe -
+    "active" can change from NULL to not NULL any time, but it
+    will take LOCK_sync before waiting on active->cond. That is, it can never
+    miss a signal.
+    And "active" can change to NULL only by the syncing thread
+    (the thread that will send a signal below)
+  */
+  if (active)
+    mysql_cond_signal(&active->cond);      // wake up a new syncer
   mysql_mutex_unlock(&LOCK_sync);
   return err;
 }
@@ -6249,13 +7090,13 @@ int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
 
   DBUG_ASSERT(*x == xid);
   DBUG_ASSERT(x >= p->start && x < p->end);
-  *x=0;
 
   mysql_mutex_lock(&p->lock);
+  *x=0;
   p->free++;
   DBUG_ASSERT(p->free <= p->size);
   set_if_smaller(p->ptr, x);
-  if (p->free == p->size)               // the page is completely empty
+  if (p->free == p->size)              // the page is completely empty
     statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
   if (p->waiters == 0)                 // the page is in pool and ready to rock
     mysql_cond_signal(&COND_pool);     // ping ... for overflow()
@@ -6272,6 +7113,8 @@ void TC_LOG_MMAP::close()
     mysql_mutex_destroy(&LOCK_active);
     mysql_mutex_destroy(&LOCK_pool);
     mysql_cond_destroy(&COND_pool);
+    mysql_cond_destroy(&COND_active);
+    mysql_cond_destroy(&COND_queue_busy);
   case 5:
     data[0]='A'; // garble the first (signature) byte, in case mysql_file_delete fails
   case 4:
@@ -6299,7 +7142,7 @@ int TC_LOG_MMAP::recover()
   HASH xids;
   PAGE *p=pages, *end_p=pages+npages;
 
-  if (memcmp(data, tc_log_magic, sizeof(tc_log_magic)))
+  if (bcmp(data, tc_log_magic, sizeof(tc_log_magic)))
   {
     sql_print_error("Bad magic header in tc log");
     goto err1;
@@ -6451,7 +7294,8 @@ int TC_LOG_BINLOG::open(const char *opt_name)
       goto err;
     }
 
-    if ((ev= Log_event::read_log_event(&log, 0, &fdle)) &&
+    if ((ev= Log_event::read_log_event(&log, 0, &fdle,
+                                       opt_master_verify_checksum)) &&
         ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
         ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
     {
@@ -6481,42 +7325,86 @@ void TC_LOG_BINLOG::close()
   mysql_cond_destroy(&COND_prep_xids);
 }
 
-/**
-  @todo
-  group commit
-
-  @retval
-    0    error
-  @retval
-    1    success
+/*
+  Do a binlog log_xid() for a group of transactions, linked through
+  thd->next_commit_ordered.
 */
-int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid)
+int
+TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
+                             bool need_prepare_ordered __attribute__((unused)),
+                             bool need_commit_ordered __attribute__((unused)))
 {
-  DBUG_ENTER("TC_LOG_BINLOG::log");
+  int err;
+  DBUG_ENTER("TC_LOG_BINLOG::log_and_order");
+
   binlog_cache_mngr *cache_mngr=
     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
-  /*
-    We always commit the entire transaction when writing an XID. Also
-    note that the return value is inverted.
-   */
-  DBUG_RETURN(!binlog_commit_flush_stmt_cache(thd, cache_mngr) &&
-              !binlog_commit_flush_trx_cache(thd, cache_mngr, xid));
+
+  cache_mngr->using_xa= TRUE;
+  cache_mngr->xa_xid= xid;
+  err= binlog_commit_flush_xid_caches(thd, cache_mngr, all, xid);
+
+  DEBUG_SYNC(thd, "binlog_after_log_and_order");
+
+  DBUG_RETURN(!err);
 }
 
-int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
+/*
+  After an XID is logged, we need to hold on to the current binlog file until
+  it is fully committed in the storage engine. The reason is that crash
+  recovery only looks at the latest binlog, so we must make sure there are no
+  outstanding prepared (but not committed) transactions before rotating the
+  binlog.
+
+  To handle this, we keep a count of outstanding XIDs. This function is used
+  to increase this count when committing one or more transactions to the
+  binary log.
+*/
+void
+TC_LOG_BINLOG::mark_xids_active(uint xid_count)
 {
-  DBUG_ENTER("TC_LOG_BINLOG::unlog");
+  DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
+  DBUG_PRINT("info", ("xid_count=%u", xid_count));
+  mysql_mutex_lock(&LOCK_prep_xids);
+  prepared_xids+= xid_count;
+  mysql_mutex_unlock(&LOCK_prep_xids);
+  DBUG_VOID_RETURN;
+}
+
+/*
+  Once an XID is committed, it is safe to rotate the binary log, as it can no
+  longer be needed during crash recovery.
+
+  This function is called to mark an XID this way. It needs to decrease the
+  count of pending XIDs, and signal the log rotator thread when it reaches zero.
+*/
+void
+TC_LOG_BINLOG::mark_xid_done()
+{
+  my_bool send_signal;
+
+  DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");
   mysql_mutex_lock(&LOCK_prep_xids);
   // prepared_xids can be 0 if the transaction had ignorable errors.
   DBUG_ASSERT(prepared_xids >= 0);
   if (prepared_xids > 0)
     prepared_xids--;
-  if (prepared_xids == 0) {
+  send_signal= (prepared_xids == 0);
+  mysql_mutex_unlock(&LOCK_prep_xids);
+  if (send_signal) {
     DBUG_PRINT("info", ("prepared_xids=%lu", prepared_xids));
     mysql_cond_signal(&COND_prep_xids);
   }
-  mysql_mutex_unlock(&LOCK_prep_xids);
-  DBUG_RETURN(rotate_and_purge(0));     // as ::write() did not rotate
+  DBUG_VOID_RETURN;
+}
+
+int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
+{
+  DBUG_ENTER("TC_LOG_BINLOG::unlog");
+  if (xid)
+    mark_xid_done();
+  /* As ::write_transaction_to_binlog() did not rotate, do it here. */
+  DBUG_RETURN(rotate_and_purge(0));
 }
 
 int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
@@ -6534,7 +7422,9 @@ int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
 
   fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
 
-  while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid())
+  while ((ev= Log_event::read_log_event(log, 0, fdle,
+                                        opt_master_verify_checksum))
+         && ev->is_valid())
   {
     if (ev->get_type_code() == XID_EVENT)
     {
@@ -6585,13 +7475,174 @@ ulonglong mysql_bin_log_file_pos(void)
 {
   return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file;
 }
+/*
+  Report the binlog file name and offset recorded for the transaction that
+  this THD is currently committing.
+
+  Only valid when called from within a storage engine's commit_ordered() or
+  commit() method.
+
+  The values are read from data kept inside the THD, so no locking is needed.
+  Yields NULL / 0 when binlogging is off or the THD has no binlog data.
+*/
+void
+mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
+{
+  binlog_cache_mngr *mngr= NULL;
+
+  if (opt_bin_log)
+    mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
+  if (!mngr)
+  {
+    *out_file= NULL;
+    *out_pos= 0;
+    return;
+  }
+  *out_file= mngr->last_commit_pos_file;
+  *out_pos= (ulonglong)(mngr->last_commit_pos_offset);
+}
 #endif /* INNODB_COMPATIBILITY_HOOKS */
 
 
+static void
+binlog_checksum_update(MYSQL_THD thd, struct st_mysql_sys_var *var,
+                       void *var_ptr, const void *save)
+{
+  ulong value=  *((ulong *)save);       /* new setting, passed via "save" */
+  /* Update hook of the "checksum" sysvar; thd, var, var_ptr are unused. */
+  mysql_mutex_lock(mysql_bin_log.get_log_lock());
+  if(mysql_bin_log.is_open())
+  {
+    uint flags= RP_FORCE_ROTATE | RP_LOCK_LOG_IS_ALREADY_LOCKED |
+      (binlog_checksum_options != (uint) value?
+       RP_BINLOG_CHECKSUM_ALG_CHANGE : 0);
+    if (flags & RP_BINLOG_CHECKSUM_ALG_CHANGE)
+      mysql_bin_log.checksum_alg_reset= (uint8) value;
+    mysql_bin_log.rotate_and_purge(flags);  /* log lock is held, see flags */
+  }
+  else
+  {
+    binlog_checksum_options= value;     /* log not open: just store it */
+  }
+  /* NOTE(review): presumably rotate_and_purge() applies the reset - confirm */
+  DBUG_ASSERT((ulong) binlog_checksum_options == value);
+  DBUG_ASSERT(mysql_bin_log.checksum_alg_reset == BINLOG_CHECKSUM_ALG_UNDEF);
+  mysql_mutex_unlock(mysql_bin_log.get_log_lock());
+}
+
+
+static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff)
+{
+  mysql_bin_log.set_status_variables(thd);      /* refresh values first */
+  var->value= (char *)&binlog_status_vars_detail;
+  var->type= SHOW_ARRAY;
+  return 0;
+}
+
+static SHOW_VAR binlog_status_vars_top[]= {
+  {"binlog", (char *) &show_binlog_vars, SHOW_FUNC},
+  {NullS, NullS, SHOW_LONG}
+};
+
+static MYSQL_SYSVAR_BOOL(
+  optimize_thread_scheduling,
+  opt_optimize_thread_scheduling,
+  PLUGIN_VAR_READONLY,
+  "Run fast part of group commit in a single thread, to optimize kernel "
+  "thread scheduling. On by default. Disable to run each transaction in group "
+  "commit in its own thread, which can be slower at very high concurrency. "
+  "This option is mostly for testing one algorithm versus the other, and it "
+  "should not normally be necessary to change it.",
+  NULL,
+  NULL,
+  1);
+
+static MYSQL_SYSVAR_ENUM(
+  checksum,
+  binlog_checksum_options,
+  PLUGIN_VAR_RQCMDARG,
+  "Type of BINLOG_CHECKSUM_ALG. Include checksum for "
+  "log events in the binary log. Possible values are NONE and CRC32; "
+  "default is NONE.",
+  NULL,
+  binlog_checksum_update,
+  BINLOG_CHECKSUM_ALG_OFF,
+  &binlog_checksum_typelib);
+
+#ifndef DBUG_OFF
+static MYSQL_SYSVAR_ULONG(
+  dbug_fsync_sleep,
+  opt_binlog_dbug_fsync_sleep,
+  PLUGIN_VAR_RQCMDARG,
+  "Extra sleep (in microseconds) to add to binlog fsync(), for debugging",
+  NULL,
+  NULL,
+  0,
+  0,
+  ULONG_MAX,
+  0);
+#endif
+
+static struct st_mysql_sys_var *binlog_sys_vars[]=
+{
+  MYSQL_SYSVAR(optimize_thread_scheduling),
+  MYSQL_SYSVAR(checksum),
+#ifndef DBUG_OFF
+  MYSQL_SYSVAR(dbug_fsync_sleep),
+#endif
+  NULL
+};
+
+
+/*
+  Copy the non-directory part of the binlog position file name "src" into
+  the `binlog_snapshot_file' status variable buffer, the same way as it is
+  done for SHOW MASTER STATUS. The copy is truncated to fit the buffer.
+*/
+static void
+set_binlog_snapshot_file(const char *src)
+{
+  size_t dir_len= dirname_length(src);  /* keep size_t: no narrowing to int */
+  strmake(binlog_snapshot_file, src + dir_len, sizeof(binlog_snapshot_file)-1);
+}
+
+/*
+  Refresh the binlog status variables shown by SHOW STATUS and
+  information_schema.global_status.
+
+  Callers hold LOCK_status, which is why a static array can be filled in.
+*/
+void
+TC_LOG_BINLOG::set_status_variables(THD *thd)
+{
+  binlog_cache_mngr *mngr= 0;
+  if (thd && opt_bin_log)
+    mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
+
+  /* Use the THD's own position if its last_commit_pos_file is non-empty. */
+  const bool use_thd_pos= mngr && mngr->last_commit_pos_file[0] != 0;
+
+  /* Counters and the global position are read under LOCK_commit_ordered. */
+  mysql_mutex_lock(&LOCK_commit_ordered);
+  binlog_status_var_num_commits= num_commits;
+  binlog_status_var_num_group_commits= num_group_commits;
+  if (!use_thd_pos)
+  {
+    set_binlog_snapshot_file(last_commit_pos_file);
+    binlog_snapshot_position= last_commit_pos_offset;
+  }
+  mysql_mutex_unlock(&LOCK_commit_ordered);
+
+  if (!use_thd_pos)
+    return;
+  /* The THD's own position is read after releasing LOCK_commit_ordered. */
+  set_binlog_snapshot_file(mngr->last_commit_pos_file);
+  binlog_snapshot_position= mngr->last_commit_pos_offset;
+}
+
 struct st_mysql_storage_engine binlog_storage_engine=
 { MYSQL_HANDLERTON_INTERFACE_VERSION };
 
-mysql_declare_plugin(binlog)
+maria_declare_plugin(binlog)
 {
   MYSQL_STORAGE_ENGINE_PLUGIN,
   &binlog_storage_engine,
@@ -6602,9 +7653,9 @@ mysql_declare_plugin(binlog)
   binlog_init, /* Plugin Init */
   NULL, /* Plugin Deinit */
   0x0100 /* 1.0 */,
-  NULL,                       /* status variables                */
-  NULL,                       /* system variables                */
-  NULL,                       /* config options                  */
-  0,                          /* flags                           */
+  binlog_status_vars_top,     /* status variables                */
+  binlog_sys_vars,            /* system variables                */
+  "1.0",                      /* string version */
+  MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
 }
-mysql_declare_plugin_end;
+maria_declare_plugin_end;
diff --git a/sql/log.h b/sql/log.h
index 0f0f81dd402..c85e643d4e9 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -44,17 +44,58 @@ class TC_LOG
 
   virtual int open(const char *opt_name)=0;
   virtual void close()=0;
-  virtual int log_xid(THD *thd, my_xid xid)=0;
+  virtual int log_and_order(THD *thd, my_xid xid, bool all,
+                            bool need_prepare_ordered,
+                            bool need_commit_ordered) = 0;
   virtual int unlog(ulong cookie, my_xid xid)=0;
+
+protected:
+  /*
+    These methods are meant to be invoked from log_and_order() implementations
+    to run any prepare_ordered() respectively commit_ordered() methods in
+    participating handlers.
+
+    They must be called using suitable thread synchronisation to ensure that
+    they are each called in the correct commit order among all
+    transactions. However, it is only necessary to call them if the
+    corresponding flag passed to log_and_order is set (it is safe, but not
+    required, to call them when the flag is false).
+
+    The caller must be holding LOCK_prepare_ordered respectively
+    LOCK_commit_ordered when calling these methods.
+  */
+  void run_prepare_ordered(THD *thd, bool all);
+  void run_commit_ordered(THD *thd, bool all);
 };
 
+/*
+  Locks used to ensure serialised execution of TC_LOG::run_prepare_ordered()
+  and TC_LOG::run_commit_ordered(), or any other code that calls handler
+  prepare_ordered() or commit_ordered() methods.
+*/
+extern mysql_mutex_t LOCK_prepare_ordered;
+extern mysql_mutex_t LOCK_commit_ordered;
+#ifdef HAVE_PSI_INTERFACE
+extern PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered;
+#endif
+
 class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging
 {
 public:
   TC_LOG_DUMMY() {}
   int open(const char *opt_name)        { return 0; }
   void close()                          { }
-  int log_xid(THD *thd, my_xid xid)         { return 1; }
+  /*
+    TC_LOG_DUMMY is only used when there are <= 1 XA-capable engines, and we
+    only use internal XA during commit when >= 2 XA-capable engines
+    participate, so this method is not expected to be called.
+  */
+  int log_and_order(THD *thd, my_xid xid, bool all,
+                    bool need_prepare_ordered, bool need_commit_ordered)
+  {
+    DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */);
+    return 1;  /* reached only where DBUG_ASSERT() does not abort */
+  }
   int unlog(ulong cookie, my_xid xid)  { return 0; }
 };
 
@@ -80,6 +121,13 @@ class TC_LOG_MMAP: public TC_LOG
     mysql_cond_t  cond; // to wait for a sync
   } PAGE;
 
+  /* List of THDs for which to invoke commit_ordered(), in order. */
+  struct commit_entry
+  {
+    struct commit_entry *next;
+    THD *thd;
+  };
+
   char logname[FN_REFLEN];
   File fd;
   my_off_t file_length;
@@ -94,16 +142,38 @@ class TC_LOG_MMAP: public TC_LOG
   */
   mysql_mutex_t LOCK_active, LOCK_pool, LOCK_sync;
   mysql_cond_t COND_pool, COND_active;
+  /*
+    Queue of threads that need to call commit_ordered().
+    Access to this queue must be protected by LOCK_prepare_ordered.
+  */
+  commit_entry *commit_ordered_queue;
+  /*
+    This flag and condition is used to reserve the queue while threads in it
+    each run the commit_ordered() methods one after the other. Only once the
+    last commit_ordered() in the queue is done can we start on a new queue
+    run.
+
+    Since we start this process in the first thread in the queue and finish in
+    the last (and possibly different) thread, we need a condition variable for
+    this (we cannot unlock a mutex in a different thread than the one who
+    locked it).
+
+    The condition is used together with the LOCK_prepare_ordered mutex.
+  */
+  mysql_cond_t COND_queue_busy;
+  my_bool commit_ordered_queue_busy;
 
   public:
   TC_LOG_MMAP(): inited(0) {}
   int open(const char *opt_name);
   void close();
-  int log_xid(THD *thd, my_xid xid);
+  int log_and_order(THD *thd, my_xid xid, bool all,
+                    bool need_prepare_ordered, bool need_commit_ordered);
   int unlog(ulong cookie, my_xid xid);
   int recover();
 
   private:
+  int log_one_transaction(my_xid xid);
   void get_active_from_pool();
   int sync();
   int overflow();
@@ -158,14 +228,20 @@ typedef struct st_log_info
   my_off_t pos;
   bool fatal; // if the purge happens to give us a negative offset
   mysql_mutex_t lock;
-  st_log_info()
-    : index_file_offset(0), index_file_start_offset(0),
+  st_log_info() : index_file_offset(0), index_file_start_offset(0),
       pos(0), fatal(0)
-    {
-      log_file_name[0] = '\0';
-      mysql_mutex_init(key_LOG_INFO_lock, &lock, MY_MUTEX_INIT_FAST);
-    }
-  ~st_log_info() { mysql_mutex_destroy(&lock);}
+  {
+    DBUG_ENTER("LOG_INFO");
+    log_file_name[0] = '\0';
+    mysql_mutex_init(key_LOG_INFO_lock, &lock, MY_MUTEX_INIT_FAST);
+    DBUG_VOID_RETURN;
+  }
+  ~st_log_info()
+  {
+    DBUG_ENTER("~LOG_INFO");
+    mysql_mutex_destroy(&lock);
+    DBUG_VOID_RETURN;
+  }
 } LOG_INFO;
 
 /*
@@ -242,7 +318,7 @@ public:
              uint user_host_len, int thread_id,
              const char *command_type, uint command_type_len,
              const char *sql_text, uint sql_text_len);
-  bool write(THD *thd, time_t current_time, time_t query_start_arg,
+  bool write(THD *thd, time_t current_time,
              const char *user_host, uint user_host_len,
              ulonglong query_utime, ulonglong lock_utime, bool is_command,
              const char *sql_text, uint sql_text_len);
@@ -271,6 +347,7 @@ private:
   time_t last_time;
 };
 
+class binlog_cache_mngr;
 class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
 {
  private:
@@ -283,7 +360,33 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
   PSI_file_key m_key_file_log;
   /** The instrumentation key to use for opening the log index file. */
   PSI_file_key m_key_file_log_index;
+
+  PSI_file_key m_key_COND_queue_busy;
 #endif
+
+  struct group_commit_entry
+  {
+    struct group_commit_entry *next;
+    THD *thd;
+    binlog_cache_mngr *cache_mngr;
+    bool using_stmt_cache;
+    bool using_trx_cache;
+    /*
+      Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be
+      written during group commit. The incident_event is only valid if
+      trx_data->has_incident() is true.
+    */
+    Log_event *begin_event;
+    Log_event *end_event;
+    Log_event *incident_event;
+    /* Set during group commit to record any per-thread error. */
+    int error;
+    int commit_errno;
+    IO_CACHE *error_cache;
+    /* This is the `all' parameter for ha_commit_ordered(). */
+    bool all;
+  };
+
   /* LOCK_log and LOCK_index are inited by init_pthread_objects() */
   mysql_mutex_t LOCK_index;
   mysql_mutex_t LOCK_prep_xids;
@@ -325,6 +428,20 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
     In 5.0 it's 0 for relay logs too!
   */
   bool no_auto_events;
+  /* Queue of transactions queued up to participate in group commit. */
+  group_commit_entry *group_commit_queue;
+  /*
+    Condition variable to mark that the group commit queue is busy.
+    Used when each thread does its own commit_ordered() (when
+    binlog_optimize_thread_scheduling=1).
+    Used with the LOCK_commit_ordered mutex.
+  */
+  my_bool group_commit_queue_busy;
+  mysql_cond_t COND_queue_busy;
+  /* Total number of committed transactions. */
+  ulonglong num_commits;
+  /* Number of group commits done. */
+  ulonglong num_group_commits;
 
   /* pointer to the sync period variable, for binlog this will be
      sync_binlog_period, for relay log this will be
@@ -346,6 +463,11 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
   */
   int new_file_without_locking();
   int new_file_impl(bool need_lock);
+  int write_transaction_or_stmt(group_commit_entry *entry);
+  bool write_transaction_to_binlog_events(group_commit_entry *entry);
+  void trx_group_commit_leader(group_commit_entry *leader);
+  void mark_xid_done();
+  void mark_xids_active(uint xid_count);
 
 public:
   MYSQL_LOG::generate_name;
@@ -354,6 +476,41 @@ public:
   /* This is relay log */
   bool is_relay_log;
   ulong signal_cnt;  // update of the counter is checked by heartbeat
+  uint8 checksum_alg_reset; // to contain a new value when binlog is rotated
+  /*
+    Holds the last seen in Relay-Log FD's checksum alg value.
+    The initial value comes from the slave's local FD that heads
+    the very first Relay-Log file. In the following the value may change
+    with each received master's FD_m.
+    Besides being used to verify events that the IO thread receives
+    (except the 1st fake Rotate, see @c Master_info:: checksum_alg_before_fd), 
+    the value specifies if/how to compute checksum for slave's local events
+    and the first fake Rotate (R_f^1) coming from the master.
+    R_f^1 needs logging checksum-compatibly with the RL's heading FD_s.
+
+    Legends for the checksum related comments:
+
+    FD     - Format-Description event,
+    R      - Rotate event
+    R_f    - the fake Rotate event
+    E      - an arbitrary event
+
+    The underscore indexes for any event
+    `_s'   indicates the event is generated by Slave
+    `_m'   - by Master
+
+    Two special underscore indexes of FD:
+    FD_q   - Format Description event for queuing   (relay-logging)
+    FD_e   - Format Description event for executing (relay-logging)
+
+    Upper indexes:
+    E^n    - n:th event in a sequence
+
+    RL     - Relay Log
+    (A)    - checksum algorithm descriptor value
+    FD.(A) - the value of (A) in FD
+  */
+  uint8 relay_log_checksum_alg;
   /*
     These describe the log's format. This is used only for relay logs.
     _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's
@@ -364,6 +521,12 @@ public:
   */
   Format_description_log_event *description_event_for_exec,
     *description_event_for_queue;
+  /*
+    Binlog position of last commit (or non-transactional write) to the binlog.
+    Access to this is protected by LOCK_commit_ordered.
+  */
+  char last_commit_pos_file[FN_REFLEN];
+  my_off_t last_commit_pos_offset;
 
   MYSQL_BIN_LOG(uint *sync_period);
   /*
@@ -387,7 +550,8 @@ public:
 
   int open(const char *opt_name);
   void close();
-  int log_xid(THD *thd, my_xid xid);
+  int log_and_order(THD *thd, my_xid xid, bool all,
+                    bool need_prepare_ordered, bool need_commit_ordered);
   int unlog(ulong cookie, my_xid xid);
   int recover(IO_CACHE *log, Format_description_log_event *fdle);
 #if !defined(MYSQL_CLIENT)
@@ -433,11 +597,15 @@ public:
   /* Use this to start writing a new log file */
   int new_file();
 
-  bool write(Log_event* event_info); // binary log write
-  bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
-  bool write_incident(THD *thd, bool lock);
+  bool write(Log_event* event_info,
+             my_bool *with_annotate= 0); // binary log write
+  bool write_transaction_to_binlog(THD *thd, binlog_cache_mngr *cache_mngr,
+                                   Log_event *end_ev, bool all,
+                                   bool using_stmt_cache, bool using_trx_cache);
 
-  int  write_cache(IO_CACHE *cache, bool lock_log, bool flush_and_sync);
+  bool write_incident_already_locked(THD *thd);
+  bool write_incident(THD *thd);
+  int  write_cache(THD *thd, IO_CACHE *cache);
   void set_write_error(THD *thd, bool is_transactional);
   bool check_write_error(THD *thd);
 
@@ -506,6 +674,7 @@ public:
   inline void unlock_index() { mysql_mutex_unlock(&LOCK_index);}
   inline IO_CACHE *get_index_file() { return &index_file;}
   inline uint32 get_open_count() { return open_count; }
+  void set_status_variables(THD *thd);
 };
 
 class Log_event_handler
@@ -515,14 +684,14 @@ public:
   virtual bool init()= 0;
   virtual void cleanup()= 0;
 
-  virtual bool log_slow(THD *thd, time_t current_time,
-                        time_t query_start_arg, const char *user_host,
+  virtual bool log_slow(THD *thd, my_hrtime_t current_time,
+                        const char *user_host,
                         uint user_host_len, ulonglong query_utime,
                         ulonglong lock_utime, bool is_command,
                         const char *sql_text, uint sql_text_len)= 0;
   virtual bool log_error(enum loglevel level, const char *format,
                          va_list args)= 0;
-  virtual bool log_general(THD *thd, time_t event_time, const char *user_host,
+  virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
                            uint user_host_len, int thread_id,
                            const char *command_type, uint command_type_len,
                            const char *sql_text, uint sql_text_len,
@@ -544,14 +713,14 @@ public:
   virtual bool init();
   virtual void cleanup();
 
-  virtual bool log_slow(THD *thd, time_t current_time,
-                        time_t query_start_arg, const char *user_host,
+  virtual bool log_slow(THD *thd, my_hrtime_t current_time,
+                        const char *user_host,
                         uint user_host_len, ulonglong query_utime,
                         ulonglong lock_utime, bool is_command,
                         const char *sql_text, uint sql_text_len);
   virtual bool log_error(enum loglevel level, const char *format,
                          va_list args);
-  virtual bool log_general(THD *thd, time_t event_time, const char *user_host,
+  virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
                            uint user_host_len, int thread_id,
                            const char *command_type, uint command_type_len,
                            const char *sql_text, uint sql_text_len,
@@ -576,14 +745,14 @@ public:
   virtual bool init();
   virtual void cleanup();
 
-  virtual bool log_slow(THD *thd, time_t current_time,
-                        time_t query_start_arg, const char *user_host,
+  virtual bool log_slow(THD *thd, my_hrtime_t current_time,
+                        const char *user_host,
                         uint user_host_len, ulonglong query_utime,
                         ulonglong lock_utime, bool is_command,
                         const char *sql_text, uint sql_text_len);
   virtual bool log_error(enum loglevel level, const char *format,
                          va_list args);
-  virtual bool log_general(THD *thd, time_t event_time, const char *user_host,
+  virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
                            uint user_host_len, int thread_id,
                            const char *command_type, uint command_type_len,
                            const char *sql_text, uint sql_text_len,
@@ -708,7 +877,7 @@ bool flush_error_log();
 File open_binlog(IO_CACHE *log, const char *log_file_name,
                  const char **errmsg);
 
-char *make_log_name(char *buff, const char *name, const char* log_ext);
+void make_default_log_name(char **out, const char* log_ext, bool once);
 
 extern MYSQL_PLUGIN_IMPORT MYSQL_BIN_LOG mysql_bin_log;
 extern LOGGER logger;
diff --git a/sql/log_event.cc b/sql/log_event.cc
index b29bcfdb536..93f0f8dd96a 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -52,6 +52,31 @@
 #include <my_bitmap.h>
 #include "rpl_utility.h"
 
+
+/**
+  Value names of the BINLOG_CHECKSUM variable; NullS ends the list.
+*/
+const char *binlog_checksum_type_names[]= {
+  "NONE",
+  "CRC32",
+  NullS
+};
+/** Lengths of the names above, in the same order, not counting the '\0'. */
+unsigned int binlog_checksum_type_length[]= {
+  sizeof("NONE") - 1,
+  sizeof("CRC32") - 1,
+  0
+};
+/** TYPELIB over the two arrays above; the count leaves out the NullS entry. */
+TYPELIB binlog_checksum_typelib=
+{
+  array_elements(binlog_checksum_type_names) - 1, "",
+  binlog_checksum_type_names,
+  binlog_checksum_type_length
+};
+
+
+
 #define log_cs	&my_charset_latin1
 
 #define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
@@ -65,6 +90,24 @@
 */
 #define FMT_G_BUFSIZE(PREC) (3 + (PREC) + 5 + 1)
 
+/*
+   Replication event checksums are introduced in the following "checksum-home"
+   versions (5.6.1 for MySQL, 5.3.0 for MariaDB). Checksum-aware servers
+   extract the FD's version to decide whether the FD event carries checksum
+   info. The *_product_* values pack the three numbers as (a*256 + b)*256 + c.
+
+   TODO: correct the constants once it is determined which main tree to push
+   to, and when.
+*/
+const uchar checksum_version_split_mysql[3]= {5, 6, 1};
+const ulong checksum_version_product_mysql=
+  (checksum_version_split_mysql[0] * 256 +
+   checksum_version_split_mysql[1]) * 256 +
+  checksum_version_split_mysql[2];
+const uchar checksum_version_split_mariadb[3]= {5, 3, 0};
+const ulong checksum_version_product_mariadb=
+  (checksum_version_split_mariadb[0] * 256 +
+   checksum_version_split_mariadb[1]) * 256 +
+  checksum_version_split_mariadb[2];
 
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
 static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD* thd);
@@ -593,7 +636,7 @@ append_query_string(CHARSET_INFO *csinfo,
   if (to->reserve(orig_len + from->length()*2+3))
     return 1;
 
-  beg= to->c_ptr_quick() + to->length();
+  beg= (char*) to->ptr() + to->length();
   ptr= beg;
   if (csinfo->escape_with_backslash_is_dangerous)
     ptr= str_to_hex(ptr, from->ptr(), from->length());
@@ -630,7 +673,6 @@ static void print_set_option(IO_CACHE* file, uint32 bits_changed,
   }
 }
 #endif
-
 /**************************************************************************
 	Log_event methods (= the parent class of all events)
 **************************************************************************/
@@ -669,6 +711,7 @@ const char* Log_event::get_type_str(Log_event_type type)
   case BEGIN_LOAD_QUERY_EVENT: return "Begin_load_query";
   case EXECUTE_LOAD_QUERY_EVENT: return "Execute_load_query";
   case INCIDENT_EVENT: return "Incident";
+  case ANNOTATE_ROWS_EVENT: return "Annotate_rows";
   default: return "Unknown";				/* impossible */
   }
 }
@@ -686,10 +729,12 @@ const char* Log_event::get_type_str()
 #ifndef MYSQL_CLIENT
 Log_event::Log_event(THD* thd_arg, uint16 flags_arg, bool using_trans)
   :log_pos(0), temp_buf(0), exec_time(0), flags(flags_arg),
-  cache_type(Log_event::EVENT_INVALID_CACHE), thd(thd_arg)
+   crc(0), thd(thd_arg),
+   checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF)
 {
   server_id=	thd->server_id;
-  when=		thd->start_time;
+  when=         thd->start_time;
+  when_sec_part=thd->start_time_sec_part;
 
   if (using_trans)
     cache_type= Log_event::EVENT_TRANSACTIONAL_CACHE;
@@ -706,14 +751,16 @@ Log_event::Log_event(THD* thd_arg, uint16 flags_arg, bool using_trans)
 
 Log_event::Log_event()
   :temp_buf(0), exec_time(0), flags(0),
-  cache_type(Log_event::EVENT_INVALID_CACHE), thd(0)
+   cache_type(Log_event::EVENT_INVALID_CACHE), crc(0),
+   thd(0), checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF)
 {
   server_id=	::server_id;
   /*
     We can't call my_time() here as this would cause a call before
     my_init() is called
   */
-  when=		0;
+  when=         0;
+  when_sec_part=0;
   log_pos=	0;
 }
 #endif /* !MYSQL_CLIENT */
@@ -725,12 +772,14 @@ Log_event::Log_event()
 
 Log_event::Log_event(const char* buf,
                      const Format_description_log_event* description_event)
-  :temp_buf(0), cache_type(Log_event::EVENT_INVALID_CACHE)
+  :temp_buf(0), cache_type(Log_event::EVENT_INVALID_CACHE),
+    crc(0), checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF)
 {
 #ifndef MYSQL_CLIENT
   thd = 0;
 #endif
   when = uint4korr(buf);
+  when_sec_part= 0;
   server_id = uint4korr(buf + SERVER_ID_OFFSET);
   data_written= uint4korr(buf + EVENT_LEN_OFFSET);
   if (description_event->binlog_version==1)
@@ -752,7 +801,7 @@ Log_event::Log_event(const char* buf,
     logs are in 4.0 format, until it finds a Format_desc).
   */
   if (description_event->binlog_version==3 &&
-      buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT && log_pos)
+      (uchar)buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT && log_pos)
   {
       /*
         If log_pos=0, don't change it. log_pos==0 is a marker to mean
@@ -770,8 +819,8 @@ Log_event::Log_event(const char* buf,
   DBUG_PRINT("info", ("log_pos: %lu", (ulong) log_pos));
 
   flags= uint2korr(buf + FLAGS_OFFSET);
-  if ((buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) ||
-      (buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT))
+  if (((uchar)buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) ||
+      ((uchar)buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT))
   {
     /*
       These events always have a header which stops here (i.e. their
@@ -819,21 +868,13 @@ int Log_event::do_update_pos(Relay_log_info *rli)
     DBUG_EXECUTE_IF("let_first_flush_log_change_timestamp",
                     if (debug_not_change_ts_if_art_event == 1
                         && is_artificial_event())
-                    {
-                      debug_not_change_ts_if_art_event= 0;
-                    });
-#ifndef DBUG_OFF
-    rli->stmt_done(log_pos, 
-                   is_artificial_event() &&
-                   debug_not_change_ts_if_art_event > 0 ? 0 : when);
-#else
-    rli->stmt_done(log_pos, is_artificial_event()? 0 : when);
-#endif
+                      debug_not_change_ts_if_art_event= 0; );
+    rli->stmt_done(log_pos, is_artificial_event() &&
+                   IF_DBUG(debug_not_change_ts_if_art_event > 0, 1) ?
+                     0 : when);
     DBUG_EXECUTE_IF("let_first_flush_log_change_timestamp",
                     if (debug_not_change_ts_if_art_event == 0)
-                    {
-                      debug_not_change_ts_if_art_event= 2;
-                    });
+                      debug_not_change_ts_if_art_event= 2; );
   }
   return 0;                                   // Cannot fail currently
 }
@@ -851,10 +892,9 @@ Log_event::do_shall_skip(Relay_log_info *rli)
   if ((server_id == ::server_id && !rli->replicate_same_server_id) ||
       (rli->slave_skip_counter == 1 && rli->is_in_group()))
     return EVENT_SKIP_IGNORE;
-  else if (rli->slave_skip_counter > 0)
+  if (rli->slave_skip_counter > 0)
     return EVENT_SKIP_COUNT;
-  else
-    return EVENT_SKIP_NOT;
+  return EVENT_SKIP_NOT;
 }
 
 
@@ -911,6 +951,105 @@ void Log_event::init_show_field_list(List<Item>* field_list)
   field_list->push_back(new Item_empty_string("Info", 20));
 }
 
+/**
+   A decider of whether to trigger checksum computation or not.
+   To be invoked in Log_event::write() stack.
+   The decision is positive
+
+    S,M) if it's been marked for checksumming with @c checksum_alg
+
+    M) otherwise, if @@global.binlog_checksum is not NONE and the event is
+       directly written to the binlog file.
+       The to-be-cached event decides at @c write_cache() time.
+
+   Otherwise the decision is negative.
+
+   @note   A side effect of the method is altering Log_event::checksum_alg
+           if the latter was undefined at the time of the call.
+
+   @return true (positive) or false (negative)
+*/
+my_bool Log_event::need_checksum()
+{
+  DBUG_ENTER("Log_event::need_checksum");
+  my_bool ret;
+  /*
+     A few callers of Log_event::write
+     (incl FD::write, FD constructing code on the slave side, Rotate relay log
+     and Stop event)
+     provide their checksum alg preference through Log_event::checksum_alg.
+  */
+  ret= ((checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF) ?
+        (checksum_alg != BINLOG_CHECKSUM_ALG_OFF) :
+        ((binlog_checksum_options != BINLOG_CHECKSUM_ALG_OFF) &&
+         (cache_type == Log_event::EVENT_NO_CACHE)) ?
+        test(binlog_checksum_options) : FALSE);
+
+  /*
+    FD calls the method before data_written has been calculated.
+    The following invariant claims that if the current call is not the
+    first one (and therefore data_written is not zero) then `ret' must be
+    TRUE. It may not be FALSE because FD is always checksummed.
+  */
+  
+  DBUG_ASSERT(get_type_code() != FORMAT_DESCRIPTION_EVENT || ret ||
+              data_written == 0);
+
+  if (checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
+    checksum_alg= ret ? // calculated value stored
+      (uint8) binlog_checksum_options : (uint8) BINLOG_CHECKSUM_ALG_OFF;
+
+  DBUG_ASSERT(!ret || 
+              ((checksum_alg == binlog_checksum_options ||
+               /*
+                  Stop event closes the relay-log and its checksum alg
+                  preference, set by the caller, can be different
+                  from the server's binlog_checksum_options.
+               */
+               get_type_code() == STOP_EVENT ||
+               /*
+                  Rotate:s can be checksummed regardless of the server's
+                  binlog_checksum_options. That applies to both
+                  the local RL's Rotate and the master's Rotate
+                  which IO thread instantiates via queue_binlog_ver_3_event.
+               */
+               get_type_code() == ROTATE_EVENT
+               ||  /* FD is always checksummed */
+               get_type_code() == FORMAT_DESCRIPTION_EVENT) && 
+               checksum_alg != BINLOG_CHECKSUM_ALG_OFF));
+
+  DBUG_ASSERT(checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
+  /* NOTE(review): the type-code part below is true for any event type. */
+  DBUG_ASSERT(((get_type_code() != ROTATE_EVENT &&
+                get_type_code() != STOP_EVENT) ||
+               get_type_code() != FORMAT_DESCRIPTION_EVENT) ||
+              cache_type == Log_event::EVENT_NO_CACHE);
+
+  DBUG_RETURN(ret);
+}
+
+bool Log_event::wrapper_my_b_safe_write(IO_CACHE* file, const uchar* buf, ulong size)
+{
+  const bool checksummed= need_checksum();  /* consulted even for size == 0 */
+  if (checksummed && size != 0)
+    crc= my_checksum(crc, buf, size);       /* extend the running checksum */
+  return my_b_safe_write(file, buf, size);
+}
+
+bool Log_event::write_footer(IO_CACHE* file)
+{
+  /*
+    The footer consists of the checksum value (BINLOG_CHECKSUM_LEN bytes).
+    Events that are not checksummed get no footer at all.
+  */
+  if (!need_checksum())
+    return 0;
+
+  uchar crc_buf[BINLOG_CHECKSUM_LEN];
+  int4store(crc_buf, crc);
+  /* Written as is: these bytes are not folded into the checksum. */
+  return (my_b_safe_write(file, crc_buf, sizeof(crc_buf)));
+}
 
 /*
   Log_event::write()
@@ -920,11 +1059,18 @@ bool Log_event::write_header(IO_CACHE* file, ulong event_data_length)
 {
   uchar header[LOG_EVENT_HEADER_LEN];
   ulong now;
+  bool ret;
   DBUG_ENTER("Log_event::write_header");
 
   /* Store number of bytes that will be written by this event */
   data_written= event_data_length + sizeof(header);
 
+  if (need_checksum())
+  {
+    crc= my_checksum(0L, NULL, 0);
+    data_written += BINLOG_CHECKSUM_LEN;
+  }
+
   /*
     log_pos != 0 if this is relay-log event. In this case we should not
     change the position
@@ -969,7 +1115,7 @@ bool Log_event::write_header(IO_CACHE* file, ulong event_data_length)
     log_pos= my_b_safe_tell(file)+data_written;
   }
 
-  now= (ulong) get_time();                              // Query start time
+  now= get_time();                               // Query start time
 
   /*
     Header will be of size LOG_EVENT_HEADER_LEN for all events, except for
@@ -983,9 +1129,36 @@ bool Log_event::write_header(IO_CACHE* file, ulong event_data_length)
   int4store(header+ SERVER_ID_OFFSET, server_id);
   int4store(header+ EVENT_LEN_OFFSET, data_written);
   int4store(header+ LOG_POS_OFFSET, log_pos);
-  int2store(header+ FLAGS_OFFSET, flags);
-
-  DBUG_RETURN(my_b_safe_write(file, header, sizeof(header)) != 0);
+  /*
+    The checksum of an FD event is computed with a possibly set
+    LOG_EVENT_BINLOG_IN_USE_F flag cleared.
+    Verification takes the same step: the flag is dropped before
+    the checksum is computed.
+  */
+  if (header[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT ||
+      !need_checksum() || !(flags & LOG_EVENT_BINLOG_IN_USE_F))
+  {
+    int2store(header+ FLAGS_OFFSET, flags);
+    ret= wrapper_my_b_safe_write(file, header, sizeof(header)) != 0;
+  }
+  else
+  {
+    ret= (wrapper_my_b_safe_write(file, header, FLAGS_OFFSET) != 0);
+    if (!ret)
+    {
+      flags &= ~LOG_EVENT_BINLOG_IN_USE_F;
+      int2store(header + FLAGS_OFFSET, flags);
+      crc= my_checksum(crc, header + FLAGS_OFFSET, sizeof(flags));
+      flags |= LOG_EVENT_BINLOG_IN_USE_F;    
+      int2store(header + FLAGS_OFFSET, flags);
+      ret= (my_b_safe_write(file, header + FLAGS_OFFSET, sizeof(flags)) != 0);
+    }
+    if (!ret)
+      ret= (wrapper_my_b_safe_write(file, header + FLAGS_OFFSET + sizeof(flags),
+                                    sizeof(header)
+                                    - (FLAGS_OFFSET + sizeof(flags))) != 0);
+  }
+  DBUG_RETURN( ret);
 }
 
 
@@ -995,11 +1168,13 @@ bool Log_event::write_header(IO_CACHE* file, ulong event_data_length)
 */
 
 int Log_event::read_log_event(IO_CACHE* file, String* packet,
-                              mysql_mutex_t* log_lock)
+                              mysql_mutex_t* log_lock,
+                              uint8 checksum_alg_arg)
 {
   ulong data_len;
   int result=0;
   char buf[LOG_EVENT_MINIMAL_HEADER_LEN];
+  uchar ev_offset= packet->length();
   DBUG_ENTER("Log_event::read_log_event");
 
   if (log_lock)
@@ -1057,6 +1232,31 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet,
                (file->error >= 0 ? LOG_READ_TRUNC: LOG_READ_IO));
       /* Implicit goto end; */
     }
+    else
+    {
+      /* Corrupt the event for Dump thread*/
+      DBUG_EXECUTE_IF("corrupt_read_log_event2",
+	uchar *debug_event_buf_c = (uchar*) packet->ptr() + ev_offset;
+        if (debug_event_buf_c[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT)
+        {
+          int debug_cor_pos = rand() % (data_len + sizeof(buf) - BINLOG_CHECKSUM_LEN);
+          debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
+          DBUG_PRINT("info", ("Corrupt the event at Log_event::read_log_event: byte on position %d", debug_cor_pos));
+          DBUG_SET("-d,corrupt_read_log_event2");
+	}
+      );                                                                                           
+      /*
+        CRC verification of the Dump thread
+      */
+      if (opt_master_verify_checksum &&
+          event_checksum_test((uchar*) packet->ptr() + ev_offset,
+                              data_len + sizeof(buf),
+                              checksum_alg_arg))
+      {
+        result= LOG_READ_CHECKSUM_FAILURE;
+        goto end;
+      }
+    }
   }
 
 end:
@@ -1082,11 +1282,13 @@ end:
 Log_event* Log_event::read_log_event(IO_CACHE* file,
                                      mysql_mutex_t* log_lock,
                                      const Format_description_log_event
-                                     *description_event)
+                                     *description_event,
+                                     my_bool crc_check)
 #else
 Log_event* Log_event::read_log_event(IO_CACHE* file,
                                      const Format_description_log_event
-                                     *description_event)
+                                     *description_event,
+                                     my_bool crc_check)
 #endif
 {
   DBUG_ENTER("Log_event::read_log_event");
@@ -1150,8 +1352,8 @@ failed my_b_read"));
     error = "read error";
     goto err;
   }
-  if ((res= read_log_event(buf, data_len, &error, description_event)))
-    res->register_temp_buf(buf);
+  if ((res= read_log_event(buf, data_len, &error, description_event, crc_check)))
+    res->register_temp_buf(buf, TRUE);
 
 err:
   UNLOCK_MUTEX;
@@ -1183,9 +1385,11 @@ err:
 
 Log_event* Log_event::read_log_event(const char* buf, uint event_len,
 				     const char **error,
-                                     const Format_description_log_event *description_event)
+                                     const Format_description_log_event *description_event,
+                                     my_bool crc_check)
 {
   Log_event* ev;
+  uint8 alg;
   DBUG_ENTER("Log_event::read_log_event(char*,...)");
   DBUG_ASSERT(description_event != 0);
   DBUG_PRINT("info", ("binlog_version: %d", description_event->binlog_version));
@@ -1193,14 +1397,68 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
 
   /* Check the integrity */
   if (event_len < EVENT_LEN_OFFSET ||
-      buf[EVENT_TYPE_OFFSET] >= ENUM_END_EVENT ||
+      (uchar)buf[EVENT_TYPE_OFFSET] >= ENUM_END_EVENT ||
       (uint) event_len != uint4korr(buf+EVENT_LEN_OFFSET))
   {
     *error="Sanity check failed";		// Needed to free buffer
     DBUG_RETURN(NULL); // general sanity check - will fail on a partial read
   }
 
-  uint event_type= buf[EVENT_TYPE_OFFSET];
+  uint event_type= (uchar)buf[EVENT_TYPE_OFFSET];
+  // all following START events in the current file are without checksum
+  if (event_type == START_EVENT_V3)
+    (const_cast< Format_description_log_event *>(description_event))->checksum_alg= BINLOG_CHECKSUM_ALG_OFF;
+  /*
+    CRC verification by SQL and Show-Binlog-Events master side.
+    The caller has to provide @description_event->checksum_alg to
+    be the last seen FD's (A) descriptor.
+    If event is FD the descriptor is in it.
+    Notice, FD of the binlog can be only in one instance and therefore
+    Show-Binlog-Events executing master side thread needs just to know
+    the only FD's (A) value -  whereas RL can contain more.
+    In the RL case, the alg is kept in FD_e (@description_event) which is reset 
+    to the newer read-out event after its execution with possibly new alg descriptor.
+    Therefore in a typical sequence of RL:
+    {FD_s^0, FD_m, E_m^1} E_m^1 
+    will be verified with (A) of FD_m.
+
+    See legends definition on MYSQL_BIN_LOG::relay_log_checksum_alg docs
+    lines (log.h).
+
+    Notice, a pre-checksum FD version forces alg := BINLOG_CHECKSUM_ALG_UNDEF.
+  */
+  alg= (event_type != FORMAT_DESCRIPTION_EVENT) ?
+    description_event->checksum_alg : get_checksum_alg(buf, event_len);
+  // Emulate the corruption during reading an event
+  DBUG_EXECUTE_IF("corrupt_read_log_event_char",
+    if (event_type != FORMAT_DESCRIPTION_EVENT)
+    {
+      char *debug_event_buf_c = (char *)buf;
+      int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
+      debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
+      DBUG_PRINT("info", ("Corrupt the event at Log_event::read_log_event(char*,...): byte on position %d", debug_cor_pos));
+      DBUG_SET("-d,corrupt_read_log_event_char");
+    }
+  );                                                 
+  if (crc_check &&
+      event_checksum_test((uchar *) buf, event_len, alg))
+  {
+#ifdef MYSQL_CLIENT
+    *error= "Event crc check failed! Most likely there is event corruption.";
+    if (force_opt)
+    {
+      ev= new Unknown_log_event(buf, description_event);
+      DBUG_RETURN(ev);
+    }
+    else
+      DBUG_RETURN(NULL);
+#else
+    *error= ER(ER_BINLOG_READ_EVENT_CHECKSUM_FAILURE);
+    sql_print_error("%s", ER(ER_BINLOG_READ_EVENT_CHECKSUM_FAILURE));
+    DBUG_RETURN(NULL);
+#endif
+  }
+
   if (event_type > description_event->number_of_event_types &&
       event_type != FORMAT_DESCRIPTION_EVENT)
   {
@@ -1235,6 +1493,11 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
       event_type= new_event_type;
     }
 
+    if (alg != BINLOG_CHECKSUM_ALG_UNDEF &&
+        (event_type == FORMAT_DESCRIPTION_EVENT ||
+         alg != BINLOG_CHECKSUM_ALG_OFF))
+      event_len= event_len - BINLOG_CHECKSUM_LEN;
+    
     switch(event_type) {
     case QUERY_EVENT:
       ev  = new Query_log_event(buf, event_len, description_event, QUERY_EVENT);
@@ -1318,6 +1581,9 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
     case INCIDENT_EVENT:
       ev = new Incident_log_event(buf, event_len, description_event);
       break;
+    case ANNOTATE_ROWS_EVENT:
+      ev = new Annotate_rows_log_event(buf, event_len, description_event);
+      break;
     default:
       DBUG_PRINT("error",("Unknown event code: %d",
                           (int) buf[EVENT_TYPE_OFFSET]));
@@ -1326,6 +1592,14 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
     }
   }
 
+  if (ev)
+  {
+    ev->checksum_alg= alg;
+    if (ev->checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+        ev->checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
+      ev->crc= uint4korr(buf + (event_len));
+  }
+
   DBUG_PRINT("read_event", ("%s(type_code: %d; event_len: %d)",
                             ev ? ev->get_type_str() : "<unknown>",
                             buf[EVENT_TYPE_OFFSET],
@@ -1380,6 +1654,18 @@ void Log_event::print_header(IO_CACHE* file,
   my_b_printf(file, " server id %lu  end_log_pos %s ", (ulong) server_id,
               llstr(log_pos,llbuff));
 
+  /* print the checksum algorithm name and value, if the event carries one */
+  if (checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+      checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
+  {
+    char checksum_buf[BINLOG_CHECKSUM_LEN * 2 + 4]; // to fit "0x%08lx "
+    size_t const bytes_written=
+      my_snprintf(checksum_buf, sizeof(checksum_buf), "0x%08lx ", (ulong) crc);
+    my_b_printf(file, "%s ", get_type(&binlog_checksum_typelib, checksum_alg));
+    // checksum_buf is finished text: write it verbatim, not as a format string
+    my_b_write(file, (uchar*) checksum_buf, bytes_written);
+  }
+
   /* mysqlbinlog --hexdump */
   if (print_event_info->hexdump_from)
   {
@@ -1721,13 +2007,14 @@ log_event_print_value(IO_CACHE *file, const uchar *ptr,
 
   case MYSQL_TYPE_DATETIME:
     {
-      size_t d, t;
+      ulong d, t;
       uint64 i64= uint8korr(ptr); /* YYYYMMDDhhmmss */
-      d= i64 / 1000000;
-      t= i64 % 1000000;
+      d= (ulong) (i64 / 1000000);
+      t= (ulong) (i64 % 1000000);
+
       my_b_printf(file, "%04d-%02d-%02d %02d:%02d:%02d",
-                  d / 10000, (d % 10000) / 100, d % 100,
-                  t / 10000, (t % 10000) / 100, t % 100);
+                  (int) (d / 10000), (int) (d % 10000) / 100, (int) (d % 100),
+                  (int) (t / 10000), (int) (t % 10000) / 100, (int) t % 100);
       my_snprintf(typestr, typestr_length, "DATETIME");
       return 8;
     }
@@ -2007,12 +2294,10 @@ end:
   delete td;
 }
 
-#ifdef MYSQL_CLIENT
 void free_table_map_log_event(Table_map_log_event *event)
 {
   delete event;
 }
-#endif
 
 void Log_event::print_base64(IO_CACHE* file,
                              PRINT_EVENT_INFO* print_event_info,
@@ -2049,6 +2334,9 @@ void Log_event::print_base64(IO_CACHE* file,
   if (print_event_info->verbose)
   {
     Rows_log_event *ev= NULL;
+    if (checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF &&
+        checksum_alg != BINLOG_CHECKSUM_ALG_OFF)
+      size-= BINLOG_CHECKSUM_LEN; // checksum is displayed through the header
     
     if (ptr[4] == TABLE_MAP_EVENT)
     {
@@ -2092,15 +2380,11 @@ void Log_event::print_base64(IO_CACHE* file,
 void Log_event::print_timestamp(IO_CACHE* file, time_t* ts)
 {
   struct tm *res;
+  time_t my_when= when;
   DBUG_ENTER("Log_event::print_timestamp");
   if (!ts)
-    ts = &when;
-#ifdef MYSQL_SERVER				// This is always false
-  struct tm tm_tmp;
-  localtime_r(ts,(res= &tm_tmp));
-#else
+    ts = &my_when;
   res=localtime(ts);
-#endif
 
   my_b_printf(file,"%02d%02d%02d %2d:%02d:%02d",
               res->tm_year % 100,
@@ -2178,7 +2462,7 @@ static void write_str_with_code_and_len(uchar **dst, const char *src,
   */
   DBUG_ASSERT(len <= 255);
   DBUG_ASSERT(src);
-  *((*dst)++)= code;
+  *((*dst)++)= (uchar) code;
   *((*dst)++)= (uchar) len;
   bmove(*dst, src, len);
   (*dst)+= len;
@@ -2385,6 +2669,14 @@ bool Query_log_event::write(IO_CACHE* file)
       start+= host.length;
     }
   }
+
+  if (thd && thd->query_start_sec_part_used)
+  {
+    *start++= Q_HRNOW;
+    get_time();
+    int3store(start, when_sec_part);
+    start+= 3;
+  }
   /*
     NOTE: When adding new status vars, please don't forget to update
     the MAX_SIZE_LOG_EVENT_STATUS in log_event.h and update the function
@@ -2411,12 +2703,13 @@ bool Query_log_event::write(IO_CACHE* file)
   event_length= (uint) (start-buf) + get_post_header_size_for_derived() + db_len + 1 + q_len;
 
   return (write_header(file, event_length) ||
-          my_b_safe_write(file, (uchar*) buf, QUERY_HEADER_LEN) ||
+          wrapper_my_b_safe_write(file, (uchar*) buf, QUERY_HEADER_LEN) ||
           write_post_header_for_derived(file) ||
-          my_b_safe_write(file, (uchar*) start_of_status,
+          wrapper_my_b_safe_write(file, (uchar*) start_of_status,
                           (uint) (start-start_of_status)) ||
-          my_b_safe_write(file, (db) ? (uchar*) db : (uchar*)"", db_len + 1) ||
-          my_b_safe_write(file, (uchar*) query, q_len)) ? 1 : 0;
+          wrapper_my_b_safe_write(file, (db) ? (uchar*) db : (uchar*)"", db_len + 1) ||
+          wrapper_my_b_safe_write(file, (uchar*) query, q_len) ||
+	  write_footer(file)) ? 1 : 0;
 }
 
 /**
@@ -2476,7 +2769,7 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg,
 
   error_code= errcode;
 
-  time(&end_time);
+  end_time= my_time(0);
   exec_time = (ulong) (end_time  - thd_arg->start_time);
   /**
     @todo this means that if we have no catalog, then it is replicated
@@ -2668,6 +2961,7 @@ code_name(int code)
   case Q_CHARSET_DATABASE_CODE: return "Q_CHARSET_DATABASE_CODE";
   case Q_TABLE_MAP_FOR_UPDATE_CODE: return "Q_TABLE_MAP_FOR_UPDATE_CODE";
   case Q_MASTER_DATA_WRITTEN_CODE: return "Q_MASTER_DATA_WRITTEN_CODE";
+  case Q_HRNOW: return "Q_HRNOW";
   }
   sprintf(buf, "CODE#%d", code);
   return buf;
@@ -2884,6 +3178,14 @@ Query_log_event::Query_log_event(const char* buf, uint event_len,
       CHECK_SPACE(pos, end, host.length);
       host.str= (char *)pos;
       pos+= host.length;
+      break;
+    }
+    case Q_HRNOW:
+    {
+      CHECK_SPACE(pos, end, 3);
+      when_sec_part= uint3korr(pos);
+      pos+= 3;
+      break;
     }
     default:
       /* That's why you must write status vars in growing order of code */
@@ -2963,7 +3265,7 @@ void Query_log_event::print_query_header(IO_CACHE* file,
 					 PRINT_EVENT_INFO* print_event_info)
 {
   // TODO: print the catalog ??
-  char buff[40],*end;				// Enough for SET TIMESTAMP
+  char buff[64], *end;				// Enough for SET TIMESTAMP
   bool different_db= 1;
   uint32 tmp;
 
@@ -2990,6 +3292,11 @@ void Query_log_event::print_query_header(IO_CACHE* file,
   }
 
   end=int10_to_str((long) when, strmov(buff,"SET TIMESTAMP="),10);
+  if (when_sec_part)
+  {
+    *end++= '.';
+    end=int10_to_str(when_sec_part, end, 10);
+  }
   end= strmov(end, print_event_info->delimiter);
   *end++='\n';
   my_b_write(file, (uchar*) buff, (uint) (end-buff));
@@ -3251,7 +3558,7 @@ int Query_log_event::do_apply_event(Relay_log_info const *rli,
   */
   if (is_trans_keyword() || rpl_filter->db_ok(thd->db))
   {
-    thd->set_time((time_t)when);
+    thd->set_time(when, when_sec_part);
     thd->set_query_and_id((char*)query_arg, q_len_arg,
                           thd->charset(), next_query_id());
     thd->variables.pseudo_thread_id= thread_id;		// for temp tables
@@ -3424,6 +3731,19 @@ START SLAVE; . Query: '%s'", expected_error, thd->query());
     /* If the query was not ignored, it is printed to the general log */
     if (!thd->is_error() || thd->stmt_da->sql_errno() != ER_SLAVE_IGNORED_TABLE)
       general_log_write(thd, COM_QUERY, thd->query(), thd->query_length());
+    else
+    {
+      /*
+        Bug#54201: If we skip an INSERT query that uses auto_increment, then we
+        should reset any @@INSERT_ID set by an Intvar_log_event associated with
+        the query; otherwise the @@INSERT_ID will linger until the next INSERT
+        that uses auto_increment and may affect extra triggers on the slave etc.
+
+        We reset INSERT_ID unconditionally; it is probably cheaper than
+        checking if it is necessary.
+      */
+      thd->auto_inc_intervals_forced.empty();
+    }
 
 compare_errors:
     /*
@@ -3713,10 +4033,11 @@ bool Start_log_event_v3::write(IO_CACHE* file)
   int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version);
   memcpy(buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN);
   if (!dont_set_created)
-    created= when= get_time();
+    created= get_time(); // this sets when and when_sec_part as a side effect
   int4store(buff + ST_CREATED_OFFSET,created);
   return (write_header(file, sizeof(buff)) ||
-          my_b_safe_write(file, (uchar*) buff, sizeof(buff)));
+          wrapper_my_b_safe_write(file, (uchar*) buff, sizeof(buff)) ||
+	  write_footer(file));
 }
 #endif
 
@@ -3819,6 +4140,7 @@ int Start_log_event_v3::do_apply_event(Relay_log_info const *rli)
                                 old 4.0 (binlog version 2) is not supported;
                                 it should not be used for replication with
                                 5.0.
+  @param server_ver             a string containing the server version.
 */
 
 Format_description_log_event::
@@ -3834,9 +4156,9 @@ Format_description_log_event(uint8 binlog_ver, const char* server_ver)
     common_header_len= LOG_EVENT_HEADER_LEN;
     number_of_event_types= LOG_EVENT_TYPES;
     /* we'll catch my_malloc() error in is_valid() */
-    post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8),
+    post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8)
+                                       + BINLOG_CHECKSUM_ALG_DESC_LEN,
                                        MYF(0));
-
     /*
       This long list of assignments is not beautiful, but I see no way to
       make it nicer, as the right members are #defines, not array members, so
@@ -3901,6 +4223,13 @@ Format_description_log_event(uint8 binlog_ver, const char* server_ver)
       post_header_len[INCIDENT_EVENT-1]= INCIDENT_HEADER_LEN;
       post_header_len[HEARTBEAT_LOG_EVENT-1]= 0;
 
+      // Set header length of the reserved events to 0
+      memset(post_header_len + MYSQL_EVENTS_END - 1, 0,
+             (MARIA_EVENTS_BEGIN - MYSQL_EVENTS_END)*sizeof(uint8));
+
+      // Set header lengths of Maria events
+      post_header_len[ANNOTATE_ROWS_EVENT-1]= ANNOTATE_ROWS_HEADER_LEN;
+
       // Sanity-check that all post header lengths are initialized.
       int i;
       for (i=0; i<number_of_event_types; i++)
@@ -3953,6 +4282,7 @@ Format_description_log_event(uint8 binlog_ver, const char* server_ver)
     break;
   }
   calc_server_version_split();
+  checksum_alg= (uint8) BINLOG_CHECKSUM_ALG_UNDEF;
 }
 
 
@@ -3987,14 +4317,26 @@ Format_description_log_event(const char* buf,
   if ((common_header_len=buf[ST_COMMON_HEADER_LEN_OFFSET]) < OLD_HEADER_LEN)
     DBUG_VOID_RETURN; /* sanity check */
   number_of_event_types=
-    event_len-(LOG_EVENT_MINIMAL_HEADER_LEN+ST_COMMON_HEADER_LEN_OFFSET+1);
+    event_len - (LOG_EVENT_MINIMAL_HEADER_LEN + ST_COMMON_HEADER_LEN_OFFSET + 1);
   DBUG_PRINT("info", ("common_header_len=%d number_of_event_types=%d",
                       common_header_len, number_of_event_types));
   /* If alloc fails, we'll detect it in is_valid() */
+
   post_header_len= (uint8*) my_memdup((uchar*)buf+ST_COMMON_HEADER_LEN_OFFSET+1,
                                       number_of_event_types*
-                                      sizeof(*post_header_len), MYF(0));
+                                      sizeof(*post_header_len),
+                                      MYF(0));
   calc_server_version_split();
+  if (!is_version_before_checksum(&server_version_split))
+  {
+    /* the last bytes are the checksum alg desc and value (or value's room) */
+    number_of_event_types -= BINLOG_CHECKSUM_ALG_DESC_LEN;
+    checksum_alg= post_header_len[number_of_event_types];
+  }
+  else
+  {
+    checksum_alg= (uint8) BINLOG_CHECKSUM_ALG_UNDEF;
+  }
 
   /*
     In some previous versions, the events were given other event type
@@ -4105,21 +4447,59 @@ Format_description_log_event(const char* buf,
 #ifndef MYSQL_CLIENT
 bool Format_description_log_event::write(IO_CACHE* file)
 {
+  bool ret;                     // result of the three-step write below
+  bool no_checksum;             // TRUE when checksum_alg was OFF on entry
   /*
     We don't call Start_log_event_v3::write() because this would make 2
     my_b_safe_write().
   */
-  uchar buff[FORMAT_DESCRIPTION_HEADER_LEN];
+  uchar buff[FORMAT_DESCRIPTION_HEADER_LEN + BINLOG_CHECKSUM_ALG_DESC_LEN];
+  size_t rec_size= sizeof(buff);
   int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version);
   memcpy((char*) buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN);
   if (!dont_set_created)
-    created= when= get_time();
+    created= get_time(); // this sets when and when_sec_part as a side effect
   int4store(buff + ST_CREATED_OFFSET,created);
   buff[ST_COMMON_HEADER_LEN_OFFSET]= LOG_EVENT_HEADER_LEN;
-  memcpy((char*) buff+ST_COMMON_HEADER_LEN_OFFSET+1, (uchar*) post_header_len,
+  memcpy((char*) buff+ST_COMMON_HEADER_LEN_OFFSET + 1, (uchar*) post_header_len,
          LOG_EVENT_TYPES);
-  return (write_header(file, sizeof(buff)) ||
-          my_b_safe_write(file, buff, sizeof(buff)));
+  /*
+    If checksum is requested,
+    record the checksum-algorithm descriptor next to the
+    post_header_len vector; it will be followed by the checksum value.
+    Master is supposed to trigger checksum computing by binlog_checksum_options,
+    slave does it via marking the event according to
+    FD_queue checksum_alg value.
+  */
+  compile_time_assert(BINLOG_CHECKSUM_ALG_DESC_LEN == 1); // (A) is buff's last byte
+#ifndef DBUG_OFF
+  data_written= 0; // to prepare for need_checksum assert
+#endif
+  buff[FORMAT_DESCRIPTION_HEADER_LEN]= need_checksum() ?
+    checksum_alg : (uint8) BINLOG_CHECKSUM_ALG_OFF;
+  /*
+     FD of checksum-aware server is always checksum-equipped, (V) is in,
+     regardless of @@global.binlog_checksum policy.
+     Thereby a combination of (A) == 0, (V) != 0 means
+     it's the checksum-aware server's FD event that heads checksum-free binlog
+     file.
+     Here 0 stands for checksumming OFF to evaluate (V) as 0 in that case.
+     A combination of (A) != 0, (V) != 0 denotes FD of the checksum-aware server
+     heading the checksummed binlog.
+     (A), (V) presence in FD of the checksum-aware server makes the event
+     1 + 4 bytes bigger comparing to the former FD.
+  */
+
+  if ((no_checksum= (checksum_alg == BINLOG_CHECKSUM_ALG_OFF)))
+  {
+    checksum_alg= BINLOG_CHECKSUM_ALG_CRC32;  // Forcing (V) room to fill anyway
+  }
+  ret= (write_header(file, rec_size) ||
+        wrapper_my_b_safe_write(file, buff, rec_size) ||
+        write_footer(file));
+  if (no_checksum)
+    checksum_alg= BINLOG_CHECKSUM_ALG_OFF;    // undo the temporary override
+  return ret;
 }
 #endif
 
@@ -4214,6 +4594,30 @@ Format_description_log_event::do_shall_skip(Relay_log_info *rli)
 
 #endif
 
+/* Split "X.Y.Z<suffix>" into three numeric parts plus the server kind. */
+static inline void do_server_version_split(char* version,
+    Format_description_log_event::master_version_split *split_versions)
+{
+  typedef Format_description_log_event::master_version_split split_t;
+  char *cursor= version;
+
+  for (uint part= 0; part < 3; part++)
+  {
+    char *stop;
+    const ulong value= strtoul(cursor, &stop, 10);
+    DBUG_ASSERT(value < 256);                 // must fit in uchar
+    DBUG_ASSERT(part != 0 || *stop == '.');   // should be true in practice
+    split_versions->ver[part]= (uchar) value;
+    // resume right after the number, stepping over a separating dot
+    cursor= (*stop == '.') ? stop + 1 : stop;
+  }
+
+  // the text left after the numbers tells MariaDB from MySQL
+  const bool is_mariadb= strstr(cursor, "MariaDB") != 0 ||
+                         strstr(cursor, "-maria-") != 0;
+  split_versions->kind= is_mariadb ? split_t::KIND_MARIADB : split_t::KIND_MYSQL;
+}
+
 
 /**
    Splits the event's 'server_version' string into three numeric pieces stored
@@ -4226,24 +4630,67 @@ Format_description_log_event::do_shall_skip(Relay_log_info *rli)
 */
 void Format_description_log_event::calc_server_version_split()
 {
-  char *p= server_version, *r;
-  ulong number;
-  for (uint i= 0; i<=2; i++)
-  {
-    number= strtoul(p, &r, 10);
-    server_version_split[i]= (uchar)number;
-    DBUG_ASSERT(number < 256); // fit in uchar
-    p= r;
-    DBUG_ASSERT(!((i == 0) && (*r != '.'))); // should be true in practice
-    if (*r == '.')
-      p++; // skip the dot
-  }
+  do_server_version_split(server_version, &server_version_split); // fills ver[] and kind
+  // trace the three numeric parts just parsed (the kind is not printed)
   DBUG_PRINT("info",("Format_description_log_event::server_version_split:"
                      " '%s' %d %d %d", server_version,
-                     server_version_split[0],
-                     server_version_split[1], server_version_split[2]));
+                     server_version_split.ver[0],
+                     server_version_split.ver[1], server_version_split.ver[2]));
+}
+
+static inline ulong version_product( // (major * 256 + minor) * 256 + patch
+    const Format_description_log_event::master_version_split* version_split)
+{
+  const ulong major= version_split->ver[0], minor= version_split->ver[1];
+  return (major * 256 + minor) * 256 + version_split->ver[2];
+}
+
+/**
+   @return TRUE if the event's version is earlier than the one that introduced
+   the replication event checksum for its server kind. FALSE otherwise.
+*/
+bool Format_description_log_event::is_version_before_checksum(
+    const master_version_split *version_split)
+{
+  const ulong first_checksum_version=
+    (version_split->kind == master_version_split::KIND_MARIADB) ?
+    checksum_version_product_mariadb : checksum_version_product_mysql;
+  return version_product(version_split) < first_checksum_version;
 }
 
+/**
+   @param buf buffer holding serialized FD event
+   @param len length of the event in buf, including the trailing checksum
+              value (or the room reserved for it)
+   @return  the version-safe checksum alg descriptor where zero
+            designates no checksum, 255 - the originator is
+            checksum-unaware (effectively no checksum) and the actual
+            [1-254] range alg descriptor.
+*/
+uint8 get_checksum_alg(const char* buf, ulong len)
+{
+  uint8 ret;
+  char version[ST_SERVER_VER_LEN];
+  Format_description_log_event::master_version_split version_split;
+
+  DBUG_ENTER("get_checksum_alg");
+  DBUG_ASSERT(buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT);
+  /* the header length is an unsigned byte; as plain char it could go negative */
+  memcpy(version, buf +
+         (uchar) buf[LOG_EVENT_MINIMAL_HEADER_LEN + ST_COMMON_HEADER_LEN_OFFSET]
+         + ST_SERVER_VER_OFFSET, ST_SERVER_VER_LEN);
+  version[ST_SERVER_VER_LEN - 1]= 0;  // force termination before parsing
+
+  do_server_version_split(version, &version_split);
+  ret= Format_description_log_event::is_version_before_checksum(&version_split) ?
+    (uint8) BINLOG_CHECKSUM_ALG_UNDEF :
+    * (uint8*) (buf + len - BINLOG_CHECKSUM_LEN - BINLOG_CHECKSUM_ALG_DESC_LEN);
+  DBUG_ASSERT(ret == BINLOG_CHECKSUM_ALG_OFF ||
+              ret == BINLOG_CHECKSUM_ALG_UNDEF ||
+              ret == BINLOG_CHECKSUM_ALG_CRC32);
+  DBUG_RETURN(ret);
+}
+  
 
   /**************************************************************************
         Load_log_event methods
@@ -4550,8 +4997,8 @@ Load_log_event::Load_log_event(const char *buf, uint event_len,
   */
   if (event_len)
     copy_log_event(buf, event_len,
-                   ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
-                    LOAD_HEADER_LEN + 
+                   (((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
+                   LOAD_HEADER_LEN + 
                     description_event->common_header_len :
                     LOAD_HEADER_LEN + LOG_EVENT_HEADER_LEN),
                    description_event);
@@ -4588,7 +5035,7 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
   */
   if (!(field_lens= (uchar*)sql_ex.init((char*)buf + body_offset,
                                         buf_end,
-                                        buf[EVENT_TYPE_OFFSET] != LOAD_EVENT)))
+                                        (uchar)buf[EVENT_TYPE_OFFSET] != LOAD_EVENT)))
     DBUG_RETURN(1);
   
   data_len = event_len - body_offset;
@@ -4789,7 +5236,7 @@ int Load_log_event::do_apply_event(NET* net, Relay_log_info const *rli,
   */
   lex_start(thd);
   thd->lex->local_file= local_fname;
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, 0);
 
   if (!use_rli_only_for_errors)
   {
@@ -4826,7 +5273,7 @@ int Load_log_event::do_apply_event(NET* net, Relay_log_info const *rli,
   */
   if (rpl_filter->db_ok(thd->db))
   {
-    thd->set_time((time_t)when);
+    thd->set_time(when, when_sec_part);
     thd->set_query_id(next_query_id());
     thd->warning_info->opt_clear_warning_info(thd->query_id);
 
@@ -5100,6 +5547,7 @@ Rotate_log_event::Rotate_log_event(const char* new_log_ident_arg,
   DBUG_PRINT("enter",("new_log_ident: %s  pos: %s  flags: %lu", new_log_ident_arg,
                       llstr(pos_arg, buff), (ulong) flags));
 #endif
+  cache_type= EVENT_NO_CACHE;
   if (flags & DUP_NAME)
     new_log_ident= my_strndup(new_log_ident_arg, ident_len, MYF(MY_WME));
   if (flags & RELAY_LOG)
@@ -5141,9 +5589,11 @@ bool Rotate_log_event::write(IO_CACHE* file)
 {
   char buf[ROTATE_HEADER_LEN];
   int8store(buf + R_POS_OFFSET, pos);
-  return (write_header(file, ROTATE_HEADER_LEN + ident_len) ||
-          my_b_safe_write(file, (uchar*)buf, ROTATE_HEADER_LEN) ||
-          my_b_safe_write(file, (uchar*)new_log_ident, (uint) ident_len));
+  return (write_header(file, ROTATE_HEADER_LEN + ident_len) || 
+          wrapper_my_b_safe_write(file, (uchar*) buf, ROTATE_HEADER_LEN) ||
+          wrapper_my_b_safe_write(file, (uchar*) new_log_ident,
+                                     (uint) ident_len) ||
+          write_footer(file));
 }
 #endif
 
@@ -5312,7 +5762,8 @@ bool Intvar_log_event::write(IO_CACHE* file)
   buf[I_TYPE_OFFSET]= (uchar) type;
   int8store(buf + I_VAL_OFFSET, val);
   return (write_header(file, sizeof(buf)) ||
-          my_b_safe_write(file, buf, sizeof(buf)));
+          wrapper_my_b_safe_write(file, buf, sizeof(buf)) ||
+	  write_footer(file));
 }
 #endif
 
@@ -5440,7 +5891,8 @@ bool Rand_log_event::write(IO_CACHE* file)
   int8store(buf + RAND_SEED1_OFFSET, seed1);
   int8store(buf + RAND_SEED2_OFFSET, seed2);
   return (write_header(file, sizeof(buf)) ||
-          my_b_safe_write(file, buf, sizeof(buf)));
+          wrapper_my_b_safe_write(file, buf, sizeof(buf)) ||
+	  write_footer(file));
 }
 #endif
 
@@ -5542,8 +5994,9 @@ Xid_log_event(const char* buf,
 bool Xid_log_event::write(IO_CACHE* file)
 {
   DBUG_EXECUTE_IF("do_not_write_xid", return 0;);
-  return write_header(file, sizeof(xid)) ||
-         my_b_safe_write(file, (uchar*) &xid, sizeof(xid));
+  return (write_header(file, sizeof(xid)) ||  // payload length is sizeof(xid)
+	  wrapper_my_b_safe_write(file, (uchar*) &xid, sizeof(xid)) || // xid bytes as held in memory
+	  write_footer(file)); // NOTE(review): presumably appends the checksum when enabled - confirm
 }
 #endif
 
@@ -5665,7 +6118,7 @@ void User_var_log_event::pack_info(Protocol* protocol)
       break;
     case ROW_RESULT:
     default:
-      DBUG_ASSERT(1);
+      DBUG_ASSERT(0);
       return;
     }
   }
@@ -5722,8 +6175,21 @@ User_var_log_event(const char* buf,
       we keep the flags set to UNDEF_F.
     */
     uint bytes_read= ((val + val_len) - start);
-    DBUG_ASSERT(bytes_read==data_written || 
-                bytes_read==(data_written-1));
+#ifndef DBUG_OFF
+    /* data_written also counts the trailing checksum, when there is one */
+    bool old_pre_checksum_fd= description_event->is_version_before_checksum(
+        &description_event->server_version_split);
+    uint checksum_len= (old_pre_checksum_fd ||
+                        description_event->checksum_alg ==
+                        BINLOG_CHECKSUM_ALG_OFF ||
+                        description_event->checksum_alg ==
+                        BINLOG_CHECKSUM_ALG_UNDEF) ? 0 : BINLOG_CHECKSUM_LEN;
+#endif
+    /*
+      The value is followed by at most one more byte (the flags) before the
+      checksum; checksum_len is computed first because ?: binds looser than -.
+    */
+    DBUG_ASSERT(bytes_read == data_written - checksum_len ||
+                bytes_read == data_written - 1 - checksum_len);
     if ((data_written - bytes_read) > 0)
     {
       flags= (uint) *(buf + UV_VAL_IS_NULL + UV_VAL_TYPE_SIZE +
@@ -5780,7 +6246,7 @@ bool User_var_log_event::write(IO_CACHE* file)
       break;
     case ROW_RESULT:
     default:
-      DBUG_ASSERT(1);
+      DBUG_ASSERT(0);
       return 0;
     }
     int4store(buf1 + 2 + UV_CHARSET_NUMBER_SIZE, val_len);
@@ -5791,11 +6257,12 @@ bool User_var_log_event::write(IO_CACHE* file)
   event_length= sizeof(buf)+ name_len + buf1_length + val_len + unsigned_len;
 
   return (write_header(file, event_length) ||
-          my_b_safe_write(file, (uchar*) buf, sizeof(buf))   ||
-          my_b_safe_write(file, (uchar*) name, name_len)     ||
-          my_b_safe_write(file, (uchar*) buf1, buf1_length) ||
-          my_b_safe_write(file, pos, val_len) ||
-          my_b_safe_write(file, &flags, unsigned_len));
+          wrapper_my_b_safe_write(file, (uchar*) buf, sizeof(buf))   ||
+	  wrapper_my_b_safe_write(file, (uchar*) name, name_len)     ||
+	  wrapper_my_b_safe_write(file, (uchar*) buf1, buf1_length) ||
+	  wrapper_my_b_safe_write(file, pos, val_len) ||
+          wrapper_my_b_safe_write(file, &flags, unsigned_len) ||
+          write_footer(file));
 }
 #endif
 
@@ -5900,7 +6367,7 @@ void User_var_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info)
       break;
     case ROW_RESULT:
     default:
-      DBUG_ASSERT(1);
+      DBUG_ASSERT(0);
       return;
     }
   }
@@ -5963,7 +6430,7 @@ int User_var_log_event::do_apply_event(Relay_log_info const *rli)
       break;
     case ROW_RESULT:
     default:
-      DBUG_ASSERT(1);
+      DBUG_ASSERT(0);
       return 0;
     }
   }
@@ -6322,7 +6789,7 @@ Create_file_log_event::Create_file_log_event(const char* buf, uint len,
   uint8 create_file_header_len= description_event->post_header_len[CREATE_FILE_EVENT-1];
   if (!(event_buf= (char*) my_memdup(buf, len, MYF(MY_WME))) ||
       copy_log_event(event_buf,len,
-                     ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
+                     (((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
                       load_header_len + header_len :
                       (fake_base ? (header_len+load_header_len) :
                        (header_len+load_header_len) +
@@ -6408,7 +6875,7 @@ void Create_file_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info
 #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
 void Create_file_log_event::pack_info(Protocol *protocol)
 {
-  char buf[NAME_LEN*2 + 30 + 21*2], *pos;
+  char buf[SAFE_NAME_LEN*2 + 30 + 21*2], *pos;
   pos= strmov(buf, "db=");
   memcpy(pos, db, db_len);
   pos= strmov(pos + db_len, ";table=");
@@ -6561,8 +7028,9 @@ bool Append_block_log_event::write(IO_CACHE* file)
   uchar buf[APPEND_BLOCK_HEADER_LEN];
   int4store(buf + AB_FILE_ID_OFFSET, file_id);
   return (write_header(file, APPEND_BLOCK_HEADER_LEN + block_len) ||
-          my_b_safe_write(file, buf, APPEND_BLOCK_HEADER_LEN) ||
-	  my_b_safe_write(file, (uchar*) block, block_len));
+          wrapper_my_b_safe_write(file, buf, APPEND_BLOCK_HEADER_LEN) ||
+	  wrapper_my_b_safe_write(file, (uchar*) block, block_len) ||
+	  write_footer(file));
 }
 #endif
 
@@ -6594,9 +7062,8 @@ void Append_block_log_event::print(FILE* file,
 void Append_block_log_event::pack_info(Protocol *protocol)
 {
   char buf[256];
-  size_t length;
-  length= my_snprintf(buf, sizeof(buf), ";file_id=%u;block_len=%u",
-                      file_id, block_len);
+  uint length=              // short fixed text plus two %u fields into buf[256]
+    (uint) sprintf(buf, ";file_id=%u;block_len=%u", file_id, block_len);
   protocol->store(buf, length, &my_charset_bin);
 }
 
@@ -6631,7 +7098,7 @@ int Append_block_log_event::do_apply_event(Relay_log_info const *rli)
       as the present method does not call mysql_parse().
     */
     lex_start(thd);
-    mysql_reset_thd_for_next_command(thd);
+    mysql_reset_thd_for_next_command(thd, 0);
     /* old copy may exist already */
     mysql_file_delete(key_file_log_event_data, fname, MYF(0));
     if ((fd= mysql_file_create(key_file_log_event_data,
@@ -6721,7 +7188,8 @@ bool Delete_file_log_event::write(IO_CACHE* file)
  uchar buf[DELETE_FILE_HEADER_LEN];
  int4store(buf + DF_FILE_ID_OFFSET, file_id);
  return (write_header(file, sizeof(buf)) ||
-         my_b_safe_write(file, buf, sizeof(buf)));
+         wrapper_my_b_safe_write(file, buf, sizeof(buf)) ||
+	 write_footer(file));
 }
 #endif
 
@@ -6751,9 +7219,9 @@ void Delete_file_log_event::print(FILE* file,
 void Delete_file_log_event::pack_info(Protocol *protocol)
 {
   char buf[64];
-  size_t length;
-  length= my_snprintf(buf, sizeof(buf), ";file_id=%u", (uint) file_id);
-  protocol->store(buf, length, &my_charset_bin);
+  uint length;
+  length= (uint) sprintf(buf, ";file_id=%u", (uint) file_id);
+  protocol->store(buf, (int32) length, &my_charset_bin);
 }
 #endif
 
@@ -6818,7 +7286,8 @@ bool Execute_load_log_event::write(IO_CACHE* file)
   uchar buf[EXEC_LOAD_HEADER_LEN];
   int4store(buf + EL_FILE_ID_OFFSET, file_id);
   return (write_header(file, sizeof(buf)) || 
-          my_b_safe_write(file, buf, sizeof(buf)));
+          wrapper_my_b_safe_write(file, buf, sizeof(buf)) ||
+	  write_footer(file));
 }
 #endif
 
@@ -6849,9 +7318,9 @@ void Execute_load_log_event::print(FILE* file,
 void Execute_load_log_event::pack_info(Protocol *protocol)
 {
   char buf[64];
-  size_t length;
-  length= my_snprintf(buf, sizeof(buf), ";file_id=%u", (uint) file_id);
-  protocol->store(buf, length, &my_charset_bin);
+  uint length;
+  length= (uint) sprintf(buf, ";file_id=%u", (uint) file_id);
+  protocol->store(buf, (int32) length, &my_charset_bin);
 }
 
 
@@ -6880,16 +7349,17 @@ int Execute_load_log_event::do_apply_event(Relay_log_info const *rli)
                 fname);
     goto err;
   }
-  if (!(lev = (Load_log_event*)Log_event::read_log_event(&file,
-                                                         (mysql_mutex_t*)0,
-                                                         rli->relay_log.description_event_for_exec)) ||
+  if (!(lev= (Load_log_event*)
+        Log_event::read_log_event(&file,
+                                  (mysql_mutex_t*)0,
+                                  rli->relay_log.description_event_for_exec,
+                                  opt_slave_sql_verify_checksum)) ||
       lev->get_type_code() != NEW_LOAD_EVENT)
   {
     rli->report(ERROR_LEVEL, 0, "Error in Exec_load event: "
                     "file '%s' appears corrupted", fname);
     goto err;
   }
-
   lev->thd = thd;
   /*
     lev->do_apply_event should use rli only for errors i.e. should
@@ -7052,7 +7522,7 @@ Execute_load_query_log_event::write_post_header_for_derived(IO_CACHE* file)
   int4store(buf + 4, fn_pos_start);
   int4store(buf + 4 + 4, fn_pos_end);
   *(buf + 4 + 4 + 4)= (uchar) dup_handling;
-  return my_b_safe_write(file, buf, EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN);
+  return wrapper_my_b_safe_write(file, buf, EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN);
 }
 #endif
 
@@ -7288,7 +7758,8 @@ Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg, ulong tid,
     m_width(tbl_arg ? tbl_arg->s->fields : 1),
     m_rows_buf(0), m_rows_cur(0), m_rows_end(0), m_flags(0) 
 #ifdef HAVE_REPLICATION
-    , m_curr_row(NULL), m_curr_row_end(NULL), m_key(NULL)
+    , m_curr_row(NULL), m_curr_row_end(NULL),
+    m_key(NULL), m_key_info(NULL), m_key_nr(0)
 #endif
 {
   /*
@@ -7336,7 +7807,8 @@ Rows_log_event::Rows_log_event(const char *buf, uint event_len,
 #endif
     m_table_id(0), m_rows_buf(0), m_rows_cur(0), m_rows_end(0)
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
-    , m_curr_row(NULL), m_curr_row_end(NULL), m_key(NULL)
+    , m_curr_row(NULL), m_curr_row_end(NULL),
+    m_key(NULL), m_key_info(NULL), m_key_nr(0)
 #endif
 {
   DBUG_ENTER("Rows_log_event::Rows_log_event(const char*,...)");
@@ -7485,7 +7957,7 @@ int Rows_log_event::do_add_row_data(uchar *row_data, size_t length)
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
   DBUG_DUMP("row_data", row_data, min(length, 32));
 #endif
 
@@ -7577,7 +8049,7 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
       we need to do any changes to that value after this function.
     */
     lex_start(thd);
-    mysql_reset_thd_for_next_command(thd);
+    mysql_reset_thd_for_next_command(thd, 0);
     /*
       The current statement is just about to begin and 
       has not yet modified anything. Note, all.modified is reset
@@ -7686,7 +8158,7 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
       const_cast<Relay_log_info*>(rli)->m_table_map.set_table(ptr->table_id, ptr->table);
     }
 #ifdef HAVE_QUERY_CACHE
-    query_cache.invalidate_locked_for_write(rli->tables_to_lock);
+    query_cache.invalidate_locked_for_write(thd, rli->tables_to_lock);
 #endif
   }
 
@@ -7713,7 +8185,7 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
       TIMESTAMP column to a table with one.
       So we call set_time(), like in SBR. Presently it changes nothing.
     */
-    thd->set_time((time_t)when);
+    thd->set_time(when, when_sec_part);
 
     /*
       Now we are in a statement and will stay in a statement until we
@@ -8016,11 +8488,11 @@ bool Rows_log_event::write_data_header(IO_CACHE *file)
                   {
                     int4store(buf + 0, m_table_id);
                     int2store(buf + 4, m_flags);
-                    return (my_b_safe_write(file, buf, 6));
+                    return (wrapper_my_b_safe_write(file, buf, 6));
                   });
   int6store(buf + RW_MAPID_OFFSET, (ulonglong)m_table_id);
   int2store(buf + RW_FLAGS_OFFSET, m_flags);
-  return (my_b_safe_write(file, buf, ROWS_HEADER_LEN));
+  return (wrapper_my_b_safe_write(file, buf, ROWS_HEADER_LEN));
 }
 
 bool Rows_log_event::write_data_body(IO_CACHE*file)
@@ -8036,10 +8508,10 @@ bool Rows_log_event::write_data_body(IO_CACHE*file)
   DBUG_ASSERT(static_cast<size_t>(sbuf_end - sbuf) <= sizeof(sbuf));
 
   DBUG_DUMP("m_width", sbuf, (size_t) (sbuf_end - sbuf));
-  res= res || my_b_safe_write(file, sbuf, (size_t) (sbuf_end - sbuf));
+  res= res || wrapper_my_b_safe_write(file, sbuf, (size_t) (sbuf_end - sbuf));
 
   DBUG_DUMP("m_cols", (uchar*) m_cols.bitmap, no_bytes_in_map(&m_cols));
-  res= res || my_b_safe_write(file, (uchar*) m_cols.bitmap,
+  res= res || wrapper_my_b_safe_write(file, (uchar*) m_cols.bitmap,
                               no_bytes_in_map(&m_cols));
   /*
     TODO[refactor write]: Remove the "down cast" here (and elsewhere).
@@ -8048,11 +8520,11 @@ bool Rows_log_event::write_data_body(IO_CACHE*file)
   {
     DBUG_DUMP("m_cols_ai", (uchar*) m_cols_ai.bitmap,
               no_bytes_in_map(&m_cols_ai));
-    res= res || my_b_safe_write(file, (uchar*) m_cols_ai.bitmap,
+    res= res || wrapper_my_b_safe_write(file, (uchar*) m_cols_ai.bitmap,
                                 no_bytes_in_map(&m_cols_ai));
   }
   DBUG_DUMP("rows", m_rows_buf, data_size);
-  res= res || my_b_safe_write(file, m_rows_buf, (size_t) data_size);
+  res= res || wrapper_my_b_safe_write(file, m_rows_buf, (size_t) data_size);
 
   return res;
 
@@ -8097,6 +8569,144 @@ void Rows_log_event::print_helper(FILE *file,
 #endif
 
 /**************************************************************************
+	Annotate_rows_log_event member functions
+**************************************************************************/
+
+#ifndef MYSQL_CLIENT
+Annotate_rows_log_event::Annotate_rows_log_event(THD *thd,
+                                                 uint16 cache_type_arg)
+  : Log_event(thd, 0, true),
+    m_save_thd_query_txt(0),           // nothing saved yet, see do_apply_event()
+    m_save_thd_query_len(0)
+{
+  m_query_txt= thd->query();           // pointer to thd's query text, no copy
+  m_query_len= thd->query_length();
+  cache_type= cache_type_arg;
+}
+#endif
+
+Annotate_rows_log_event::Annotate_rows_log_event(const char *buf,
+                                                 uint event_len,
+                                      const Format_description_log_event *desc)
+  : Log_event(buf, desc), m_save_thd_query_txt(0), m_save_thd_query_len(0)
+{
+  /* Parse an event image: the text is whatever follows the common header.
+     The header size is clamped so a truncated event is empty (!is_valid()). */
+  m_query_len= event_len - min((uint) desc->common_header_len, event_len);
+  m_query_txt= (char*) buf + (event_len - m_query_len);
+}
+
+Annotate_rows_log_event::~Annotate_rows_log_event()
+{
+#ifndef MYSQL_CLIENT
+  if (m_save_thd_query_txt)  // set by do_apply_event(): put thd's query back
+    thd->set_query(m_save_thd_query_txt, m_save_thd_query_len);
+#endif
+}
+
+int Annotate_rows_log_event::get_data_size()
+{
+  return m_query_len;  // the event data is the annotation text, nothing else
+}
+
+Log_event_type Annotate_rows_log_event::get_type_code()
+{
+  return ANNOTATE_ROWS_EVENT;
+}
+
+bool Annotate_rows_log_event::is_valid() const
+{
+  return (m_query_txt != NULL && m_query_len != 0);  // empty text is invalid
+}
+
+#ifndef MYSQL_CLIENT
+/* Nothing is written as data header for this event; always succeeds. */
+bool Annotate_rows_log_event::write_data_header(IO_CACHE *file)
+{
+  return false;
+}
+
+/* The event body is the annotation text itself, m_query_len bytes of it. */
+bool Annotate_rows_log_event::write_data_body(IO_CACHE *file)
+{
+  return wrapper_my_b_safe_write(file, (uchar*) m_query_txt, m_query_len);
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+void Annotate_rows_log_event::pack_info(Protocol* protocol)
+{
+  if (m_query_txt && m_query_len)  // nothing is stored for an empty annotation
+    protocol->store(m_query_txt, m_query_len, &my_charset_bin);
+}
+#endif
+
+#ifdef MYSQL_CLIENT
+/* Client side: print the annotation as one "#Q> " line per line of text. */
+void Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo)
+{
+  if (pinfo->short_form)
+    return;
+
+  print_header(&pinfo->head_cache, pinfo, TRUE);
+  my_b_printf(&pinfo->head_cache, "\tAnnotate_rows:\n");
+
+  /*
+    Scan with pointers bounded by 'end' rather than with a character
+    counter: a counter bumped by both inner loops counts every line
+    terminator twice, which cuts the tail off multi-line queries.
+  */
+  char *pbeg= m_query_txt;                     // beginning of the next line
+  char *pend;                                  // end of the next line
+  char *const end= m_query_txt + m_query_len;  // one past the last character
+
+  for (;; pbeg= pend)
+  {
+    // skip all \r's and \n's at the beginning of the next line
+    while (pbeg < end && (*pbeg == '\r' || *pbeg == '\n'))
+      pbeg++;
+    if (pbeg >= end)
+      return;
+
+    // find end of the next line
+    for (pend= pbeg + 1; pend < end && *pend != '\r' && *pend != '\n'; pend++)
+      ;
+
+    // print next line
+    my_b_write(&pinfo->head_cache, (const uchar*) "#Q> ", 4);
+    my_b_write(&pinfo->head_cache, (const uchar*) pbeg, pend - pbeg);
+    my_b_write(&pinfo->head_cache, (const uchar*) "\n", 1);
+  }
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+/* Install the annotation text as the THD's query. The previous query is
+   saved first; the destructor restores it. */
+int Annotate_rows_log_event::do_apply_event(Relay_log_info const *rli)
+{
+  m_save_thd_query_txt= thd->query();
+  m_save_thd_query_len= thd->query_length();
+  thd->set_query(m_query_txt, m_query_len);
+  return 0;
+}
+
+/* Only the event relay log position is advanced for this event. */
+int Annotate_rows_log_event::do_update_pos(Relay_log_info *rli)
+{
+  rli->inc_event_relay_log_pos();
+  return 0;
+}
+
+/* Whether to skip the event is decided entirely by continue_group(). */
+Log_event::enum_skip_reason
+Annotate_rows_log_event::do_shall_skip(Relay_log_info *rli)
+{
+  return continue_group(rli);
+}
+#endif
+
+/**************************************************************************
 	Table_map_log_event member functions and support functions
 **************************************************************************/
 
@@ -8293,7 +8903,7 @@ Table_map_log_event::Table_map_log_event(const char *buf, uint event_len,
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
   DBUG_DUMP("event buffer", (uchar*) buf, event_len);
 #endif
 
@@ -8381,6 +8991,111 @@ Table_map_log_event::~Table_map_log_event()
   my_free(m_memory);
 }
 
+
+#ifdef MYSQL_CLIENT
+
+/*
+  Rewrite database name for the event to name specified by new_db
+  SYNOPSIS
+    new_db   Database name to change to; new_len + 1 bytes are copied from it
+    new_len  Length of the name; it is stored in a single byte of the event
+    desc     Event describing binlog that we're writing to.
+
+  DESCRIPTION
+    Reset db name. This function assumes that temp_buf member contains event
+    representation taken from a binary log. It resets m_dbnam and m_dblen and
+    rewrites temp_buf with new db name.
+
+  RETURN
+    0     - Success
+    other - Error (a memory allocation failed)
+*/
+
+int Table_map_log_event::rewrite_db(const char* new_db, size_t new_len,
+                                    const Format_description_log_event* desc)
+{
+  DBUG_ENTER("Table_map_log_event::rewrite_db");
+  DBUG_ASSERT(temp_buf);
+
+  uint header_len= min(desc->common_header_len,
+                       LOG_EVENT_MINIMAL_HEADER_LEN) + TABLE_MAP_HEADER_LEN;
+  int len_diff;
+
+  if (!(len_diff= new_len - m_dblen))
+  {
+    memcpy((void*) (temp_buf + header_len + 1), new_db, m_dblen + 1);
+    memcpy((void*) m_dbnam, new_db, m_dblen + 1);
+    DBUG_RETURN(0);
+  }
+
+  // Create new temp_buf
+  ulong event_cur_len= uint4korr(temp_buf + EVENT_LEN_OFFSET);
+  ulong event_new_len= event_cur_len + len_diff;
+  char* new_temp_buf= (char*) my_malloc(event_new_len, MYF(MY_WME));
+
+  if (!new_temp_buf)
+  {
+    sql_print_error("Table_map_log_event::rewrite_db: "
+                    "failed to allocate new temp_buf (%lu bytes required)",
+                    event_new_len);
+    DBUG_RETURN(-1);
+  }
+
+  // Rewrite temp_buf
+  char* ptr= new_temp_buf;
+  ulong cnt= 0;
+
+  // Copy header and change event length
+  memcpy(ptr, temp_buf, header_len);
+  int4store(ptr + EVENT_LEN_OFFSET, event_new_len);
+  ptr += header_len;
+  cnt += header_len;
+
+  // Write new db name length and new name
+  *ptr++ = new_len;
+  memcpy(ptr, new_db, new_len + 1);
+  ptr += new_len + 1;
+  cnt += m_dblen + 2;
+
+  // Copy rest part
+  memcpy(ptr, temp_buf + cnt, event_cur_len - cnt);
+
+  // Reregister temp buf
+  free_temp_buf();
+  register_temp_buf(new_temp_buf, TRUE);
+
+  // m_dbnam resides in m_memory together with m_tblnam and m_coltype
+  uchar* memory= m_memory;
+  char const* tblnam= m_tblnam;
+  uchar* coltype= m_coltype;
+
+  m_memory= (uchar*) my_multi_malloc(MYF(MY_WME),
+                                     &m_dbnam, (uint) new_len + 1,
+                                     &m_tblnam, (uint) m_tbllen + 1,
+                                     &m_coltype, (uint) m_colcnt,
+                                     NullS);
+
+  if (!m_memory)
+  {
+    m_memory= memory;  // keep owning the old block so the destructor frees it
+    sql_print_error("Table_map_log_event::rewrite_db: "
+                    "failed to allocate new m_memory (%lu + %lu + %lu bytes required)",
+                    (ulong) new_len + 1, (ulong) m_tbllen + 1, (ulong) m_colcnt);
+    DBUG_RETURN(-1);
+  }
+
+  // Reset m_dbnam and m_dblen members (only once the new block exists)
+  m_dblen= new_len;
+  memcpy((void*)m_dbnam, new_db, m_dblen + 1);
+  memcpy((void*)m_tblnam, tblnam, m_tbllen + 1);
+  memcpy(m_coltype, coltype, m_colcnt);
+
+  my_free(memory);
+  DBUG_RETURN(0);
+}
+#endif /* MYSQL_CLIENT */
+
+
 /*
   Return value is an error code, one of:
 
@@ -8615,11 +9330,11 @@ bool Table_map_log_event::write_data_header(IO_CACHE *file)
                   {
                     int4store(buf + 0, m_table_id);
                     int2store(buf + 4, m_flags);
-                    return (my_b_safe_write(file, buf, 6));
+                    return (wrapper_my_b_safe_write(file, buf, 6));
                   });
   int6store(buf + TM_MAPID_OFFSET, (ulonglong)m_table_id);
   int2store(buf + TM_FLAGS_OFFSET, m_flags);
-  return (my_b_safe_write(file, buf, TABLE_MAP_HEADER_LEN));
+  return (wrapper_my_b_safe_write(file, buf, TABLE_MAP_HEADER_LEN));
 }
 
 bool Table_map_log_event::write_data_body(IO_CACHE *file)
@@ -8643,15 +9358,15 @@ bool Table_map_log_event::write_data_body(IO_CACHE *file)
   uchar mbuf[sizeof(m_field_metadata_size)];
   uchar *const mbuf_end= net_store_length(mbuf, m_field_metadata_size);
 
-  return (my_b_safe_write(file, dbuf,      sizeof(dbuf)) ||
-          my_b_safe_write(file, (const uchar*)m_dbnam,   m_dblen+1) ||
-          my_b_safe_write(file, tbuf,      sizeof(tbuf)) ||
-          my_b_safe_write(file, (const uchar*)m_tblnam,  m_tbllen+1) ||
-          my_b_safe_write(file, cbuf, (size_t) (cbuf_end - cbuf)) ||
-          my_b_safe_write(file, m_coltype, m_colcnt) ||
-          my_b_safe_write(file, mbuf, (size_t) (mbuf_end - mbuf)) ||
-          my_b_safe_write(file, m_field_metadata, m_field_metadata_size),
-          my_b_safe_write(file, m_null_bits, (m_colcnt + 7) / 8));
+  return (wrapper_my_b_safe_write(file, dbuf,      sizeof(dbuf)) ||
+          wrapper_my_b_safe_write(file, (const uchar*)m_dbnam,   m_dblen+1) ||
+          wrapper_my_b_safe_write(file, tbuf,      sizeof(tbuf)) ||
+          wrapper_my_b_safe_write(file, (const uchar*)m_tblnam,  m_tbllen+1) ||
+          wrapper_my_b_safe_write(file, cbuf, (size_t) (cbuf_end - cbuf)) ||
+          wrapper_my_b_safe_write(file, m_coltype, m_colcnt) ||
+          wrapper_my_b_safe_write(file, mbuf, (size_t) (mbuf_end - mbuf)) ||
+          wrapper_my_b_safe_write(file, m_field_metadata, m_field_metadata_size) ||
+          wrapper_my_b_safe_write(file, m_null_bits, (m_colcnt + 7) / 8));
  }
 #endif
 
@@ -8977,7 +9692,7 @@ Rows_log_event::write_row(const Relay_log_info *const rli,
     if (table->file->ha_table_flags() & HA_DUPLICATE_POS)
     {
       DBUG_PRINT("info",("Locating offending record using rnd_pos()"));
-      error= table->file->rnd_pos(table->record[1], table->file->dup_ref);
+      error= table->file->ha_rnd_pos(table->record[1], table->file->dup_ref);
       if (error)
       {
         DBUG_PRINT("info",("rnd_pos() returns error %d",error));
@@ -9009,10 +9724,10 @@ Rows_log_event::write_row(const Relay_log_info *const rli,
 
       key_copy((uchar*)key.get(), table->record[0], table->key_info + keynum,
                0);
-      error= table->file->index_read_idx_map(table->record[1], keynum,
-                                             (const uchar*)key.get(),
-                                             HA_WHOLE_KEY,
-                                             HA_READ_KEY_EXACT);
+      error= table->file->ha_index_read_idx_map(table->record[1], keynum,
+                                                (const uchar*)key.get(),
+                                                HA_WHOLE_KEY,
+                                                HA_READ_KEY_EXACT);
       if (error)
       {
         DBUG_PRINT("info",("index_read_idx() returns %s", HA_ERR(error)));
@@ -9252,13 +9967,93 @@ record_compare_exit:
   return result;
 }
 
+
+/**
+  Choose the key that @c find_row() will use to locate a row.
+
+  The first enabled unique key without NULL-able parts wins (the primary key,
+  if there is one). Failing that, the range-scannable key with the smallest
+  rec_per_key value is taken; with no usable key @c m_key_info is set to NULL.
+  Otherwise @c m_key_info and @c m_key_nr describe the chosen key and @c m_key
+  is a newly allocated buffer for its key image.
+
+  @returns 0 on success, HA_ERR_OUT_OF_MEM if the key buffer allocation fails.
+*/
+int Rows_log_event::find_key()
+{
+  uint best_key_nr= MAX_KEY;
+  KEY *best_key;
+  ulong best_rec_per_key;
+  DBUG_ENTER("Rows_log_event::find_key");
+  DBUG_ASSERT(m_table);
+
+  LINT_INIT(best_key);
+  LINT_INIT(best_rec_per_key);
+
+  /*
+    The key list is sorted: primary key first, unique keys next, all the
+    others last. Walking it in order thus finds the primary key first,
+    if the table has one.
+  */
+  for (uint key_nr= 0; key_nr < m_table->s->keys; key_nr++)
+  {
+    KEY *key= m_table->key_info + key_nr;
+    if (!m_table->s->keys_in_use.is_set(key_nr))
+      continue;
+    /*
+      A unique key identifies the row by itself unless some of its columns
+      are NULL-able; such a key may still be picked for a range scan below.
+    */
+    if ((key->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
+    {
+      best_key_nr= key_nr;
+      best_key= key;
+      break;
+    }
+    /*
+      Non-unique keys qualify only if they allow range scans (this leaves
+      out FULLTEXT indexes and such).
+    */
+    const uint last_part= key->key_parts - 1;
+    DBUG_PRINT("info", ("Index %s rec_per_key[%u]= %lu",
+                        key->name, last_part, key->rec_per_key[last_part]));
+    if (!(m_table->file->index_flags(key_nr, last_part, 1) & HA_READ_NEXT))
+      continue;
+
+    const ulong rec_per_key= key->rec_per_key[last_part];
+    if (best_key_nr == MAX_KEY ||
+        (rec_per_key > 0 && rec_per_key < best_rec_per_key))
+    {
+      best_key_nr= key_nr;
+      best_key= key;
+      best_rec_per_key= rec_per_key;
+    }
+  }
+
+  if (best_key_nr == MAX_KEY)
+  {
+    m_key_info= NULL;
+    DBUG_RETURN(0);
+  }
+
+  // Allocate buffer for key searches
+  m_key= (uchar *) my_malloc(best_key->key_length, MYF(MY_WME));
+  if (!m_key)
+    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+
+  m_key_nr= best_key_nr;
+  m_key_info= best_key;
+  DBUG_RETURN(0);
+}
+
+
 /**
   Locate the current row in event's table.
 
-  The current row is pointed by @c m_curr_row. Member @c m_width tells how many 
-  columns are there in the row (this can be differnet from the number of columns 
-  in the table). It is assumed that event's table is already open and pointed 
-  by @c m_table.
+  The current row is pointed by @c m_curr_row. Member @c m_width tells
+  how many columns are there in the row (this can be different from
+  the number of columns in the table). It is assumed that event's
+  table is already open and pointed by @c m_table.
 
   If a corresponding record is found in the table it is stored in 
   @c m_table->record[0]. Note that when record is located based on a primary 
@@ -9319,13 +10114,14 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
       length. Something along these lines should work:
 
       ADD>>>  store_record(table,record[1]);
-              int error= table->file->rnd_pos(table->record[0], table->file->ref);
+              int error= table->file->ha_rnd_pos(table->record[0],
+              table->file->ref);
       ADD>>>  DBUG_ASSERT(memcmp(table->record[1], table->record[0],
                                  table->s->reclength) == 0);
 
     */
     DBUG_PRINT("info",("locating record using primary key (position)"));
-    int error= table->file->rnd_pos_by_record(table->record[0]);
+    int error= table->file->ha_rnd_pos_by_record(table->record[0]);
     if (error)
     {
       DBUG_PRINT("info",("rnd_pos returns error %d",error));
@@ -9350,12 +10146,17 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
    */ 
   store_record(table,record[1]);    
 
-  if (table->s->keys > 0 && table->s->keys_in_use.is_set(0))
+  if (m_key_info)
   {
-    DBUG_PRINT("info",("locating record using primary key (index_read)"));
+    DBUG_PRINT("info",("locating record using key #%u [%s] (index_read)",
+                       m_key_nr, m_key_info->name));
+    /* We use this to test that the correct key is used in test cases. */
+    DBUG_EXECUTE_IF("slave_crash_if_wrong_index",
+                    if(0 != strcmp(m_key_info->name,"expected_key")) abort(););
 
-    /* The 0th key is active: search the table using the index */
-    if (!table->file->inited && (error= table->file->ha_index_init(0, FALSE)))
+    /* The key is active: search the table using the index */
+    if (!table->file->inited &&
+        (error= table->file->ha_index_init(m_key_nr, FALSE)))
     {
       DBUG_PRINT("info",("ha_index_init returns error %d",error));
       table->file->print_error(error, MYF(0));
@@ -9365,14 +10166,14 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
     /* Fill key data for the row */
 
     DBUG_ASSERT(m_key);
-    key_copy(m_key, table->record[0], table->key_info, 0);
+    key_copy(m_key, table->record[0], m_key_info, 0);
 
     /*
       Don't print debug messages when running valgrind since they can
       trigger false warnings.
      */
-#ifndef HAVE_purify
-    DBUG_DUMP("key data", m_key, table->key_info->key_length);
+#ifndef HAVE_valgrind
+    DBUG_DUMP("key data", m_key, m_key_info->key_length);
 #endif
 
     /*
@@ -9385,9 +10186,9 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
       table->record[0][table->s->null_bytes - 1]|=
         256U - (1U << table->s->last_null_bit_pos);
 
-    if ((error= table->file->index_read_map(table->record[0], m_key, 
-                                            HA_WHOLE_KEY,
-                                            HA_READ_KEY_EXACT)))
+    if ((error= table->file->ha_index_read_map(table->record[0], m_key, 
+                                               HA_WHOLE_KEY,
+                                               HA_READ_KEY_EXACT)))
     {
       DBUG_PRINT("info",("no record matching the key found in the table"));
       if (error == HA_ERR_RECORD_DELETED)
@@ -9401,7 +10202,7 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
     DBUG_PRINT("info",("found first matching record")); 
     DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
 #endif
@@ -9458,6 +10259,8 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
       record we are looking for is stored in record[1].
      */ 
     DBUG_PRINT("info",("non-unique index, scanning it to find matching record")); 
+    /* We use this to test that the correct key is used in test cases. */
+    DBUG_EXECUTE_IF("slave_crash_if_index_scan", abort(););
 
     while (record_compare(table))
     {
@@ -9476,7 +10279,7 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
           256U - (1U << table->s->last_null_bit_pos);
       }
 
-      while ((error= table->file->index_next(table->record[0])))
+      while ((error= table->file->ha_index_next(table->record[0])))
       {
         /* We just skip records that has already been deleted */
         if (error == HA_ERR_RECORD_DELETED)
@@ -9496,15 +10299,16 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
   else
   {
     DBUG_PRINT("info",("locating record using table scan (rnd_next)"));
+    /* We use this to test that the correct key is used in test cases. */
+    DBUG_EXECUTE_IF("slave_crash_if_table_scan", abort(););
 
     int restart_count= 0; // Number of times scanning has restarted from top
 
     /* We don't have a key: search the table using rnd_next() */
-    if ((error= table->file->ha_rnd_init(1)))
+    if ((error= table->file->ha_rnd_init_with_error(1)))
     {
       DBUG_PRINT("info",("error initializing table scan"
                          " (ha_rnd_init returns %d)",error));
-      table->file->print_error(error, MYF(0));
       goto err;
     }
 
@@ -9512,7 +10316,7 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
     do
     {
   restart_rnd_next:
-      error= table->file->rnd_next(table->record[0]);
+      error= table->file->ha_rnd_next(table->record[0]);
 
       if (error)
         DBUG_PRINT("info", ("error: %s", HA_ERR(error)));
@@ -9530,7 +10334,14 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
 
       case HA_ERR_END_OF_FILE:
         if (++restart_count < 2)
-          table->file->ha_rnd_init(1);
+        {
+          int error2;
+          if ((error2= table->file->ha_rnd_init_with_error(1)))
+          {
+            error= error2;
+            goto err;
+          }
+        }
         break;
 
       default:
@@ -9610,14 +10421,7 @@ Delete_rows_log_event::do_before_row_operations(const Slave_reporting_capability
     return 0;
   }
 
-  if (m_table->s->keys > 0)
-  {
-    // Allocate buffer for key searches
-    m_key= (uchar*)my_malloc(m_table->key_info->key_length, MYF(MY_WME));
-    if (!m_key)
-      return HA_ERR_OUT_OF_MEM;
-  }
-  return 0;
+  return find_key();
 }
 
 int 
@@ -9628,6 +10432,7 @@ Delete_rows_log_event::do_after_row_operations(const Slave_reporting_capability
   m_table->file->ha_index_or_rnd_end();
   my_free(m_key);
   m_key= NULL;
+  m_key_info= NULL;
 
   return error;
 }
@@ -9730,13 +10535,9 @@ Update_rows_log_event::Update_rows_log_event(const char *buf, uint event_len,
 int 
 Update_rows_log_event::do_before_row_operations(const Slave_reporting_capability *const)
 {
-  if (m_table->s->keys > 0)
-  {
-    // Allocate buffer for key searches
-    m_key= (uchar*)my_malloc(m_table->key_info->key_length, MYF(MY_WME));
-    if (!m_key)
-      return HA_ERR_OUT_OF_MEM;
-  }
+  int err;
+  if ((err= find_key()))
+    return err;
 
   m_table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
 
@@ -9751,6 +10552,7 @@ Update_rows_log_event::do_after_row_operations(const Slave_reporting_capability
   m_table->file->ha_index_or_rnd_end();
   my_free(m_key); // Free for multi_malloc
   m_key= NULL;
+  m_key_info= NULL;
 
   return error;
 }
@@ -9794,7 +10596,7 @@ Update_rows_log_event::do_exec_row(const Relay_log_info *const rli)
     Now we have the right row to update.  The old row (the one we're
     looking for) is in record[1] and the new row is in record[0].
   */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
   /*
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
@@ -9872,7 +10674,6 @@ Incident_log_event::description() const
   };
 
   DBUG_PRINT("info", ("m_incident: %d", m_incident));
-
   return description[m_incident];
 }
 
@@ -9927,13 +10728,25 @@ Incident_log_event::write_data_header(IO_CACHE *file)
   DBUG_PRINT("enter", ("m_incident: %d", m_incident));
   uchar buf[sizeof(int16)];
   int2store(buf, (int16) m_incident);
-  DBUG_RETURN(my_b_safe_write(file, buf, sizeof(buf)));
+#ifndef MYSQL_CLIENT
+  DBUG_RETURN(wrapper_my_b_safe_write(file, buf, sizeof(buf)));
+#else
+   DBUG_RETURN(my_b_safe_write(file, buf, sizeof(buf)));
+#endif
 }
 
 bool
 Incident_log_event::write_data_body(IO_CACHE *file)
 {
+  uchar tmp[1];
   DBUG_ENTER("Incident_log_event::write_data_body");
+  tmp[0]= (uchar) m_message.length;
+  crc= my_checksum(crc, (uchar*) tmp, 1);
+  if (m_message.length > 0)
+  {
+    crc= my_checksum(crc, (uchar*) m_message.str, m_message.length);
+    // todo: report a bug on write_str accepts uint but treats it as uchar
+  }
   DBUG_RETURN(write_str(file, m_message.str, (uint) m_message.length));
 }
 
@@ -9968,7 +10781,6 @@ st_print_event_info::st_print_event_info()
 }
 #endif
 
-
 #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
 Heartbeat_log_event::Heartbeat_log_event(const char* buf, uint event_len,
                     const Format_description_log_event* description_event)
@@ -9980,3 +10792,29 @@ Heartbeat_log_event::Heartbeat_log_event(const char* buf, uint event_len,
   log_ident= buf + header_size;
 }
 #endif
+
+#ifdef MYSQL_SERVER
+/*
+  Report the replication position recorded in active_mi->rli.
+
+  Returns TRUE after filling in all four output arguments. The embedded
+  library has a dummy replacement that returns FALSE, so XtraDB can use
+  the same plugin in both stand-alone and embedded servers.
+*/
+bool rpl_get_position_info(const char **log_file_name, ulonglong *log_pos,
+                           const char **group_relay_log_name,
+                           ulonglong *relay_log_pos)
+{
+#if !defined(EMBEDDED_LIBRARY) && defined(HAVE_REPLICATION)
+  const Relay_log_info &rli= active_mi->rli;
+  *log_file_name= rli.group_master_log_name;
+  *log_pos= rli.group_master_log_pos +
+    (rli.future_event_relay_log_pos - rli.group_relay_log_pos);
+  *group_relay_log_name= rli.group_relay_log_name;
+  *relay_log_pos= rli.future_event_relay_log_pos;
+  return TRUE;
+#else
+  return FALSE;
+#endif
+}
+#endif
diff --git a/sql/log_event.h b/sql/log_event.h
index 07656ebaa72..e1fdc70663b 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -77,6 +77,7 @@ class String;
 #define LOG_READ_MEM    -5
 #define LOG_READ_TRUNC  -6
 #define LOG_READ_TOO_LARGE -7
+#define LOG_READ_CHECKSUM_FAILURE -8
 
 #define LOG_EVENT_OFFSET 4
 
@@ -256,6 +257,8 @@ struct sql_ex_info
 #define EXECUTE_LOAD_QUERY_HEADER_LEN  (QUERY_HEADER_LEN + EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN)
 #define INCIDENT_HEADER_LEN    2
 #define HEARTBEAT_HEADER_LEN   0
+#define ANNOTATE_ROWS_HEADER_LEN  0
+
 /* 
   Max number of possible extra bytes in a replication event compared to a
   packet (i.e. a query) sent from client to master;
@@ -271,6 +274,7 @@ struct sql_ex_info
                                    1 + 2          /* type, charset_database_number */ + \
                                    1 + 8          /* type, table_map_for_update */ + \
                                    1 + 4          /* type, master_data_written */ + \
+                                   1 + 3          /* type, sec_part of NOW() */ + \
                                    1 + 16 + 1 + 60/* type, user_len, user, host_len, host */)
 #define MAX_LOG_EVENT_HEADER   ( /* in order of Query_log_event::write */ \
   LOG_EVENT_HEADER_LEN + /* write_header */ \
@@ -342,6 +346,8 @@ struct sql_ex_info
 
 #define Q_INVOKER 11
 
+#define Q_HRNOW 128
+
 /* Intvar event post-header */
 
 /* Intvar event data */
@@ -404,7 +410,7 @@ struct sql_ex_info
 #define ELQ_DUP_HANDLING_OFFSET ELQ_FILE_ID_OFFSET + 12
 
 /* 4 bytes which all binlogs should begin with */
-#define BINLOG_MAGIC        "\xfe\x62\x69\x6e"
+#define BINLOG_MAGIC        ((const uchar*) "\xfe\x62\x69\x6e") /* parenthesized: safe to index or dereference */
 
 /*
   The 2 flags below were useless :
@@ -530,6 +536,22 @@ struct sql_ex_info
 #endif
 #undef EXPECTED_OPTIONS         /* You shouldn't use this one */
 
+enum enum_binlog_checksum_alg {  // Checksum algorithm tag of binlog events.
+  BINLOG_CHECKSUM_ALG_OFF= 0,    // Events carry no checksum although their generator
+                                 // is a checksum-capable New Master (NM).
+  BINLOG_CHECKSUM_ALG_CRC32= 1,  // CRC32 as implemented by zlib.
+  BINLOG_CHECKSUM_ALG_ENUM_END,  // The cut line: valid alg range is [1, 0x7f].
+  BINLOG_CHECKSUM_ALG_UNDEF= 255 // Special value tagging a not-yet-determined checksum
+                                 // or events from checksum-unaware servers.
+};
+
+#define CHECKSUM_CRC32_SIGNATURE_LEN 4
+/**
+   defined statically while there is just one alg implemented
+*/
+#define BINLOG_CHECKSUM_LEN CHECKSUM_CRC32_SIGNATURE_LEN
+#define BINLOG_CHECKSUM_ALG_DESC_LEN 1  /* 1 byte checksum alg descriptor */
+
 /**
   @enum Log_event_type
 
@@ -599,6 +621,15 @@ enum Log_event_type
     Existing events (except ENUM_END_EVENT) should never change their numbers
   */
 
+  /* New MySQL/Sun events are to be added right above this comment */
+  MYSQL_EVENTS_END,
+
+  MARIA_EVENTS_BEGIN= 160,
+  /* New Maria event numbers start from here */
+  ANNOTATE_ROWS_EVENT= 160,
+
+  /* Add new MariaDB events here - right above this comment!  */
+
   ENUM_END_EVENT /* end marker */
 };
 
@@ -930,6 +961,13 @@ public:
      event's type, and its content is distributed in the event-specific fields.
   */
   char *temp_buf;
+  
+  /*
+    TRUE <=> this event 'owns' temp_buf and should call my_free() when done
+    with it
+  */
+  bool event_owns_temp_buf;
+
   /*
     Timestamp on the master(for debugging and replication of
     NOW()/TIMESTAMP).  It is important for queries and LOAD DATA
@@ -938,7 +976,8 @@ public:
     execution time, which guarantees good replication (otherwise, we
     could have a query and its event with different timestamps).
   */
-  time_t when;
+  my_time_t when;
+  ulong     when_sec_part;
   /* The number of seconds the query took to run on the master. */
   ulong exec_time;
   /* Number of bytes written by write() function */
@@ -956,11 +995,7 @@ public:
     LOG_EVENT_SUPPRESS_USE_F for notes.
   */
   uint16 flags;
-  
-  /*
-    Defines the type of the cache, if any, where the event will be
-    stored before being flushed to disk.
-  */
+
   uint16 cache_type;
 
   /**
@@ -969,6 +1004,11 @@ public:
   */
   ulong slave_exec_mode;
 
+  /**
+    Placeholder for event checksum while writing to binlog.
+   */
+  ha_checksum crc;
+
 #ifdef MYSQL_SERVER
   THD* thd;
 
@@ -988,9 +1028,10 @@ public:
   static Log_event* read_log_event(IO_CACHE* file,
                                    mysql_mutex_t* log_lock,
                                    const Format_description_log_event
-                                   *description_event);
+                                   *description_event,
+                                   my_bool crc_check);
   static int read_log_event(IO_CACHE* file, String* packet,
-                            mysql_mutex_t* log_lock);
+                            mysql_mutex_t* log_lock, uint8 checksum_alg_arg);
   /*
     init_show_field_list() prepares the column names and types for the
     output of SHOW BINLOG EVENTS; it is used only by SHOW BINLOG
@@ -1017,7 +1058,7 @@ public:
     /* avoid having to link mysqlbinlog against libpthread */
   static Log_event* read_log_event(IO_CACHE* file,
                                    const Format_description_log_event
-                                   *description_event);
+                                   *description_event, my_bool crc_check);
   /* print*() functions are used by mysqlbinlog */
   virtual void print(FILE* file, PRINT_EVENT_INFO* print_event_info) = 0;
   void print_timestamp(IO_CACHE* file, time_t *ts = 0);
@@ -1026,6 +1067,15 @@ public:
   void print_base64(IO_CACHE* file, PRINT_EVENT_INFO* print_event_info,
                     bool is_more);
 #endif
+  /* 
+     The value is set by caller of FD constructor and
+     Log_event::write_header() for the rest.
+     In the FD case it's propagated into the last byte 
+     of post_header_len[] at FD::write().
+     On the slave side the value is assigned from post_header_len[last] 
+     of the last seen FD event.
+  */
+  uint8 checksum_alg;
 
   static void *operator new(size_t size)
   {
@@ -1040,29 +1090,46 @@ public:
   /* Placement version of the above operators */
   static void *operator new(size_t, void* ptr) { return ptr; }
   static void operator delete(void*, void*) { }
+  bool wrapper_my_b_safe_write(IO_CACHE* file, const uchar* buf, ulong data_length);
 
 #ifdef MYSQL_SERVER
   bool write_header(IO_CACHE* file, ulong data_length);
+  bool write_footer(IO_CACHE* file);
+  my_bool need_checksum();
+
   virtual bool write(IO_CACHE* file)
   {
-    return (write_header(file, get_data_size()) ||
-            write_data_header(file) ||
-            write_data_body(file));
+    return(write_header(file, get_data_size()) ||
+	   write_data_header(file) ||
+	   write_data_body(file) ||
+	   write_footer(file));
   }
   virtual bool write_data_header(IO_CACHE* file)
   { return 0; }
   virtual bool write_data_body(IO_CACHE* file __attribute__((unused)))
   { return 0; }
-  inline time_t get_time()
+  inline my_time_t get_time()
   {
     THD *tmp_thd;
     if (when)
       return when;
     if (thd)
-      return thd->start_time;
+    {
+      when= thd->start_time;
+      when_sec_part= thd->start_time_sec_part;
+      return when;
+    }
+    /* thd will only be 0 here at time of log creation */
     if ((tmp_thd= current_thd))
-      return tmp_thd->start_time;
-    return my_time(0);
+    {
+      when= tmp_thd->start_time;
+      when_sec_part= tmp_thd->start_time_sec_part;
+      return when;
+    }
+    my_hrtime_t hrtime= my_hrtime();
+    when= hrtime_to_my_time(hrtime);
+    when_sec_part= hrtime_sec_part(hrtime);
+    return when;
   }
 #endif
   virtual Log_event_type get_type_code() = 0;
@@ -1086,12 +1153,17 @@ public:
   Log_event(const char* buf, const Format_description_log_event
             *description_event);
   virtual ~Log_event() { free_temp_buf();}
-  void register_temp_buf(char* buf) { temp_buf = buf; }
+  void register_temp_buf(char* buf, bool must_free) 
+  { 
+    temp_buf= buf; 
+    event_owns_temp_buf= must_free;
+  }
   void free_temp_buf()
   {
     if (temp_buf)
     {
-      my_free(temp_buf);
+      if (event_owns_temp_buf)
+        my_free(temp_buf);
       temp_buf = 0;
     }
   }
@@ -1103,7 +1175,7 @@ public:
   static Log_event* read_log_event(const char* buf, uint event_len,
 				   const char **error,
                                    const Format_description_log_event
-                                   *description_event);
+                                   *description_event, my_bool crc_check);
   /**
     Returns the human readable name of the given event type.
   */
@@ -1426,7 +1498,7 @@ protected:
     MODE_PIPES_AS_CONCAT==0x2
     MODE_ANSI_QUOTES==0x4
     MODE_IGNORE_SPACE==0x8
-    MODE_NOT_USED==0x10
+    MODE_IGNORE_BAD_TABLE_OPTIONS==0x10
     MODE_ONLY_FULL_GROUP_BY==0x20
     MODE_NO_UNSIGNED_SUBTRACTION==0x40
     MODE_NO_DIR_IN_CREATE==0x80
@@ -2182,8 +2254,6 @@ public:        /* !!! Public in this patch to allow old usage */
 #endif
 };
 
-extern char server_version[SERVER_VERSION_LENGTH];
-
 /**
   @class Start_log_event_v3
 
@@ -2295,9 +2365,17 @@ public:
   */
   uint8 common_header_len;
   uint8 number_of_event_types;
-  /* The list of post-headers' lengthes */
+  /* 
+     The list of post-headers' lengths followed 
+     by the checksum alg description byte
+  */
   uint8 *post_header_len;
-  uchar server_version_split[3];
+  struct master_version_split {      // server version in split form
+    enum {KIND_MYSQL, KIND_MARIADB};
+    int kind;        // presumably KIND_MYSQL or KIND_MARIADB - confirm against calc_server_version_split()
+    uchar ver[3];    // presumably major, minor, patch numbers - TODO confirm
+  };
+  master_version_split server_version_split;
   const uint8 *event_type_permutation;
 
   Format_description_log_event(uint8 binlog_ver, const char* server_ver=0);
@@ -2329,7 +2407,7 @@ public:
   }
 
   void calc_server_version_split();
-
+  static bool is_version_before_checksum(const master_version_split *version_split);
 protected:
 #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION)
   virtual int do_apply_event(Relay_log_info const *rli);
@@ -2506,7 +2584,12 @@ class Xid_log_event: public Log_event
    my_xid xid;
 
 #ifdef MYSQL_SERVER
-  Xid_log_event(THD* thd_arg, my_xid x): Log_event(thd_arg, 0, TRUE), xid(x) {}
+  Xid_log_event(THD* thd_arg, my_xid x, bool direct):
+   Log_event(thd_arg, 0, TRUE), xid(x)
+   {
+     if (direct)
+       cache_type= Log_event::EVENT_NO_CACHE;
+   }
 #ifdef HAVE_REPLICATION
   void pack_info(Protocol* protocol);
 #endif /* HAVE_REPLICATION */
@@ -3060,6 +3143,59 @@ public:
 char *str_to_hex(char *to, const char *from, uint len);
 
 /**
+  @class Annotate_rows_log_event
+
+  In row-based mode, if binlog_annotate_rows_events = ON, each group of
+  Table_map_log_events is preceded by an Annotate_rows_log_event which
+  contains the query which caused the subsequent rows operations.
+
+  The Annotate_rows_log_event has no post-header and its body contains
+  the corresponding query (without trailing zero). Note. The query length
+  is to be calculated as a difference between the whole event length and
+  the common header length.
+*/
+class Annotate_rows_log_event: public Log_event
+{
+public:
+#ifndef MYSQL_CLIENT
+  Annotate_rows_log_event(THD*, uint16 cache_type_arg);
+#endif
+  Annotate_rows_log_event(const char *buf, uint event_len,
+                          const Format_description_log_event*);
+  ~Annotate_rows_log_event();
+
+  virtual int get_data_size();
+  virtual Log_event_type get_type_code();
+  virtual bool is_valid() const;
+
+#ifndef MYSQL_CLIENT
+  virtual bool write_data_header(IO_CACHE*);
+  virtual bool write_data_body(IO_CACHE*);
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+  virtual void pack_info(Protocol*);
+#endif
+
+#ifdef MYSQL_CLIENT
+  virtual void print(FILE*, PRINT_EVENT_INFO*);
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+private:
+  virtual int do_apply_event(Relay_log_info const*);
+  virtual int do_update_pos(Relay_log_info*);
+  virtual enum_skip_reason do_shall_skip(Relay_log_info*);
+#endif
+
+private:
+  char *m_query_txt;           // presumably the annotated query text - confirm in log_event.cc
+  uint  m_query_len;           // presumably the length of m_query_txt in bytes - confirm
+  char *m_save_thd_query_txt;  // presumably the THD query text saved while applying - TODO confirm
+  uint  m_save_thd_query_len;  // presumably the length belonging to m_save_thd_query_txt - confirm
+};
+
+/**
   @class Table_map_log_event
 
   In row-based mode, every row operation event is preceded by a
@@ -3433,6 +3569,8 @@ public:
     return new table_def(m_coltype, m_colcnt, m_field_metadata,
                          m_field_metadata_size, m_null_bits, m_flags);
   }
+  int rewrite_db(const char* new_name, size_t new_name_len,
+                 const Format_description_log_event*);
 #endif
   ulong get_table_id() const        { return m_table_id; }
   const char *get_table_name() const { return m_tblnam; }
@@ -3662,7 +3800,10 @@ protected:
   const uchar *m_curr_row;     /* Start of the row being processed */
   const uchar *m_curr_row_end; /* One-after the end of the current row */
   uchar    *m_key;      /* Buffer to keep key value during searches */
+  KEY      *m_key_info; /* Pointer to KEY info for m_key_nr */
+  uint      m_key_nr;   /* Key number */
 
+  int find_key(); // Find a best key to use in find_row()
   int find_row(const Relay_log_info *const);
   int write_row(const Relay_log_info *const, const bool);
 
@@ -4084,6 +4225,14 @@ private:
 int append_query_string(CHARSET_INFO *csinfo,
                         String const *from, String *to);
 
+bool rpl_get_position_info(const char **log_file_name, ulonglong *log_pos,
+                           const char **group_relay_log_name,
+                           ulonglong *relay_log_pos);
+
+bool event_checksum_test(uchar *buf, ulong event_len, uint8 alg);
+uint8 get_checksum_alg(const char* buf, ulong len);
+extern TYPELIB binlog_checksum_typelib;
+
 /**
   @} (end of group Replication)
 */
diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc
index 4aa8c0959b1..a52c584726a 100644
--- a/sql/log_event_old.cc
+++ b/sql/log_event_old.cc
@@ -87,7 +87,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info
       we need to do any changes to that value after this function.
     */
     lex_start(ev_thd);
-    mysql_reset_thd_for_next_command(ev_thd);
+    mysql_reset_thd_for_next_command(ev_thd, 0);
 
     /*
       This is a row injection, so we flag the "statement" as
@@ -163,7 +163,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info
       const_cast<Relay_log_info*>(rli)->m_table_map.set_table(ptr->table_id, ptr->table);
     }
 #ifdef HAVE_QUERY_CACHE
-    query_cache.invalidate_locked_for_write(rli->tables_to_lock);
+    query_cache.invalidate_locked_for_write(thd, rli->tables_to_lock);
 #endif
   }
 
@@ -185,7 +185,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info
       TIMESTAMP column to a table with one.
       So we call set_time(), like in SBR. Presently it changes nothing.
     */
-    ev_thd->set_time((time_t)ev->when);
+    ev_thd->set_time(ev->when, ev->when_sec_part);
     /*
       There are a few flags that are replicated with each row event.
       Make sure to set/clear them before executing the main body of
@@ -559,7 +559,7 @@ replace_record(THD *thd, TABLE *table,
      */
     if (table->file->ha_table_flags() & HA_DUPLICATE_POS)
     {
-      error= table->file->rnd_pos(table->record[1], table->file->dup_ref);
+      error= table->file->ha_rnd_pos(table->record[1], table->file->dup_ref);
       if (error)
       {
         DBUG_PRINT("info",("rnd_pos() returns error %d",error));
@@ -585,10 +585,10 @@ replace_record(THD *thd, TABLE *table,
 
       key_copy((uchar*)key.get(), table->record[0], table->key_info + keynum,
                0);
-      error= table->file->index_read_idx_map(table->record[1], keynum,
-                                             (const uchar*)key.get(),
-                                             HA_WHOLE_KEY,
-                                             HA_READ_KEY_EXACT);
+      error= table->file->ha_index_read_idx_map(table->record[1], keynum,
+                                                (const uchar*)key.get(),
+                                                HA_WHOLE_KEY,
+                                                HA_READ_KEY_EXACT);
       if (error)
       {
         DBUG_PRINT("info", ("index_read_idx() returns error %d", error));
@@ -700,13 +700,13 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
       length. Something along these lines should work:
 
       ADD>>>  store_record(table,record[1]);
-              int error= table->file->rnd_pos(table->record[0], table->file->ref);
+              int error= table->file->ha_rnd_pos(table->record[0], table->file->ref);
       ADD>>>  DBUG_ASSERT(memcmp(table->record[1], table->record[0],
                                  table->s->reclength) == 0);
 
     */
     table->file->position(table->record[0]);
-    int error= table->file->rnd_pos(table->record[0], table->file->ref);
+    int error= table->file->ha_rnd_pos(table->record[0], table->file->ref);
     /*
       rnd_pos() returns the record in table->record[0], so we have to
       move it to table->record[1].
@@ -730,7 +730,7 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
     DBUG_DUMP("table->record[0]", table->record[0], table->s->reclength);
     DBUG_DUMP("table->record[1]", table->record[1], table->s->reclength);
 #endif
@@ -744,8 +744,9 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
     my_ptrdiff_t const pos=
       table->s->null_bytes > 0 ? table->s->null_bytes - 1 : 0;
     table->record[1][pos]= 0xFF;
-    if ((error= table->file->index_read_map(table->record[1], key, HA_WHOLE_KEY,
-                                            HA_READ_KEY_EXACT)))
+    if ((error= table->file->ha_index_read_map(table->record[1], key,
+                                               HA_WHOLE_KEY,
+                                               HA_READ_KEY_EXACT)))
     {
       table->file->print_error(error, MYF(0));
       table->file->ha_index_end();
@@ -756,7 +757,7 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
     DBUG_DUMP("table->record[0]", table->record[0], table->s->reclength);
     DBUG_DUMP("table->record[1]", table->record[1], table->s->reclength);
 #endif
@@ -799,7 +800,7 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
           256U - (1U << table->s->last_null_bit_pos);
       }
 
-      while ((error= table->file->index_next(table->record[1])))
+      while ((error= table->file->ha_index_next(table->record[1])))
       {
         /* We just skip records that has already been deleted */
         if (error == HA_ERR_RECORD_DELETED)
@@ -821,14 +822,14 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
     int error;
 
     /* We don't have a key: search the table using rnd_next() */
-    if ((error= table->file->ha_rnd_init(1)))
+    if ((error= table->file->ha_rnd_init_with_error(1)))
       return error;
 
     /* Continue until we find the right record or have made a full loop */
     do
     {
   restart_rnd_next:
-      error= table->file->rnd_next(table->record[1]);
+      error= table->file->ha_rnd_next(table->record[1]);
 
       DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
       DBUG_DUMP("record[1]", table->record[1], table->s->reclength);
@@ -845,15 +846,19 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
         goto restart_rnd_next;
 
       case HA_ERR_END_OF_FILE:
-  if (++restart_count < 2)
-    table->file->ha_rnd_init(1);
-  break;
+        if (++restart_count < 2)
+        {
+          int error2;
+          if ((error2= table->file->ha_rnd_init_with_error(1)))
+            DBUG_RETURN(error2);
+        }
+        break;
 
       default:
-  table->file->print_error(error, MYF(0));
+        table->file->print_error(error, MYF(0));
         DBUG_PRINT("info", ("Record not found"));
         table->file->ha_rnd_end();
-  DBUG_RETURN(error);
+        DBUG_RETURN(error);
       }
     }
     while (restart_count < 2 && record_compare(table));
@@ -1410,7 +1415,7 @@ int Old_rows_log_event::do_add_row_data(uchar *row_data, size_t length)
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
   DBUG_DUMP("row_data", row_data, min(length, 32));
 #endif
 
@@ -1573,7 +1578,7 @@ int Old_rows_log_event::do_apply_event(Relay_log_info const *rli)
       const_cast<Relay_log_info*>(rli)->m_table_map.set_table(ptr->table_id, ptr->table);
     }
 #ifdef HAVE_QUERY_CACHE
-    query_cache.invalidate_locked_for_write(rli->tables_to_lock);
+    query_cache.invalidate_locked_for_write(thd, rli->tables_to_lock);
 #endif
   }
 
@@ -1597,7 +1602,7 @@ int Old_rows_log_event::do_apply_event(Relay_log_info const *rli)
       TIMESTAMP column to a table with one.
       So we call set_time(), like in SBR. Presently it changes nothing.
     */
-    thd->set_time((time_t)when);
+    thd->set_time(when, when_sec_part);
     /*
       There are a few flags that are replicated with each row event.
       Make sure to set/clear them before executing the main body of
@@ -2065,7 +2070,7 @@ Old_rows_log_event::write_row(const Relay_log_info *const rli,
     if (table->file->ha_table_flags() & HA_DUPLICATE_POS)
     {
       DBUG_PRINT("info",("Locating offending record using rnd_pos()"));
-      error= table->file->rnd_pos(table->record[1], table->file->dup_ref);
+      error= table->file->ha_rnd_pos(table->record[1], table->file->dup_ref);
       if (error)
       {
         DBUG_PRINT("info",("rnd_pos() returns error %d",error));
@@ -2097,10 +2102,10 @@ Old_rows_log_event::write_row(const Relay_log_info *const rli,
 
       key_copy((uchar*)key.get(), table->record[0], table->key_info + keynum,
                0);
-      error= table->file->index_read_idx_map(table->record[1], keynum,
-                                             (const uchar*)key.get(),
-                                             HA_WHOLE_KEY,
-                                             HA_READ_KEY_EXACT);
+      error= table->file->ha_index_read_idx_map(table->record[1], keynum,
+                                                (const uchar*)key.get(),
+                                                HA_WHOLE_KEY,
+                                                HA_READ_KEY_EXACT);
       if (error)
       {
         DBUG_PRINT("info",("index_read_idx() returns error %d", error));
@@ -2251,13 +2256,13 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
       length. Something along these lines should work:
 
       ADD>>>  store_record(table,record[1]);
-              int error= table->file->rnd_pos(table->record[0], table->file->ref);
+              int error= table->file->ha_rnd_pos(table->record[0], table->file->ref);
       ADD>>>  DBUG_ASSERT(memcmp(table->record[1], table->record[0],
                                  table->s->reclength) == 0);
 
     */
     DBUG_PRINT("info",("locating record using primary key (position)"));
-    int error= table->file->rnd_pos_by_record(table->record[0]);
+    int error= table->file->ha_rnd_pos_by_record(table->record[0]);
     if (error)
     {
       DBUG_PRINT("info",("rnd_pos returns error %d",error));
@@ -2303,7 +2308,7 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
       Don't print debug messages when running valgrind since they can
       trigger false warnings.
      */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
     DBUG_DUMP("key data", m_key, table->key_info->key_length);
 #endif
 
@@ -2317,9 +2322,9 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
       table->s->null_bytes > 0 ? table->s->null_bytes - 1 : 0;
     table->record[0][pos]= 0xFF;
     
-    if ((error= table->file->index_read_map(table->record[0], m_key, 
-                                            HA_WHOLE_KEY,
-                                            HA_READ_KEY_EXACT)))
+    if ((error= table->file->ha_index_read_map(table->record[0], m_key, 
+                                               HA_WHOLE_KEY,
+                                               HA_READ_KEY_EXACT)))
     {
       DBUG_PRINT("info",("no record matching the key found in the table"));
       if (error == HA_ERR_RECORD_DELETED)
@@ -2333,7 +2338,7 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
     DBUG_PRINT("info",("found first matching record")); 
     DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
 #endif
@@ -2408,7 +2413,7 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
           256U - (1U << table->s->last_null_bit_pos);
       }
 
-      while ((error= table->file->index_next(table->record[0])))
+      while ((error= table->file->ha_index_next(table->record[0])))
       {
         /* We just skip records that has already been deleted */
         if (error == HA_ERR_RECORD_DELETED)
@@ -2432,11 +2437,10 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
     int restart_count= 0; // Number of times scanning has restarted from top
 
     /* We don't have a key: search the table using rnd_next() */
-    if ((error= table->file->ha_rnd_init(1)))
+    if ((error= table->file->ha_rnd_init_with_error(1)))
     {
       DBUG_PRINT("info",("error initializing table scan"
                          " (ha_rnd_init returns %d)",error));
-      table->file->print_error(error, MYF(0));
       DBUG_RETURN(error);
     }
 
@@ -2444,7 +2448,7 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
     do
     {
   restart_rnd_next:
-      error= table->file->rnd_next(table->record[0]);
+      error= table->file->ha_rnd_next(table->record[0]);
 
       switch (error) {
 
@@ -2456,7 +2460,11 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
 
       case HA_ERR_END_OF_FILE:
         if (++restart_count < 2)
-          table->file->ha_rnd_init(1);
+        {
+          int error2;
+          if ((error2= table->file->ha_rnd_init_with_error(1)))
+            DBUG_RETURN(error2);
+        }
         break;
 
       default:
@@ -2855,7 +2863,7 @@ Update_rows_log_event_old::do_exec_row(const Relay_log_info *const rli)
     Now we have the right row to update.  The old row (the one we're
     looking for) is in record[1] and the new row is in record[0].
   */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
   /*
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
diff --git a/sql/log_event_old.h b/sql/log_event_old.h
index 719802a80fb..da5cf403fdb 100644
--- a/sql/log_event_old.h
+++ b/sql/log_event_old.h
@@ -1,4 +1,4 @@
-/* Copyright 2007 MySQL AB. All rights reserved.
+/* Copyright 2007 MySQL AB.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
diff --git a/sql/log_slow.h b/sql/log_slow.h
new file mode 100644
index 00000000000..92a2d1bf4f6
--- /dev/null
+++ b/sql/log_slow.h
@@ -0,0 +1,35 @@
+/* Copyright (C) 2009 Monty Program Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 or later of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/* Defining what to log to slow log.  Each flag value is fully parenthesized
+   so that it expands as a single operand inside any larger expression. */
+#define LOG_SLOW_VERBOSITY_INIT           0
+#define LOG_SLOW_VERBOSITY_INNODB         (1 << 0)
+#define LOG_SLOW_VERBOSITY_QUERY_PLAN     (1 << 1)
+
+#define QPLAN_INIT            QPLAN_QC_NO
+
+#define QPLAN_ADMIN           (1 << 0)
+#define QPLAN_FILESORT        (1 << 1)
+#define QPLAN_FILESORT_DISK   (1 << 2)
+#define QPLAN_FULL_JOIN       (1 << 3)
+#define QPLAN_FULL_SCAN       (1 << 4)
+#define QPLAN_QC              (1 << 5)
+#define QPLAN_QC_NO           (1 << 6)
+#define QPLAN_TMP_DISK        (1 << 7)
+#define QPLAN_TMP_TABLE       (1 << 8)
+/* ... */
+#define QPLAN_MAX             (((ulong) 1) << 31) /* reserved as placeholder */
+
diff --git a/sql/mdl.cc b/sql/mdl.cc
index 046ac25703a..d29f6a112d4 100644
--- a/sql/mdl.cc
+++ b/sql/mdl.cc
@@ -2223,9 +2223,6 @@ bool MDL_context::visit_subgraph(MDL_wait_for_graph_visitor *gvisitor)
   @note If during deadlock resolution context which performs deadlock
         detection is chosen as a victim it will be informed about the
         fact by setting VICTIM status to its wait slot.
-
-  @retval TRUE  A deadlock is found.
-  @retval FALSE No deadlock found.
 */
 
 void MDL_context::find_deadlock()
diff --git a/sql/mdl.h b/sql/mdl.h
index fb2de45b831..3a0ac0037aa 100644
--- a/sql/mdl.h
+++ b/sql/mdl.h
@@ -26,6 +26,7 @@
 
 #include "sql_plist.h"
 #include <my_sys.h>
+#include <my_pthread.h>
 #include <m_string.h>
 #include <mysql_com.h>
 
diff --git a/sql/mf_iocache.cc b/sql/mf_iocache.cc
index 17e406ab8a3..d8848c1ee35 100644
--- a/sql/mf_iocache.cc
+++ b/sql/mf_iocache.cc
@@ -86,6 +86,12 @@ int _my_b_net_read(register IO_CACHE *info, uchar *Buffer,
 }
 
 } /* extern "C" */
+
+#elif defined(__WIN__)
+
+// Remove linker warning 4221 about empty file
+namespace { char dummy; };
+
 #endif /* HAVE_REPLICATION */
 
 
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
new file mode 100644
index 00000000000..130ab676e7b
--- /dev/null
+++ b/sql/multi_range_read.cc
@@ -0,0 +1,1818 @@
+/* Copyright (C) 2010, 2011 Monty Program Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include "sql_parse.h"
+#include <my_bit.h>
+#include "sql_select.h"
+#include "key.h"
+
+/****************************************************************************
+ * Default MRR implementation (MRR to non-MRR converter)
+ ***************************************************************************/
+
+/**
+  Get cost and other information about MRR scan over a known list of ranges
+
+  Calculate estimated cost and other information about an MRR scan for given
+  sequence of ranges.
+
+  @param keyno           Index number
+  @param seq             Range sequence to be traversed
+  @param seq_init_param  First parameter for seq->init()
+  @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
+                         can't efficiently determine it
+  @param bufsz    INOUT  IN:  Size of the buffer available for use
+                         OUT: Size of the buffer that is expected to be actually
+                              used, or 0 if buffer is not needed.
+  @param flags    INOUT  A combination of HA_MRR_* flags
+  @param cost     OUT    Estimated cost of MRR access
+
+  @note
+    This method (or an overriding one in a derived class) must check for
+    thd->killed and return HA_POS_ERROR if it is not zero. This is required
+    for a user to be able to interrupt the calculation by killing the
+    connection/query.
+
+  @retval
+    HA_POS_ERROR  Error or the engine is unable to perform the requested
+                  scan. Values of OUT parameters are undefined.
+  @retval
+    other         OK, *cost contains cost of the scan, *bufsz and *flags
+                  contain scan parameters.
+*/
+
+ha_rows 
+handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                     void *seq_init_param, uint n_ranges_arg,
+                                     uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  KEY_MULTI_RANGE range;
+  range_seq_t seq_it;
+  ha_rows rows, total_rows= 0;
+  uint n_ranges=0;
+  THD *thd= current_thd;
+  
+  /* Default MRR implementation doesn't need buffer */
+  *bufsz= 0;
+  /* NOTE(review): seq->init() gets the local n_ranges (0), not n_ranges_arg */
+  seq_it= seq->init(seq_init_param, n_ranges, *flags);
+  while (!seq->next(seq_it, &range))
+  {
+    if (unlikely(thd->killed != 0))
+      return HA_POS_ERROR;
+    /* Count the range and choose the end points for records_in_range() */
+    n_ranges++;
+    key_range *min_endp, *max_endp;
+    if (range.range_flag & GEOM_FLAG)
+    {
+      /* Here range_flag with GEOM_FLAG cleared is the handler-read-function */
+      range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG);
+      min_endp= &range.start_key;
+      max_endp= NULL;
+    }
+    else
+    {
+      min_endp= range.start_key.length? &range.start_key : NULL;
+      max_endp= range.end_key.length? &range.end_key : NULL;
+    }
+    if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
+      rows= 1; /* there can be at most one row */
+    else
+    {
+      if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, 
+                                                        max_endp)))
+      {
+        /* Can't scan one range => can't do MRR scan at all */
+        total_rows= HA_POS_ERROR;
+        break;
+      }
+    }
+    total_rows += rows;
+  }
+  /* Fill in *flags and *cost only on success; on failure they are not touched */
+  if (total_rows != HA_POS_ERROR)
+  {
+    /* The following calculation is the same as in multi_range_read_info(): */
+    *flags |= HA_MRR_USE_DEFAULT_IMPL;
+    cost->zero();
+    cost->avg_io_cost= 1; /* assume random seeks */
+    if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
+      cost->io_count= keyread_time(keyno, n_ranges, (uint)total_rows);
+    else
+      cost->io_count= read_time(keyno, n_ranges, total_rows);
+    cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
+  }
+  return total_rows;
+}
+
+
+/**
+  Get cost and other information about MRR scan over some sequence of ranges
+
+  Calculate estimated cost and other information about an MRR scan for some
+  sequence of ranges.
+
+  The ranges themselves will be known only at execution phase. When this
+  function is called we only know number of ranges and a (rough) E(#records)
+  within those ranges.
+
+  Currently this function is only called for "n-keypart singlepoint" ranges,
+  i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
+
+  The flags parameter is a combination of those flags: HA_MRR_SORTED,
+  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
+
+  @param keyno           Index number
+  @param n_ranges        Estimated number of ranges (intervals) in the sequence
+  @param n_rows          Estimated total number of records contained within all
+                         of the ranges
+  @param key_parts       Not used by this default implementation
+  @param bufsz    INOUT  IN:  Size of the buffer available for use
+                         OUT: Size of the buffer that will be actually used, or
+                              0 if buffer is not needed.
+  @param flags    INOUT  A combination of HA_MRR_* flags
+  @param cost     OUT    Estimated cost of MRR access
+
+  @retval
+    0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
+          parameters.
+  @retval
+    other Error or can't perform the requested scan
+*/
+
+ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
+                                       uint key_parts, uint *bufsz, 
+                                       uint *flags, COST_VECT *cost)
+{
+  /* 
+    Currently we expect this function to be called only in preparation of scan
+    with HA_MRR_SINGLE_POINT property, so check that the flag is really set.
+  */
+  DBUG_ASSERT(*flags & HA_MRR_SINGLE_POINT);
+
+  *bufsz= 0; /* Default implementation doesn't need a buffer */
+  *flags |= HA_MRR_USE_DEFAULT_IMPL;
+
+  cost->zero();
+  cost->avg_io_cost= 1; /* assume random seeks */
+
+  /* Produce the same cost as non-MRR code does */
+  if (*flags & HA_MRR_INDEX_ONLY)
+    cost->io_count= keyread_time(keyno, n_ranges, n_rows);
+  else
+    cost->io_count= read_time(keyno, n_ranges, n_rows);
+  return 0;
+}
+
+
+/**
+  Initialize the MRR scan
+
+  Initialize the MRR scan. This function may do heavyweight scan 
+  initialization like row prefetching/sorting/etc (NOTE: but better not do
+  it here as we may not need it, e.g. if we never satisfy WHERE clause on
+  previous tables. For many implementations it would be natural to do such
+  initializations in the first multi_range_read_next() call)
+
+  mode is a combination of the following flags: HA_MRR_SORTED,
+  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION 
+
+  @param seq             Range sequence to be traversed
+  @param seq_init_param  First parameter for seq->init()
+  @param n_ranges        Number of ranges in the sequence
+  @param mode            Flags, see the description section for the details
+  @param buf             INOUT: memory buffer to be used
+
+  @note
+    One must have called index_init() before calling this function. Several
+    multi_range_read_init() calls may be made in course of one query.
+
+    Until WL#2623 is done (see its text, section 3.2), the following will 
+    also hold:
+    The caller will guarantee that if "seq->init == mrr_ranges_array_init"
+    then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
+    This property will only be used by NDB handler until WL#2623 is done.
+     
+    Buffer memory management is done according to the following scenario:
+    The caller allocates the buffer and provides it to the callee by filling
+    the members of HANDLER_BUFFER structure.
+    The callee consumes all or some fraction of the provided buffer space, and
+    sets the HANDLER_BUFFER members accordingly.
+    The callee may use the buffer memory until the next multi_range_read_init()
+    call is made, all records have been read, or until index_end() call is
+    made, whichever comes first.
+
+  @retval 0  OK
+  @retval 1  Error
+*/
+
+int
+handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
+                               uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+{
+  DBUG_ENTER("handler::multi_range_read_init");
+  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
+  mrr_have_range= FALSE;              /* no range has been fetched yet */
+  mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
+  mrr_funcs= *seq_funcs;              /* keep our own copy of the callbacks */
+  DBUG_RETURN(0);
+}
+
+/**
+  Get next record in MRR scan
+
+  Default MRR implementation: read the next record
+
+  @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
+                          Otherwise, the opaque value associated with the range
+                          that contains the returned record.
+
+  @retval 0      OK
+  @retval other  Error code
+*/
+
+int handler::multi_range_read_next(range_id_t *range_info)
+{
+  int result= HA_ERR_END_OF_FILE;
+  bool range_res;
+  DBUG_ENTER("handler::multi_range_read_next");
+  /* First call for this scan: there is no current range yet, so fetch one */
+  if (!mrr_have_range)
+  {
+    mrr_have_range= TRUE;
+    goto start;
+  }
+  /* Otherwise continue the current range, moving to further ranges at EOF */
+  do
+  {
+    /* Save a call if there can be only one row in range. */
+    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
+    {
+      result= read_range_next();
+      /* On success or non-EOF errors jump to the end. */
+      if (result != HA_ERR_END_OF_FILE)
+        break;
+    }
+    else
+    {
+      if (was_semi_consistent_read())
+      {
+        /*
+          The following assignment is redundant, but for extra safety and to
+          remove the compiler warning:
+        */
+        range_res= FALSE;
+        goto scan_it_again;
+      }
+      /*
+        We need to set this for the last range only, but checking this
+        condition is more expensive than just setting the result code.
+      */
+      result= HA_ERR_END_OF_FILE;
+    }
+
+start:
+    /* Try the next range(s) until one matches a record. */
+    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
+    {
+scan_it_again:
+      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
+                                 &mrr_cur_range.start_key : 0,
+                               mrr_cur_range.end_key.keypart_map ?
+                                 &mrr_cur_range.end_key : 0,
+                               test(mrr_cur_range.range_flag & EQ_RANGE),
+                               mrr_is_output_sorted);
+      if (result != HA_ERR_END_OF_FILE)
+        break;
+    }
+  }
+  while ((result == HA_ERR_END_OF_FILE) && !range_res);
+  /* The id of the current range is stored whatever the value of result is */
+  *range_info= mrr_cur_range.ptr;
+  DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
+  DBUG_RETURN(result);
+}
+
+/****************************************************************************
+ * Mrr_*_reader classes (building blocks for DS-MRR)
+ ***************************************************************************/
+
+int Mrr_simple_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, 
+                                  void *seq_init_param, uint n_ranges,
+                                  uint mode,  Key_parameters *key_par_arg,
+                                  Lifo_buffer *key_buffer_arg,
+                                  Buffer_manager *buf_manager_arg)
+{
+  HANDLER_BUFFER empty_buf= {NULL, NULL, NULL};  /* all members are NULL */
+  file= h_arg;
+  return h_arg->handler::multi_range_read_init(seq_funcs, seq_init_param,
+                                               n_ranges, mode, &empty_buf);
+}
+
+
+int Mrr_simple_index_reader::get_next(range_id_t *range_info)
+{
+  int rc;
+  while ((rc= file->handler::multi_range_read_next(range_info)) == 0)
+  {
+    /* Stop at the first record that skip_index_tuple() does not filter out */
+    if (!(file->mrr_funcs.skip_index_tuple &&
+          file->mrr_funcs.skip_index_tuple(file->mrr_iter,
+                                           file->handler::mrr_cur_range.ptr)))
+      break;
+  }
+  if (rc != 0 && rc != HA_ERR_END_OF_FILE && rc != HA_ERR_KEY_NOT_FOUND)
+    file->print_error(rc, MYF(0));              // Fatal error
+  return rc;
+}
+
+
+/**
+  @brief Get next index record
+
+  @param range_info  OUT identifier of range that the returned record belongs to
+  
+  @note
+    We actually iterate over nested sequences:
+    - an ordered sequence of groups of identical keys
+      - each key group has key value, which has multiple matching records 
+        - thus, each record matches all members of the key group
+
+  @retval 0                   OK, next record was successfully read
+  @retval HA_ERR_END_OF_FILE  End of records
+  @retval Other               Some other error; Error is printed
+*/
+
+int Mrr_ordered_index_reader::get_next(range_id_t *range_info)
+{
+  int res;
+  DBUG_ENTER("Mrr_ordered_index_reader::get_next");
+  /* Loop until a record passes both skip_index_tuple() and skip_record() */
+  for(;;)
+  {
+    if (!scanning_key_val_iter)
+    {
+      while ((res= kv_it.init(this)))
+      {
+        if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE))
+          DBUG_RETURN(res); /* Some fatal error */
+        /* Not fatal: stop if the key buffer is empty, else retry init() */
+        if (key_buffer->is_empty())
+        {
+          DBUG_RETURN(HA_ERR_END_OF_FILE);
+        }
+      }
+      scanning_key_val_iter= TRUE;
+    }
+    /* Read the next record from the current key-value iterator */
+    if ((res= kv_it.get_next(range_info)))
+    {
+      scanning_key_val_iter= FALSE;
+      if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE))
+        DBUG_RETURN(res); /* Some fatal error */
+      kv_it.move_to_next_key_value();
+      continue;
+    }
+    if (!skip_index_tuple(*range_info) &&
+        !skip_record(*range_info, NULL))
+    {
+      break;
+    }
+    /* Go get another (record, range_id) combination */
+  } /* for(;;) */
+
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Supply index reader with the O(1) space it needs for scan interrupt/restore
+  operation
+*/
+
+bool Mrr_ordered_index_reader::set_interruption_temp_buffer(uint rowid_length,
+                                                            uint key_len, 
+                                                            uint saved_pk_len,
+                                                            uchar **space_start,
+                                                            uchar *space_end)
+{
+  uchar *cursor= *space_start;
+  const ptrdiff_t needed= (ptrdiff_t)(rowid_length + key_len + saved_pk_len);
+  if (space_end - cursor <= needed)
+    return TRUE;                              /* the area is too small */
+
+  /* Carve the area up as: rowid, [primary key value], key tuple */
+  saved_rowid= cursor;
+  cursor+= rowid_length;
+
+  saved_primary_key= saved_pk_len ? cursor : NULL;
+  cursor+= saved_pk_len;
+
+  saved_key_tuple= cursor;
+  cursor+= key_len;
+
+  /* Hand the unused remainder back to the caller */
+  *space_start= cursor;
+  support_scan_interruptions= TRUE;
+  have_saved_rowid= FALSE;
+  return FALSE;
+}
+
+void Mrr_ordered_index_reader::set_no_interruption_temp_buffer()
+{
+  saved_primary_key= saved_rowid= saved_key_tuple= NULL; /* safety */
+  have_saved_rowid= FALSE;
+  support_scan_interruptions= FALSE;
+}
+
+void Mrr_ordered_index_reader::interrupt_read()
+{
+  DBUG_ASSERT(support_scan_interruptions);
+  TABLE *tbl= file->get_table();
+
+  /* Remember the key value of the index that is being scanned */
+  key_copy(saved_key_tuple, tbl->record[0], &tbl->key_info[file->active_index],
+           keypar.key_tuple_length);
+
+  /* Remember the primary key value too, if space was reserved for it */
+  if (saved_primary_key)
+  {
+    KEY *pk_info= &tbl->key_info[tbl->s->primary_key];
+    key_copy(saved_primary_key, tbl->record[0], pk_info, pk_info->key_length);
+  }
+
+  /* Remember the last rowid */
+  memcpy(saved_rowid, file->ref, file->ref_length);
+  have_saved_rowid= TRUE;
+}
+
+void Mrr_ordered_index_reader::position()
+{
+  if (!have_saved_rowid)
+    Mrr_index_reader::position();     /* no saved rowid: use the base version */
+  else
+    memcpy(file->ref, saved_rowid, file->ref_length); /* reuse the saved one */
+}
+
+void Mrr_ordered_index_reader::resume_read()
+{
+  TABLE *tbl= file->get_table();
+  /* Put the saved key value back into record[0] */
+  key_restore(tbl->record[0], saved_key_tuple,
+              &tbl->key_info[file->active_index], keypar.key_tuple_length);
+  if (!saved_primary_key)
+    return;
+  /* ... and the primary key value, when there is a buffer for it */
+  KEY *pk_info= &tbl->key_info[tbl->s->primary_key];
+  key_restore(tbl->record[0], saved_primary_key, pk_info,
+              pk_info->key_length);
+}
+
+
+/**
+  Fill the buffer with (lookup_tuple, range_id) pairs and sort
+*/
+
+int Mrr_ordered_index_reader::refill_buffer(bool initial)
+{
+  KEY_MULTI_RANGE cur_range;
+  DBUG_ENTER("Mrr_ordered_index_reader::refill_buffer");
+  /* The caller must have consumed all previously buffered keys */
+  DBUG_ASSERT(key_buffer->is_empty());
+  /* Once the range source has reported its end there is nothing to add */
+  if (source_exhausted)
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  /* NOTE: the 'initial' argument is not used by this implementation */
+  buf_manager->reset_buffer_sizes(buf_manager->arg);
+  key_buffer->reset();
+  key_buffer->setup_writing(keypar.key_size_in_keybuf,
+                            is_mrr_assoc? sizeof(range_id_t) : 0);
+  /* Take ranges from the source for as long as the buffer can be written to */
+  while (key_buffer->can_write() && 
+         !(source_exhausted= mrr_funcs.next(mrr_iter, &cur_range)))
+  {
+    DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
+
+    /* Put key, or {key, range_id} pair into the buffer */
+    key_buffer->write_ptr1= keypar.use_key_pointers ?
+                              (uchar*)&cur_range.start_key.key : 
+                              (uchar*)cur_range.start_key.key;
+    key_buffer->write_ptr2= (uchar*)&cur_range.ptr;
+    key_buffer->write();
+  }
+  
+  /* Force get_next() to start with kv_it.init() call: */
+  scanning_key_val_iter= FALSE;
+
+  if (source_exhausted && key_buffer->is_empty())
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  /* A FORWARD buffer gets the reversed comparator, any other type the plain one */
+  key_buffer->sort((key_buffer->type() == Lifo_buffer::FORWARD)? 
+                     (qsort2_cmp)Mrr_ordered_index_reader::compare_keys_reverse : 
+                     (qsort2_cmp)Mrr_ordered_index_reader::compare_keys, 
+                   this);
+  DBUG_RETURN(0);
+}
+
+
+int Mrr_ordered_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+                                   void *seq_init_param, uint n_ranges,
+                                   uint mode, Key_parameters *key_par_arg,
+                                   Lifo_buffer *key_buffer_arg,
+                                   Buffer_manager *buf_manager_arg)
+{
+  file= h_arg;
+  key_buffer= key_buffer_arg;
+  buf_manager= buf_manager_arg;
+  keypar= *key_par_arg;
+  /* index_ranges_unique: HA_NOSAME index whose key_parts all appear in the map */
+  KEY *key_info= &file->get_table()->key_info[file->active_index];
+  keypar.index_ranges_unique= test(key_info->flags & HA_NOSAME && 
+                                   key_info->key_parts == 
+                                   my_count_bits(keypar.key_tuple_map));
+  /* Start the range sequence and reset the per-scan state */
+  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
+  is_mrr_assoc=    !test(mode & HA_MRR_NO_ASSOCIATION);
+  mrr_funcs= *seq_funcs;
+  source_exhausted= FALSE;
+  if (support_scan_interruptions)
+    bzero(saved_key_tuple, keypar.key_tuple_length); /* clear the saved key */
+  have_saved_rowid= FALSE;
+  return 0;
+}
+
+
+static int rowid_cmp_reverse(void *file, uchar *a, uchar *b)
+{
+  return -(static_cast<handler*>(file)->cmp_ref(a, b)); /* negated cmp_ref() */
+}
+
+
+int Mrr_ordered_rndpos_reader::init(handler *h_arg, 
+                                    Mrr_index_reader *index_reader_arg,
+                                    uint mode,
+                                    Lifo_buffer *buf)
+{
+  index_reader_needs_refill= TRUE;    /* index reader has not been filled yet */
+  index_reader_exhausted= FALSE;
+  is_mrr_assoc= (mode & HA_MRR_NO_ASSOCIATION) == 0;
+  rowid_buffer= buf;
+  index_reader= index_reader_arg;
+  file= h_arg;
+  return 0;
+}
+
+
+/**
+  DS-MRR: Fill and sort the rowid buffer
+
+  Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into 
+  buffer. When the buffer is full or scan is completed, sort the buffer by 
+  rowid and return.
+
+  When this function returns, either rowid buffer is not empty, or the source
+  of lookup keys (i.e. ranges) is exhausted.
+  
+  @retval 0      OK, the next portion of rowids is in the buffer,
+                 properly ordered
+  @retval other  Error
+*/
+
+int Mrr_ordered_rndpos_reader::refill_buffer(bool initial)
+{
+  int res;
+  DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_buffer");
+  /* An earlier call already found the index reader to be at its end */
+  if (index_reader_exhausted)
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  /* Refill the index reader whenever it is known, or found, to be drained */
+  while (initial || index_reader_needs_refill || 
+         (res= refill_from_index_reader()) == HA_ERR_END_OF_FILE)
+  {
+    if ((res= index_reader->refill_buffer(initial)))
+    {
+      if (res == HA_ERR_END_OF_FILE)
+        index_reader_exhausted= TRUE;
+      break;
+    }
+    initial= FALSE;
+    index_reader_needs_refill= FALSE;
+  }
+  DBUG_RETURN(res);
+}
+
+
+void Mrr_index_reader::position()
+{
+  file->position(file->get_table()->record[0]); /* presumably fills file->ref */
+}
+
+
+/* 
+  @brief Try to refill the rowid buffer without calling
+  index_reader->refill_buffer(). 
+*/
+
+int Mrr_ordered_rndpos_reader::refill_from_index_reader()
+{
+  range_id_t range_info;
+  int res;
+  DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_from_index_reader");
+  /* Only to be called when every buffered rowid has been consumed */
+  DBUG_ASSERT(rowid_buffer->is_empty());
+  index_rowid= index_reader->get_rowid_ptr();
+  rowid_buffer->reset();
+  rowid_buffer->setup_writing(file->ref_length,
+                              is_mrr_assoc? sizeof(range_id_t) : 0);
+
+  last_identical_rowid= NULL;
+  /* Let the index reader restore its scan state, then collect rowids */
+  index_reader->resume_read();
+  while (rowid_buffer->can_write())
+  {
+    res= index_reader->get_next(&range_info);
+
+    if (res)
+    {
+      if (res != HA_ERR_END_OF_FILE)
+        DBUG_RETURN(res);
+      index_reader_needs_refill=TRUE;
+      break;
+    }
+    /* Get the rowid of the record just read (presumably at index_rowid) */
+    index_reader->position();
+
+    /* Put rowid, or {rowid, range_id} pair into the buffer */
+    rowid_buffer->write_ptr1= index_rowid;
+    rowid_buffer->write_ptr2= (uchar*)&range_info;
+    rowid_buffer->write();
+  }
+  /* Let the index reader save the state of its scan */
+  index_reader->interrupt_read();
+  /* Sort the buffer contents by rowid */
+  rowid_buffer->sort((qsort2_cmp)rowid_cmp_reverse, (void*)file);
+
+  rowid_buffer->setup_reading(file->ref_length,
+                              is_mrr_assoc ? sizeof(range_id_t) : 0);
+  DBUG_RETURN(rowid_buffer->is_empty()? HA_ERR_END_OF_FILE : 0);
+}
+
+
+/*
+  Get the next {record, range_id} using ordered array of rowid+range_id pairs
+
+  @note
+    Since we have sorted rowids, we try not to make multiple rnd_pos() calls
+    with the same rowid value.
+*/
+
+int Mrr_ordered_rndpos_reader::get_next(range_id_t *range_info)
+{
+  int res;
+  
+  /* 
+    First, check if rowid buffer has elements with the same rowid value as
+    the previous.
+  */
+  while (last_identical_rowid)
+  {
+    /*
+      Current record (the one we've returned in previous call) was obtained
+      from a rowid that matched multiple range_ids. Return this record again,
+      with next matching range_id.
+    */
+    (void)rowid_buffer->read();
+
+    if (rowid_buffer->read_ptr1 == last_identical_rowid)
+      last_identical_rowid= NULL; /* reached the last of identical rowids */
+    /* Without range association there is no range_id to copy or filter on */
+    if (!is_mrr_assoc)
+      return 0;
+
+    memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t));
+    if (!index_reader->skip_record(*range_info, rowid_buffer->read_ptr1))
+      return 0;
+  }
+  
+  /* 
+     Ok, last_identical_rowid==NULL, it's time to read next different rowid
+     value and get record for it.
+  */
+  for(;;)
+  {
+    /* Return eof if there are no rowids in the buffer after re-fill attempt */
+    if (rowid_buffer->read())
+      return HA_ERR_END_OF_FILE;
+
+    if (is_mrr_assoc)
+    {
+      memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t));
+      if (index_reader->skip_record(*range_info, rowid_buffer->read_ptr1))
+        continue;
+    }
+    /* Fetch the row for this rowid into record[0] */
+    res= file->ha_rnd_pos(file->get_table()->record[0], 
+                          rowid_buffer->read_ptr1);
+
+    if (res == HA_ERR_RECORD_DELETED)
+    {
+      /* not likely to get this code with current storage engines, but still */
+      continue;
+    }
+
+    if (res)
+      return res; /* Some fatal error */
+
+    break; /* Got another record */
+  }
+
+  /* 
+    Check if subsequent buffer elements have the same rowid value as this
+    one. If yes, remember this fact so that we don't make any more rnd_pos()
+    calls with this value.
+
+    Note: this implies that SQL layer doesn't touch table->record[0]
+    between calls.
+  */
+  Lifo_buffer_iterator it;
+  it.init(rowid_buffer);
+  while (!it.read())
+  {
+    if (file->cmp_ref(it.read_ptr1, rowid_buffer->read_ptr1))
+      break;
+    last_identical_rowid= it.read_ptr1; /* last element found with this rowid */
+  }
+  return 0;
+}
+
+
+/****************************************************************************
+ * Top-level DS-MRR implementation functions (the ones called by storage engine)
+ ***************************************************************************/
+
+/**
+  DS-MRR: Initialize and start MRR scan
+
+  Initialize and start the MRR scan. Depending on the mode parameter, this
+  may use default or DS-MRR implementation.
+
+  @param h_arg           Table handler to be used
+  @param key             Index to be used
+  @param seq_funcs       Interval sequence enumeration functions
+  @param seq_init_param  Interval sequence enumeration parameter
+  @param n_ranges        Number of ranges in the sequence.
+  @param mode            HA_MRR_* modes to use
+  @param buf             INOUT Buffer to use
+
+  @retval 0     Ok, Scan started.
+  @retval other Error
+*/
+
+int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, 
+                           void *seq_init_param, uint n_ranges, uint mode,
+                           HANDLER_BUFFER *buf)
+{
+  THD *thd= current_thd;
+  int res;
+  Key_parameters keypar;
+  uint key_buff_elem_size;
+  handler *h_idx;
+  Mrr_ordered_rndpos_reader *disk_strategy= NULL;
+  bool do_sort_keys= FALSE;
+  DBUG_ENTER("DsMrr_impl::dsmrr_init");
+  LINT_INIT(key_buff_elem_size); /* set/used when do_sort_keys==TRUE */
+  /*
+    index_merge may invoke a scan on an object for which dsmrr_info[_const]
+    has not been called, so set the owner handler here as well.
+  */
+  primary_file= h_arg;
+  is_mrr_assoc=    !test(mode & HA_MRR_NO_ASSOCIATION);
+
+  strategy_exhausted= FALSE;
+  
+  /* By default, have do-nothing buffer manager */
+  buf_manager.arg= this;
+  buf_manager.reset_buffer_sizes= do_nothing;
+  buf_manager.redistribute_buffer_space= do_nothing;
+
+  if (mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))
+    goto use_default_impl;
+  
+  /*
+    Determine whether we'll need to do key sorting and/or rnd_pos() scan
+  */
+  index_strategy= NULL;
+  if ((mode & HA_MRR_SINGLE_POINT) &&
+      optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
+  {
+    do_sort_keys= TRUE;
+    index_strategy= &reader_factory.ordered_index_reader;
+  }
+  else
+    index_strategy= &reader_factory.simple_index_reader;
+
+  strategy= index_strategy;
+  /*
+    We don't need a rowid-to-rndpos step if
+     - We're doing a scan on clustered primary key
+     - [In the future] We're doing an index_only read
+  */
+  DBUG_ASSERT(primary_file->inited == handler::INDEX || 
+              (primary_file->inited == handler::RND && 
+               secondary_file && 
+               secondary_file->inited == handler::INDEX));
+
+  h_idx= (primary_file->inited == handler::INDEX)? primary_file: secondary_file;
+  keyno= h_idx->active_index;
+
+  if (!(keyno == table->s->primary_key && h_idx->primary_key_is_clustered()))
+  {
+    strategy= disk_strategy= &reader_factory.ordered_rndpos_reader;
+  }
+
+  if (is_mrr_assoc)
+    status_var_increment(thd->status_var.ha_multi_range_read_init_count);
+
+  full_buf= buf->buffer;
+  full_buf_end= buf->buffer_end;
+
+  if (do_sort_keys)
+  {
+    /* Pre-calculate some parameters of key sorting */
+    keypar.use_key_pointers= test(mode & HA_MRR_MATERIALIZED_KEYS);
+    seq_funcs->get_key_info(seq_init_param, &keypar.key_tuple_length, 
+                            &keypar.key_tuple_map);
+    keypar.key_size_in_keybuf= keypar.use_key_pointers? 
+                                 sizeof(char*) : keypar.key_tuple_length;
+    key_buff_elem_size= keypar.key_size_in_keybuf + (int)is_mrr_assoc * sizeof(void*);
+    
+    /* Ordered index reader needs some space to store an index tuple */
+    if (strategy != index_strategy)
+    {
+      uint saved_pk_length=0;
+      if (h_idx->primary_key_is_clustered())
+      {
+        uint pk= h_idx->get_table()->s->primary_key;
+        saved_pk_length= h_idx->get_table()->key_info[pk].key_length;
+      }
+
+      if (reader_factory.ordered_index_reader.
+            set_interruption_temp_buffer(primary_file->ref_length,
+                                         keypar.key_tuple_length,
+                                         saved_pk_length,
+                                         &full_buf, full_buf_end))
+        goto use_default_impl;
+    }
+    else
+      reader_factory.ordered_index_reader.set_no_interruption_temp_buffer();
+  }
+
+  if (strategy == index_strategy)
+  {
+    /* 
+      Index strategy alone handles the record retrieval. Give all buffer space
+      to it. Key buffer should have forward orientation so we can return the
+      end of it.
+    */
+    key_buffer= &forward_key_buf;
+    key_buffer->set_buffer_space(full_buf, full_buf_end);
+    
+    /* Safety: specify that rowid buffer has zero size: */
+    rowid_buffer.set_buffer_space(full_buf_end, full_buf_end);
+
+    if (do_sort_keys && !key_buffer->have_space_for(key_buff_elem_size))
+      goto use_default_impl;
+
+    if ((res= index_strategy->init(primary_file, seq_funcs, seq_init_param, n_ranges,
+                                   mode, &keypar, key_buffer, &buf_manager)))
+      goto error;
+  }
+  else
+  {
+    /* We'll have both index and rndpos strategies working together */
+    if (do_sort_keys)
+    {
+      /* Both strategies will need buffer space, share the buffer */
+      if (setup_buffer_sharing(keypar.key_size_in_keybuf, keypar.key_tuple_map))
+        goto use_default_impl;
+
+      buf_manager.reset_buffer_sizes= reset_buffer_sizes;
+      buf_manager.redistribute_buffer_space= redistribute_buffer_space;
+    }
+    else
+    {
+      /* index strategy doesn't need buffer, give all space to rowids*/
+      rowid_buffer.set_buffer_space(full_buf, full_buf_end);
+      if (!rowid_buffer.have_space_for(primary_file->ref_length + 
+                                       (int)is_mrr_assoc * sizeof(range_id_t)))
+        goto use_default_impl;
+    }
+
+    if ((res= setup_two_handlers()))
+      goto error;
+
+    if ((res= index_strategy->init(secondary_file, seq_funcs, seq_init_param,
+                                   n_ranges, mode, &keypar, key_buffer, 
+                                   &buf_manager)) || 
+        (res= disk_strategy->init(primary_file, index_strategy, mode, 
+                                  &rowid_buffer)))
+    {
+      goto error;
+    }
+  }
+
+  res= strategy->refill_buffer(TRUE);
+  if (res)
+  {
+    if (res != HA_ERR_END_OF_FILE)
+      goto error;
+    strategy_exhausted= TRUE;
+  }
+
+  /*
+    If we have scanned through all intervals in *seq, then adjust *buf to 
+    indicate that the remaining buffer space will not be used.
+  */
+//  if (dsmrr_eof) 
+//    buf->end_of_used_area= rowid_buffer.end_of_space();
+
+  
+  DBUG_RETURN(0);
+error:
+  close_second_handler();
+   /* Safety, not really needed but: */
+  strategy= NULL;
+  DBUG_RETURN(res);
+
+use_default_impl:
+  if (primary_file->inited != handler::INDEX)
+  {
+    /* We can get here when 
+       - we've previously successfully done a DS-MRR scan (and so have 
+         secondary_file!= NULL, secondary_file->inited= INDEX, 
+         primary_file->inited=RND)
+       - for this invocation, we haven't got enough buffer space, and so we
+         have to use the default MRR implementation.
+
+      note: primary_file->ha_index_end() will call dsmrr_close() which will
+      close/destroy the secondary_file, this is intentional. 
+      (Yes this is slow, but one can't expect performance with join buffer 
+       so small that it can accommodate one rowid and one index tuple)
+    */
+    if ((res= primary_file->ha_rnd_end()) || 
+        (res= primary_file->ha_index_init(keyno, test(mode & HA_MRR_SORTED))))
+    {
+      DBUG_RETURN(res);
+    }
+  }
+  /* Call correct init function and assign to top level object */
+  Mrr_simple_index_reader *s= &reader_factory.simple_index_reader;
+  res= s->init(primary_file, seq_funcs, seq_init_param, n_ranges, mode, NULL, 
+               NULL, NULL);
+  strategy= s;
+  DBUG_RETURN(res);
+}
+
+
+/*
+  Whatever the current state is, make it so that we have two handler objects:
+  - primary_file       -  initialized for rnd_pos() scan
+  - secondary_file     -  initialized for scanning the index this->keyno
+  RETURN
+    0        OK
+    1        Stack check, clone() or read-locking of the clone failed
+    other    Error code returned by the failing handler call
+*/
+
+int DsMrr_impl::setup_two_handlers()
+{
+  int res;
+  THD *thd= primary_file->get_table()->in_use;
+  DBUG_ENTER("DsMrr_impl::setup_two_handlers");
+  if (!secondary_file)
+  {
+    handler *new_h2;
+    Item *pushed_cond= NULL;
+    DBUG_ASSERT(primary_file->inited == handler::INDEX);
+    /* No second handler yet: create one by cloning primary_file. */
+    /*
+      ::clone() takes up a lot of stack, especially on 64 bit platforms.
+      The constant 5 is an empiric result.
+    */
+    if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
+      DBUG_RETURN(1);
+
+    /* The clone becomes secondary_file below and will do the index scan. */
+    if (!(new_h2= primary_file->clone(primary_file->get_table()->s->
+                                      normalized_path.str,
+                                      thd->mem_root)) || 
+        new_h2->ha_external_lock(thd, F_RDLCK))
+    {
+      delete new_h2;
+      DBUG_RETURN(1);
+    }
+    /* Remember the condition pushed for this index; the clone gets it below */
+    if (keyno == primary_file->pushed_idx_cond_keyno)
+      pushed_cond= primary_file->pushed_idx_cond;
+
+    Mrr_reader *save_strategy= strategy;
+    strategy= NULL;
+    /*
+      Caution: this call will invoke this->dsmrr_close(). Do not put the
+      created handler new_h2 into this->secondary_file before it, or it will
+      be deleted. The picked strategy is saved/restored around the call too.
+    */
+    res= primary_file->ha_index_end();
+
+    strategy= save_strategy;
+    secondary_file= new_h2;
+
+    if (res || (res= (primary_file->ha_rnd_init(FALSE))))
+      goto error;
+
+    table->prepare_for_position();
+    secondary_file->extra(HA_EXTRA_KEYREAD);
+    secondary_file->mrr_iter= primary_file->mrr_iter;
+
+    if ((res= secondary_file->ha_index_init(keyno, FALSE)))
+      goto error;
+
+    if (pushed_cond)
+      secondary_file->idx_cond_push(keyno, pushed_cond);
+  }
+  else
+  {
+    DBUG_ASSERT(secondary_file && secondary_file->inited==handler::INDEX);
+    /* 
+      We get here when the access alternates between MRR scan(s) and non-MRR
+      scans.
+
+      Calling primary_file->index_end() will invoke dsmrr_close() for this
+      object, which will delete secondary_file. We need to keep it, so put it
+      away and don't let it be deleted:
+    */
+    if (primary_file->inited == handler::INDEX)
+    {
+      handler *save_h2= secondary_file;
+      Mrr_reader *save_strategy= strategy;
+      secondary_file= NULL;
+      strategy= NULL;
+      res= primary_file->ha_index_end();
+      secondary_file= save_h2;
+      strategy= save_strategy;
+      if (res)
+        goto error;
+    }
+    if ((primary_file->inited != handler::RND) && 
+        (res= primary_file->ha_rnd_init(FALSE)))
+      goto error;
+  }
+  DBUG_RETURN(0);
+  /* Nothing is released on error; secondary_file (if set) stays a member */
+error:
+  DBUG_RETURN(res);
+}
+
+
+void DsMrr_impl::close_second_handler()
+{
+  /* Nothing to tear down when no second handler exists */
+  if (!secondary_file)
+    return;
+  secondary_file->ha_index_or_rnd_end();
+  secondary_file->ha_external_lock(current_thd, F_UNLCK);
+  secondary_file->ha_close();
+  delete secondary_file;
+  secondary_file= NULL;
+}
+
+
+void DsMrr_impl::dsmrr_close()
+{
+  DBUG_ENTER("DsMrr_impl::dsmrr_close");
+  close_second_handler();   /* closes and deletes secondary_file, if set */
+  strategy= NULL;           /* no reader is selected after a close */
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Key tuple comparison callback with a my_qsort2-compatible signature.
+  'arg' must point to the Mrr_ordered_index_reader that owns the keys.
+*/
+
+int Mrr_ordered_index_reader::compare_keys(void* arg, uchar* key1_arg, 
+                                           uchar* key2_arg)
+{
+  Mrr_ordered_index_reader *self= (Mrr_ordered_index_reader*)arg;
+  handler *h= self->file;
+  KEY_PART_INFO *key_parts= h->get_table()->key_info[h->active_index].key_part;
+
+  /* By default the buffer elements are the key tuples themselves */
+  uchar *lhs= key1_arg;
+  uchar *rhs= key2_arg;
+
+  if (self->keypar.use_key_pointers)
+  {
+    /* The elements are pointers to key tuples: load the pointers */
+    memcpy(&lhs, key1_arg, sizeof(char*));
+    memcpy(&rhs, key2_arg, sizeof(char*));
+  }
+
+  const uint tuple_len= self->keypar.key_tuple_length;
+  return key_tuple_cmp(key_parts, lhs, rhs, tuple_len);
+}
+
+
+int Mrr_ordered_index_reader::compare_keys_reverse(void* arg, uchar* key1, 
+                                                   uchar* key2)
+{
+  return -compare_keys(arg, key1, key2); /* same comparison, sign flipped */
+}
+
+
+/**
+  Set the buffer space to be shared between rowid and key buffer
+
+  @param key_size_in_keybuf  Size of a key element in the key buffer,
+                             not counting the range id
+  @param key_tuple_map       Key parts used; selects the rec_per_key value
+  @return FALSE  ok
+  @return TRUE   Not enough buffer space for one rowid element and one key
+                 element, the strategy can't be used (mainly seen in tests)
+*/
+
+bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf, 
+                                      key_part_map key_tuple_map)
+{
+  long key_buff_elem_size= key_size_in_keybuf + 
+                           (int)is_mrr_assoc * sizeof(range_id_t);
+
+  KEY *key_info= &primary_file->get_table()->key_info[keyno];
+  /* 
+    Ok if we got here we need to allocate one part of the buffer 
+    for keys and another part for rowids.
+  */
+  ulonglong rowid_buf_elem_size= primary_file->ref_length + 
+                                 (int)is_mrr_assoc * sizeof(range_id_t);
+
+  /*
+    Use rec_per_key statistics as a basis to find out how many rowids
+    we'll get for each key value. When the statistic is 0, one rowid per
+    key value is assumed. TODO: is that the right default for no statistics?
+    NOTE(review): parts - 1 below assumes key_tuple_map is non-empty.
+  */
+  uint parts= my_count_bits(key_tuple_map);
+  ulong rpc;
+  ulonglong rowids_size= rowid_buf_elem_size;
+  if ((rpc= key_info->rec_per_key[parts - 1]))
+    rowids_size= rowid_buf_elem_size * rpc;
+  /* Split the space in proportion to rowids_size : key_buff_elem_size */
+  double fraction_for_rowids=
+    (ulonglong2double(rowids_size) / 
+     (ulonglong2double(rowids_size) + key_buff_elem_size));
+
+  ptrdiff_t bytes_for_rowids= 
+    (ptrdiff_t)floor(0.5 + fraction_for_rowids * (full_buf_end - full_buf));
+
+  ptrdiff_t bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids;
+  /* A part too small for one element + 1 byte grows at the other's expense */
+  if (bytes_for_keys < key_buff_elem_size + 1)
+  {
+    ptrdiff_t add= key_buff_elem_size + 1 - bytes_for_keys;
+    bytes_for_keys= key_buff_elem_size + 1;
+    bytes_for_rowids -= add;
+  }
+
+  if (bytes_for_rowids < (ptrdiff_t)rowid_buf_elem_size + 1)
+  {
+    ptrdiff_t add= (ptrdiff_t)(rowid_buf_elem_size + 1 - bytes_for_rowids);
+    bytes_for_rowids= (ptrdiff_t)rowid_buf_elem_size + 1;
+    bytes_for_keys -= add;
+  }
+  /* Rowids get the low part of the buffer, keys get the remainder */
+  rowid_buffer_end= full_buf + bytes_for_rowids;
+  rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end);
+  key_buffer= &backward_key_buf;
+  key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end); 
+
+  if (!key_buffer->have_space_for(key_buff_elem_size) ||
+      !rowid_buffer.have_space_for((size_t)rowid_buf_elem_size))
+    return TRUE; /* Failed to provide minimum space for one of the buffers */
+
+  return FALSE;
+}
+
+
+void DsMrr_impl::do_nothing(void *dsmrr_arg)
+{
+  /* Intentionally empty; dsmrr_arg is not used */
+}
+
+
+void DsMrr_impl::reset_buffer_sizes(void *dsmrr_arg)
+{
+  DsMrr_impl *impl= static_cast<DsMrr_impl*>(dsmrr_arg);
+  /* Rowid buffer gets [full_buf, rowid_buffer_end) back */
+  impl->rowid_buffer.set_buffer_space(impl->full_buf, impl->rowid_buffer_end);
+  /* Key buffer gets [rowid_buffer_end, full_buf_end) back */
+  impl->key_buffer->set_buffer_space(impl->rowid_buffer_end, impl->full_buf_end);
+}
+
+
+/*
+  Hand the unused part of the key buffer over to the rowid buffer
+*/
+
+void DsMrr_impl::redistribute_buffer_space(void *dsmrr_arg)
+{
+  DsMrr_impl *impl= static_cast<DsMrr_impl*>(dsmrr_arg);
+  uchar *free_begin, *free_end;
+  impl->key_buffer->remove_unused_space(&free_begin, &free_end);
+  impl->rowid_buffer.grow(free_begin, free_end);
+}
+
+
+/*
+  @brief Initialize the iterator
+  @param owner_arg  Reader whose key buffer and handler the iterator uses
+  @note
+  Initialize the iterator to produce matches for the key of the first element 
+  in owner_arg->key_buffer
+
+  @retval  0                    OK
+  @retval  HA_ERR_END_OF_FILE   Either the owner->key_buffer is empty or 
+                                no matches for the key we've tried (check
+                                key_buffer->is_empty() to tell these apart)
+  @retval  other code           Fatal error
+*/
+
+int Key_value_records_iterator::init(Mrr_ordered_index_reader *owner_arg)
+{
+  int res;
+  owner= owner_arg;
+
+  identical_key_it.init(owner->key_buffer);
+  owner->key_buffer->setup_reading(owner->keypar.key_size_in_keybuf,
+                                   owner->is_mrr_assoc ? sizeof(void*) : 0);
+  /* A non-zero read() result here means the key buffer is empty */
+  if (identical_key_it.read())
+    return HA_ERR_END_OF_FILE;
+
+  uchar *key_in_buf= last_identical_key_ptr= identical_key_it.read_ptr1;
+  /* If the buffer holds key pointers, fetch the address of the tuple */
+  uchar *index_tuple= key_in_buf;
+  if (owner->keypar.use_key_pointers)
+    memcpy(&index_tuple, key_in_buf, sizeof(char*));
+
+  /* Find the last key in the buffer that is identical to the first one */
+  while (!identical_key_it.read())
+  {
+    if (Mrr_ordered_index_reader::compare_keys(owner, key_in_buf, 
+                                               identical_key_it.read_ptr1))
+      break;
+    last_identical_key_ptr= identical_key_it.read_ptr1;
+  }
+  identical_key_it.init(owner->key_buffer);
+  res= owner->file->ha_index_read_map(owner->file->get_table()->record[0], 
+                                      index_tuple, 
+                                      owner->keypar.key_tuple_map, 
+                                      HA_READ_KEY_EXACT);
+
+  if (res)
+  {
+    /* No matching records (or an error): skip this group of identical keys */
+    move_to_next_key_value();
+    return res;
+  }
+  owner->have_saved_rowid= FALSE;
+  get_next_row= FALSE;
+  return 0;
+}
+
+
+/*
+  Get the next match. Returns 0 and sets *range_info, HA_ERR_END_OF_FILE
+  when this key value has no more matching records, or another error code.
+*/
+int Key_value_records_iterator::get_next(range_id_t *range_info)
+{
+  int res;
+  if (get_next_row)
+  {
+    /* We're using a full unique key, no point to call index_next_same */
+    if (owner->keypar.index_ranges_unique)
+      return HA_ERR_END_OF_FILE;
+    /*
+      index_next_same() needs the key tuple itself; with use_key_pointers the
+      buffer element is only a pointer to it, so dereference it as init() does
+    */
+    handler *h= owner->file;
+    uchar *lookup_key= identical_key_it.read_ptr1;
+    if (owner->keypar.use_key_pointers)
+      memcpy(&lookup_key, identical_key_it.read_ptr1, sizeof(char*));
+    /* A non-zero result is either HA_ERR_END_OF_FILE or some other error */
+    if ((res= h->ha_index_next_same(h->get_table()->record[0], lookup_key,
+                                    owner->keypar.key_tuple_length)))
+      return res;
+    identical_key_it.init(owner->key_buffer);
+    owner->have_saved_rowid= FALSE;
+    get_next_row= FALSE;
+  }
+
+  identical_key_it.read(); /* This gets us next range_id */
+  memcpy(range_info, identical_key_it.read_ptr2, sizeof(range_id_t));
+  /*
+    After the last of the identical keys that the current record matches,
+    set get_next_row so that the next call reads the next index record.
+  */
+  if (!last_identical_key_ptr || 
+      (identical_key_it.read_ptr1 == last_identical_key_ptr))
+    get_next_row= TRUE;
+  return 0;
+}
+
+
+void Key_value_records_iterator::move_to_next_key_value()
+{
+  while (!owner->key_buffer->read() &&   /* stop when read() returns non-zero */
+         (owner->key_buffer->read_ptr1 != last_identical_key_ptr)) {}
+}
+
+
+/**
+  DS-MRR implementation: multi_range_read_next() function.
+
+  Follows the calling convention of multi_range_read_next().
+*/
+
+int DsMrr_impl::dsmrr_next(range_id_t *range_info)
+{
+  if (strategy_exhausted)
+    return HA_ERR_END_OF_FILE;
+  for (;;)
+  {
+    int res= strategy->get_next(range_info);
+    if (res != HA_ERR_END_OF_FILE)
+      return res;                 /* success or a real error */
+    if ((res= strategy->refill_buffer(FALSE)))
+      return res;                 /* EOF or error */
+  }
+}
+
+
+/**
+  DS-MRR implementation: multi_range_read_info() function
+
+  The default implementation's flags, buffer size and cost are computed
+  first and kept unless choose_mrr_impl() picks DS-MRR. Always returns 0.
+*/
+ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, 
+                               uint key_parts,
+                               uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  ha_rows res;
+  uint default_flags= *flags;
+  uint default_bufsz= *bufsz;
+
+  /* Let the base handler class fill in the default implementation's values */
+  res= primary_file->handler::multi_range_read_info(keyno, n_ranges, rows,
+                                                    key_parts, &default_bufsz,
+                                                    &default_flags, cost);
+  DBUG_ASSERT(!res);
+
+  if (!(*flags & HA_MRR_USE_DEFAULT_IMPL) &&
+      !choose_mrr_impl(keyno, rows, flags, bufsz, cost))
+  {
+    /* choose_mrr_impl() has already set *flags and *bufsz */
+    DBUG_PRINT("info", ("DS-MRR implementation choosen"));
+    return 0;
+  }
+  DBUG_PRINT("info", ("Default MRR implementation choosen"));
+  *flags= default_flags;
+  *bufsz= default_bufsz;
+  return 0;
+}
+
+
+/**
+  DS-MRR Implementation: multi_range_read_info_const() function
+
+  Returns the default implementation's row estimate (may be HA_POS_ERROR).
+*/
+
+ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                 void *seq_init_param, uint n_ranges, 
+                                 uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  uint default_flags= *flags;
+  uint default_bufsz= *bufsz;
+  /* Get cost/flags/mem_usage of default MRR implementation */
+  ha_rows rows=
+    primary_file->handler::multi_range_read_info_const(keyno, seq,
+                                                       seq_init_param, n_ranges,
+                                                       &default_bufsz,
+                                                       &default_flags, cost);
+  /* Default implementation can't perform MRR scan => we can't either */
+  if (rows == HA_POS_ERROR)
+    return rows;
+
+  /*
+    HA_MRR_USE_DEFAULT_IMPL passed in by the caller is an order to use the
+    default MRR implementation (needed for UPDATE/DELETE). Without it the
+    choice is based on cost and @@optimizer_switch settings.
+  */
+  const bool pick_dsmrr= !(*flags & HA_MRR_USE_DEFAULT_IMPL) &&
+                         !choose_mrr_impl(keyno, rows, flags, bufsz, cost);
+  if (pick_dsmrr)
+  {
+    /* *flags and *bufsz were set by choose_mrr_impl */
+    DBUG_PRINT("info", ("DS-MRR implementation choosen"));
+  }
+  else
+  {
+    DBUG_PRINT("info", ("Default MRR implementation choosen"));
+    *flags= default_flags;
+    *bufsz= default_bufsz;
+  }
+  return rows;
+}
+
+
+/**
+  Check if key has partially-covered columns
+
+  DS-MRR can't perform range scans when the ranges are over
+  partially-covered keys: the index only gives prefixes of the key part
+  values, so there is no way to check whether the end of the range has
+  been reached.
+
+  @param table  Table that the key belongs to
+  @param keyno  Key to check
+
+  @todo
+    Allow use of DS-MRR in cases where the index has partially-covered
+    components but they are not used for scanning.
+
+  @retval TRUE   Yes
+  @retval FALSE  No
+*/
+
+bool key_uses_partial_cols(TABLE *table, uint keyno)
+{
+  KEY *key= &table->key_info[keyno];
+  /* Look for a key part whose field doesn't have keyno in part_of_key */
+  for (uint part_no= 0; part_no < key->key_parts; part_no++)
+  {
+    if (!key->key_part[part_no].field->part_of_key.is_set(keyno))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Check if key/flags allow DS-MRR/CPK strategy to be used
+
+  @param thd        Thread handle, passed to optimizer_flag()
+  @param keyno      Index that will be used
+  @param mrr_flags  MRR flags of the scan
+  @retval TRUE   DS-MRR/CPK should be used
+  @retval FALSE  Otherwise
+*/
+
+bool DsMrr_impl::check_cpk_scan(THD *thd, uint keyno, uint mrr_flags)
+{
+  if (!(mrr_flags & HA_MRR_SINGLE_POINT) || keyno != table->s->primary_key)
+    return FALSE;
+  if (!primary_file->primary_key_is_clustered())
+    return FALSE;
+  return test(optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS));
+}
+
+
+/*
+  DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
+
+  Make the choice between using Default MRR implementation and DS-MRR.
+  This function contains common functionality factored out of dsmrr_info()
+  and dsmrr_info_const(). The function assumes that the default MRR
+  implementation's applicability requirements are satisfied.
+
+  @param keyno       Index number
+  @param rows        E(full rows to be retrieved)
+  @param flags  IN   MRR flags provided by the MRR user
+                OUT  If DS-MRR is chosen, flags of DS-MRR implementation,
+                     else possibly modified (both callers overwrite it)
+  @param bufsz  OUT  If DS-MRR is chosen, buffer use of DS-MRR implementation,
+                     else possibly modified (both callers overwrite it)
+  @param cost   IN   Cost of default MRR implementation
+                OUT  If DS-MRR is chosen, cost of DS-MRR scan
+                     else the value is not modified
+
+  @retval TRUE   Default MRR implementation should be used
+  @retval FALSE  DS-MRR implementation should be used
+*/
+
+
+bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
+                                 uint *bufsz, COST_VECT *cost)
+{
+  COST_VECT dsmrr_cost;
+  bool res;
+  THD *thd= current_thd;
+  /* The CPK check reads *flags, so it runs before the flags are cleared */
+  bool doing_cpk_scan= check_cpk_scan(thd, keyno, *flags); 
+  bool using_cpk= test(keyno == table->s->primary_key &&
+                       primary_file->primary_key_is_clustered());
+  *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS;
+  if (!optimizer_flag(thd, OPTIMIZER_SWITCH_MRR) ||
+      *flags & HA_MRR_INDEX_ONLY ||
+      (using_cpk && !doing_cpk_scan) || key_uses_partial_cols(table, keyno))
+  {
+    /* Use the default implementation */
+    *flags |= HA_MRR_USE_DEFAULT_IMPL;
+    *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS;
+    return TRUE;
+  }
+  /* Set aside add_len bytes (one key + one rowid) while costing DS-MRR */
+  uint add_len= table->key_info[keyno].key_length + primary_file->ref_length; 
+  *bufsz -= add_len;
+  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
+    return TRUE;
+  *bufsz += add_len;
+  /* NOTE(review): unsigned *bufsz wraps above if it was < add_len; confirm */
+  bool force_dsmrr;
+  /* 
+    If mrr_cost_based flag is not set, then set cost of DS-MRR to be minimum of
+    DS-MRR and Default implementations cost. This allows one to force use of
+    DS-MRR whenever it is applicable without affecting other cost-based
+    choices.
+  */
+  if ((force_dsmrr= !optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_COST_BASED)) &&
+      dsmrr_cost.total_cost() > cost->total_cost())
+    dsmrr_cost= *cost;
+
+  if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
+  {
+    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
+    *flags &= ~HA_MRR_SORTED;          /* We will return unordered output */
+    *cost= dsmrr_cost;
+    res= FALSE;
+
+    /* Record in *flags which sorting steps the chosen DS-MRR variant uses */
+    if ((using_cpk && doing_cpk_scan) ||
+        (optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS) &&
+         *flags & HA_MRR_SINGLE_POINT))
+    {
+      *flags |= DSMRR_IMPL_SORT_KEYS;
+    }
+
+    if (!(using_cpk && doing_cpk_scan) &&
+        !(*flags & HA_MRR_INDEX_ONLY))
+    {
+      *flags |= DSMRR_IMPL_SORT_ROWIDS;
+    }
+    /*
+    if ((*flags & HA_MRR_SINGLE_POINT) && 
+         optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
+      *flags |= HA_MRR_MATERIALIZED_KEYS;
+    */
+  }
+  else
+  {
+    /* Use the default MRR implementation */
+    res= TRUE;
+  }
+  return res;
+}
+
+/*
+  Take the flags we've returned previously and write one of
+  - Key-ordered scan
+  - Rowid-ordered scan
+  - Key-ordered Rowid-ordered scan
+  to str: at most 'size' bytes, not zero-terminated. Returns bytes written.
+*/
+
+int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size)
+{
+  const char *key_ordered=   "Key-ordered scan";
+  const char *rowid_ordered= "Rowid-ordered scan";
+  const char *both_ordered=  "Key-ordered Rowid-ordered scan";
+  const char *used_str="";
+  const uint BOTH_FLAGS= (DSMRR_IMPL_SORT_KEYS | DSMRR_IMPL_SORT_ROWIDS);
+  if (!(mrr_mode & HA_MRR_USE_DEFAULT_IMPL))
+  {
+    if ((mrr_mode & BOTH_FLAGS) == BOTH_FLAGS)
+      used_str= both_ordered;
+    else if (mrr_mode & DSMRR_IMPL_SORT_KEYS)
+      used_str= key_ordered;
+    else if (mrr_mode & DSMRR_IMPL_SORT_ROWIDS)
+      used_str= rowid_ordered;
+    uint used_str_len= strlen(used_str);
+    uint copy_len= min(used_str_len, size);
+    /* Copy only copy_len bytes: used_str may be shorter than 'size' */
+    memcpy(str, used_str, copy_len);
+    return copy_len;
+  }
+  return 0;
+}
+
+
+static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost);
+
+
+/**
+  Get cost of DS-MRR scan
+
+  @param keynr              Index to be used
+  @param rows               E(Number of rows to be scanned)
+  @param flags              Scan parameters (HA_MRR_* flags)
+  @param buffer_size INOUT  Buffer size
+  @param cost        OUT    The cost
+
+  @retval FALSE  OK
+  @retval TRUE   Error, DS-MRR cannot be used (the buffer is too small
+                 for even 1 rowid)
+*/
+
+bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
+                                         uint *buffer_size, COST_VECT *cost)
+{
+  ulong max_buff_entries, elem_size;
+  ha_rows rows_in_full_step, rows_in_last_step;
+  uint n_full_steps;
+  double index_read_cost;
+
+  elem_size= primary_file->ref_length + 
+             sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION));
+  max_buff_entries = *buffer_size / elem_size;
+
+  if (!max_buff_entries)
+    return TRUE; /* Buffer has not enough space for even 1 rowid */
+
+  /* Number of iterations we'll make with full buffer */
+  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
+  /* 
+    Get numbers of rows we'll be processing in 
+     - non-last sweep, with full buffer 
+     - last iteration, with non-full buffer
+  */
+  rows_in_full_step= max_buff_entries;
+  rows_in_last_step= rows % max_buff_entries;
+  
+  if (n_full_steps)
+  {
+    /* Each full step sorts and sweeps one buffer-full of rows, not all rows */
+    get_sort_and_sweep_cost(table, rows_in_full_step, cost);
+    cost->multiply(n_full_steps);
+  }
+  else
+  {
+    /* One partial step: report a buffer size big enough for it (20% slack) */
+    cost->zero();
+    *buffer_size= max(*buffer_size, 
+                      (size_t)(1.2*rows_in_last_step) * elem_size + 
+                      primary_file->ref_length + table->key_info[keynr].key_length);
+  }
+  
+  COST_VECT last_step_cost;
+  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
+  cost->add(&last_step_cost);
+ 
+  if (n_full_steps != 0)
+    cost->mem_cost= *buffer_size;
+  else
+    cost->mem_cost= (double)rows_in_last_step * elem_size;
+  
+  /* Total cost of all index accesses */
+  index_read_cost= primary_file->keyread_time(keynr, 1, rows);
+  cost->add_io(index_read_cost, 1 /* Random seeks */);
+  return FALSE;
+}
+
+
+/*
+  Compute the cost of a single sort-and-sweep step, which
+   - sorts an array of #nrows ROWIDs using qsort, and
+   - reads #nrows records from the table in one sweep.
+  For nrows == 0 the cost is zero.
+
+  @param table       Table being accessed
+  @param nrows       Number of rows to be sorted and retrieved
+  @param cost   OUT  The cost of scan
+*/
+
+static 
+void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost)
+{
+  if (!nrows)
+  {
+    cost->zero();
+    return;
+  }
+  /* Sweep part: uninterrupted read of nrows records */
+  get_sweep_read_cost(table, nrows, FALSE, cost);
+  /* Sort part: x * log2(x) with x= nrows / TIME_FOR_COMPARE_ROWID, x >= 3 */
+  double sort_ops= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
+  if (sort_ops < 3)
+    sort_ops= 3;
+  cost->cpu_cost += sort_ops * log2(sort_ops);
+}
+
+
+/**
+  Get cost of reading nrows table records in a "disk sweep"
+
+  A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that are
+  made for an ordered sequence of rowids.
+
+  We assume hard disk IO. The read is performed as follows:
+
+   1. The disk head is moved to the needed cylinder
+   2. The controller waits for the plate to rotate
+   3. The data is transferred
+
+  Time to do #3 is insignificant compared to #2+#1.
+  Time to move the disk head is proportional to head travel distance.
+
+  Time to wait for the plate to rotate depends on whether the disk head
+  was moved or not. 
+  If disk head wasn't moved, the wait time is proportional to distance
+  between the previous block and the block we're reading.
+  If the head was moved, we don't know how much we'll need to wait for the
+  plate to rotate. We assume the wait time to be a variate with a mean of
+  0.5 of full rotation time.
+
+  Our cost units are "random disk seeks". The cost of random disk seek is
+  actually not a constant, it depends on the range of cylinders we're going
+  to access. We make it constant by introducing a fuzzy concept of "typical 
+  datafile length" (it's fuzzy as it's hard to tell whether it should
+  include index file, temp.tables etc). Then random seek cost is:
+
+    1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
+
+  We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
+
+  @param table             Table to be accessed
+  @param nrows             Number of rows to retrieve
+  @param interrupted       TRUE <=> Assume that the disk sweep will be
+                           interrupted by other disk IO. FALSE - otherwise.
+  @param cost         OUT  The cost.
+*/
+
+void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, 
+                         COST_VECT *cost)
+{
+  DBUG_ENTER("get_sweep_read_cost");
+
+  cost->zero();
+  if (table->file->primary_key_is_clustered())
+  {
+    cost->io_count= table->file->read_time(table->s->primary_key,
+                                           (uint) nrows, nrows);
+  }
+  else
+  {
+    double n_blocks=
+      ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
+    /* An empty data file (n_blocks == 0) would make the formula yield NaN */
+    double busy_blocks= 1.0;
+    if (n_blocks >= 1.0)
+      busy_blocks=
+        n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
+    if (busy_blocks < 1.0)
+      busy_blocks= 1.0;
+
+    DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
+                       busy_blocks));
+    cost->io_count= busy_blocks;
+
+    if (!interrupted)
+    {
+      /* Assume reading is done in one 'sweep' */
+      cost->avg_io_cost= (DISK_SEEK_BASE_COST +
+                          DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
+    }
+  }
+  DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
+  DBUG_VOID_RETURN;
+}
+
+
+/* **************************************************************************
+ * DS-MRR implementation ends
+ ***************************************************************************/
+
+
diff --git a/sql/multi_range_read.h b/sql/multi_range_read.h
new file mode 100644
index 00000000000..1b72e71944d
--- /dev/null
+++ b/sql/multi_range_read.h
@@ -0,0 +1,637 @@
+/**
+  @defgroup DS-MRR declarations
+  @{
+*/
+
+/**
+  A Disk-Sweep implementation of MRR Interface (DS-MRR for short)
+
+  This is a "plugin"(*) for storage engines that allows to
+    1. When doing index scans, read table rows in rowid order;
+    2. when making many index lookups, do them in key order and don't
+       lookup the same key value multiple times;
+    3. Do both #1 and #2, when applicable.
+  These changes are expected to speed up query execution for disk-based 
+  storage engines running io-bound loads and "big" queries (ie. queries that
+  do joins and enumerate lots of records).
+
+  (*) - only conceptually. No dynamic loading or binary compatibility of any
+        kind.
+
+  General scheme of things:
+   
+      SQL Layer code
+       |   |   |
+       v   v   v 
+      -|---|---|---- handler->multi_range_read_XXX() function calls
+       |   |   |
+      _____________________________________
+     / DS-MRR module                       \
+     | (order/de-duplicate lookup keys,    |
+     | scan indexes in key order,          |
+     | order/de-duplicate rowids,          |
+     | retrieve full record reads in rowid |
+     | order)                              |
+     \_____________________________________/
+       |   |   |
+      -|---|---|----- handler->read_range_first()/read_range_next(), 
+       |   |   |      handler->index_read(), handler->rnd_pos() calls.
+       |   |   |
+       v   v   v
+      Storage engine internals
+
+
+  Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines.
+  Potentially it can be used with any table handler that has disk-based data
+  storage and has better performance when reading data in rowid order.
+*/
+
+#include "sql_lifo_buffer.h"
+
+class DsMrr_impl;
+class Mrr_ordered_index_reader;
+
+
+/* A structure with key parameters that's shared among several classes */
+class Key_parameters
+{
+public:
+  uint         key_tuple_length; /* Length of index lookup tuple, in bytes */
+  key_part_map key_tuple_map;    /* keyparts used in index lookup tuples */
+
+  /*
+    Size of one key as it is stored in the key buffer. This is
+      = key_tuple_length   if we copy keys to buffer
+      = sizeof(void*)      if we're using pointers to materialized keys.
+  */
+  uint key_size_in_keybuf;
+
+  /* TRUE <=> don't copy key values, use pointers to them instead.  */
+  bool use_key_pointers;
+
+  /* TRUE <=> We can get at most one index tuple for a lookup key */
+  bool index_ranges_unique;
+};
+
+
+/**
+  A class to enumerate (record, range_id) pairs that match given key value.
+  
+  @note
+
+  The idea is that we have a Lifo_buffer which holds (key, range_id) pairs
+  ordered by key value. From the front of the buffer we see
+
+    (key_val1, range_id1), (key_val1, range_id2) ... (key_val2, range_idN)
+
+  we take the first elements that have the same key value (key_val1 in the
+  example above), and make lookup into the table.  The table will have 
+  multiple matches for key_val1:
+ 
+                  == Table Index ==
+                   ...
+     key_val1 ->  key_val1, index_tuple1
+                  key_val1, index_tuple2
+                   ...
+                  key_val1, index_tupleN
+                   ...
+  
+  Our goal is to produce all possible combinations, i.e. we need:
+  
+    {(key_val1, index_tuple1), range_id1}
+    {(key_val1, index_tuple1), range_id2}
+       ...           ...               |
+    {(key_val1, index_tuple1), range_idN},
+                  
+    {(key_val1, index_tuple2), range_id1}
+    {(key_val1, index_tuple2), range_id2}
+        ...          ...               |
+    {(key_val1, index_tuple2), range_idN},
+
+        ...          ...          ...                          
+
+    {(key_val1, index_tupleK), range_idN}
+*/
+
+class Key_value_records_iterator
+{
+  /* The reader we work for: gives us table handler, key buffer and the rest */
+  Mrr_ordered_index_reader *owner;
+
+  /* Iterator to get (key, range_id) pairs from */
+  Lifo_buffer_iterator identical_key_it;
+
+  /*
+    Last of the identical key values (when we get this pointer from
+    identical_key_it, it will be time to stop).
+  */
+  uchar *last_identical_key_ptr;
+
+  /*
+    FALSE <=> we're right after the init() call, the record has been already
+    read with owner->file->index_read_map() call
+  */
+  bool get_next_row;
+
+public:
+  int init(Mrr_ordered_index_reader *owner_arg);
+  int get_next(range_id_t *range_info); /* presumably sets *range_info */
+  void move_to_next_key_value();
+};
+
+
+/*
+  Buffer manager interface. Mrr_reader objects use it to ask DsMrr_impl
+  to manage buffer space for them.
+*/
+typedef struct st_buffer_manager
+{
+public: /* redundant for a struct, members are public by default */
+  /* Opaque value to be passed as the argument to both callbacks below */
+  void *arg;
+
+  /*
+    This is called when we've freed more space from the rowid buffer. The
+    callee will get the unused space from the rowid buffer and give it to the
+    key buffer.
+  */
+  void (*redistribute_buffer_space)(void *arg);
+
+  /*
+    This is called when both key and rowid buffers are empty, and so it's time
+    to reset them to their original size (They've lost their original size,
+    because we were dynamically growing rowid buffer and shrinking key buffer).
+  */
+  void (*reset_buffer_sizes)(void *arg);
+
+} Buffer_manager;
+
+
+/* 
+  Mrr_reader - DS-MRR execution strategy abstraction
+
+  A reader produces ([index]_record, range_info) pairs, and requires periodic
+  refill operations.
+
+  - one starts using the reader by calling reader->get_next(),
+  - when a get_next() call returns HA_ERR_END_OF_FILE, one must call 
+    refill_buffer() before they can make more get_next() calls.
+  - when refill_buffer() returns HA_ERR_END_OF_FILE, this means the real
+    end of stream and get_next() should not be called anymore.
+
+  Both functions can return other error codes, these mean unrecoverable errors
+  after which one cannot continue.
+*/
+
+class Mrr_reader
+{
+public:
+  virtual int get_next(range_id_t *range_info) = 0; /* EOF => refill_buffer() */
+  virtual int refill_buffer(bool initial) = 0;      /* EOF => end of stream */
+  virtual ~Mrr_reader() {}; /* just to remove compiler warning */
+};
+
+
+/* 
+  A common base for readers that do index scans and produce index tuples 
+*/
+
+class Mrr_index_reader : public Mrr_reader
+{
+protected:
+  handler *file; /* Handler object to use */
+public:
+  virtual int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+                   void *seq_init_param, uint n_ranges,
+                   uint mode, Key_parameters *key_par,
+                   Lifo_buffer *key_buffer,
+                   Buffer_manager *buf_manager_arg) = 0;
+
+  /* Get pointer to place where every get_next() call will put rowid */
+  virtual uchar *get_rowid_ptr() = 0;
+  /* Get the rowid (call this after get_next() call) */
+  virtual void position();
+  virtual bool skip_record(range_id_t range_id, uchar *rowid) = 0;
+
+  virtual void interrupt_read() {} /* default: no-op */
+  virtual void resume_read() {}    /* default: no-op */
+};
+
+
+/*
+  A "bypass" index reader that just does an index scan. The index scan is done
+  by calling default MRR implementation (i.e.  handler::multi_range_read_XXX())
+  functions.
+*/
+
+class Mrr_simple_index_reader : public Mrr_index_reader
+{
+public:
+  int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
+           uint n_ranges, uint mode, Key_parameters *key_par,
+           Lifo_buffer *key_buffer, Buffer_manager *buf_manager_arg);
+  int get_next(range_id_t *range_info);
+  int refill_buffer(bool initial) { return initial ? 0 : HA_ERR_END_OF_FILE; }
+  uchar *get_rowid_ptr() { return file->ref; }
+  /* TRUE <=> a skip_record callback is installed and it says "skip" */
+  bool skip_record(range_id_t range_id, uchar *rowid)
+  {
+    if (!file->mrr_funcs.skip_record)
+      return FALSE;
+    return file->mrr_funcs.skip_record(file->mrr_iter, range_id, rowid);
+  }
+};
+
+
+/* 
+  A reader that sorts the key values before it makes the index lookups.
+*/
+
+class Mrr_ordered_index_reader : public Mrr_index_reader
+{
+public:
+  int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+           void *seq_init_param, uint n_ranges,
+           uint mode, Key_parameters *key_par,
+           Lifo_buffer *key_buffer,
+           Buffer_manager *buf_manager_arg);
+  int get_next(range_id_t *range_info);
+  int refill_buffer(bool initial);
+  uchar *get_rowid_ptr() { return file->ref; }
+  /* TRUE <=> the skip_record callback is set and it returned TRUE */
+  bool skip_record(range_id_t range_info, uchar *rowid)
+  {
+    return (mrr_funcs.skip_record &&
+            mrr_funcs.skip_record(mrr_iter, range_info, rowid));
+  }
+  /* TRUE <=> the skip_index_tuple callback is set and it returned TRUE */
+  bool skip_index_tuple(range_id_t range_info)
+  {
+    return (mrr_funcs.skip_index_tuple &&
+            mrr_funcs.skip_index_tuple(mrr_iter, range_info));
+  }
+  /* NOTE(review): presumably these set up / disable the saved_* buffers */
+  bool set_interruption_temp_buffer(uint rowid_length, uint key_len,
+                                    uint saved_pk_len,
+                                    uchar **space_start, uchar *space_end);
+  void set_no_interruption_temp_buffer();
+
+  void interrupt_read();
+  void resume_read();
+  void position();
+private:
+  Key_value_records_iterator kv_it; /* enumerates matches for one key value */
+  /* NOTE(review): presumably TRUE <=> kv_it is in the middle of a scan */
+  bool scanning_key_val_iter;
+
+  /* Buffer to store (key, range_id) pairs */
+  Lifo_buffer *key_buffer;
+
+  /* This manages key buffer allocation and sizing for us */
+  Buffer_manager *buf_manager;
+
+  Key_parameters  keypar; /* index scan and lookup tuple parameters */
+
+  /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
+  bool is_mrr_assoc;
+
+  /* Range sequence iteration members */
+  RANGE_SEQ_IF mrr_funcs;
+  range_seq_t mrr_iter;
+
+  /* TRUE == reached eof when enumerating ranges */
+  bool source_exhausted;
+
+  /*
+    Following members are for interrupt_read()/resume_read(). The idea is that
+    in some cases index scan that is done by this object is interrupted by
+    rnd_pos() calls made by Mrr_ordered_rndpos_reader. The problem is that
+    we're sharing handler->record[0] with that object, and it destroys its
+    contents.
+    We need to save/restore our current
+    - index tuple (for pushed index condition checks)
+    - clustered primary key values (again, for pushed index condition checks)
+    - rowid of the last record we've retrieved (in case this rowid matches
+      multiple ranges and we'll need to return it again)
+  */
+  bool support_scan_interruptions;
+  /* Space where we save the rowid of the last record we've returned */
+  uchar *saved_rowid;
+
+  /* TRUE <=> saved_rowid has the last saved rowid */
+  bool have_saved_rowid;
+
+  uchar *saved_key_tuple; /* Saved current key tuple */
+  uchar *saved_primary_key; /* Saved current primary key tuple */
+  /* Key comparison callbacks; TODO confirm what 'arg' carries */
+  static int compare_keys(void* arg, uchar* key1, uchar* key2);
+  static int compare_keys_reverse(void* arg, uchar* key1, uchar* key2);
+
+  friend class Key_value_records_iterator;
+  friend class DsMrr_impl;
+  friend class Mrr_ordered_rndpos_reader;
+};
+
+
+/* 
+  A reader that gets rowids from an Mrr_index_reader, and then sorts them 
+  before getting full records with handler->rnd_pos() calls.
+*/
+
+class Mrr_ordered_rndpos_reader : public Mrr_reader
+{
+public:
+  int init(handler *file, Mrr_index_reader *index_reader, uint mode,
+           Lifo_buffer *buf);
+  int get_next(range_id_t *range_info);
+  int refill_buffer(bool initial);
+private:
+  handler *file; /* Handler to use */
+
+  /* This is where we get (rowid, range_info) pairs from */
+  Mrr_index_reader *index_reader;
+
+  /* index_reader->get_next() puts rowid here */
+  uchar *index_rowid;
+
+  /* TRUE <=> index_reader->refill_buffer() call has returned EOF */
+  bool index_reader_exhausted;
+
+  /*
+    TRUE <=> We should call index_reader->refill_buffer(). This happens if
+    1. we've made index_reader->get_next() call which returned EOF
+    2. we haven't made any index_reader calls (and our first call should
+       be index_reader->refill_buffer(initial=TRUE))
+  */
+  bool index_reader_needs_refill;
+
+  /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
+  bool is_mrr_assoc;
+
+  /*
+    When reading from ordered rowid buffer: the rowid element of the last
+    buffer element that has rowid identical to this one.
+  */
+  uchar *last_identical_rowid;
+
+  /* Buffer to store (rowid, range_id) pairs */
+  Lifo_buffer *rowid_buffer;
+  /* Presumably fills rowid_buffer from index_reader - TODO confirm */
+  int refill_from_index_reader();
+};
+
+
+/*
+  A primitive "factory" of various Mrr_*_reader classes (the point is to 
+  get various kinds of readers without having to allocate them on the heap)
+*/
+
+class Mrr_reader_factory
+{
+public:
+  Mrr_ordered_rndpos_reader ordered_rndpos_reader; /* sorts rowids first */
+  Mrr_ordered_index_reader  ordered_index_reader;  /* sorts lookup keys first */
+  Mrr_simple_index_reader   simple_index_reader;   /* "bypass" index scan */
+};
+
+
+#define DSMRR_IMPL_SORT_KEYS   HA_MRR_IMPLEMENTATION_FLAG1 /* presumably: key-ordered retrieval */
+#define DSMRR_IMPL_SORT_ROWIDS HA_MRR_IMPLEMENTATION_FLAG2 /* presumably: rowid-ordered retrieval */
+
+/*
+  DS-MRR implementation for one table. Create/use one object of this class for
+  each ha_{myisam/innobase/etc} object. That object will be further referred to
+  as "the handler"
+
+  DsMrr_impl has the following execution strategies:
+
+  - Bypass DS-MRR, pass all calls to default MRR implementation, which is 
+    an MRR-to-non-MRR call converter.
+  - Key-Ordered Retrieval
+  - Rowid-Ordered Retrieval
+
+  DsMrr_impl will use one of the above strategies, or a combination of them, 
+  according to the following diagram:
+
+         (mrr function calls)
+                |
+                +----------------->-----------------+
+                |                                   |
+     ___________v______________      _______________v________________
+    / default: use lookup keys \    / KEY-ORDERED RETRIEVAL:         \
+    | (or ranges) in whatever  |    | sort lookup keys and then make | 
+    | order they are supplied  |    | index lookups in index order   |
+    \__________________________/    \________________________________/
+              | |  |                           |    |
+      +---<---+ |  +--------------->-----------|----+
+      |         |                              |    |
+      |         |              +---------------+    |
+      |   ______v___ ______    |     _______________v_______________
+      |  / default: read   \   |    / ROWID-ORDERED RETRIEVAL:      \
+      |  | table records   |   |    | Before reading table records, |
+      v  | in random order |   v    | sort their rowids and then    |
+      |  \_________________/   |    | read them in rowid order      |
+      |         |              |    \_______________________________/
+      |         |              |                    |
+      |         |              |                    |
+      +-->---+  |  +----<------+-----------<--------+
+             |  |  |                                
+             v  v  v
+      (table records and range_ids)
+
+  The choice of strategy depends on MRR scan properties, table properties
+  (whether we're scanning clustered primary key), and @@optimizer_switch
+  settings.
+  
+  Key-Ordered Retrieval
+  ---------------------
+  The idea is: if MRR scan is essentially a series of lookups on 
+   
+    tbl.key=value1 OR tbl.key=value2 OR ... OR tbl.key=valueN
+  
+  then it makes sense to collect and order the set of lookup values, i.e.
+   
+     sort(value1, value2, .. valueN)
+
+  and then do index lookups in index order. This results in fewer index page
+  fetch operations, and we also can avoid making multiple index lookups for the
+  same value. That is, if value1=valueN we can easily discover that after
+  sorting and make one index lookup for them instead of two.
+
+  Rowid-Ordered Retrieval
+  -----------------------
+  If we do a regular index scan or a series of index lookups, we'll be hitting
+  table records at random. For disk-based engines, this is much slower than 
+  reading the same records in disk order. We assume that disk ordering of
+  rows is the same as ordering of their rowids (which is provided by 
+  handler::cmp_ref())
+  In order to retrieve records in different order, we must separate index
+  scanning and record fetching, that is, MRR scan uses the following steps:
+
+    1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and 
+        fill a buffer with {rowid, range_id} pairs
+    2. Sort the buffer by rowid value
+    3. for each {rowid, range_id} pair in the buffer
+         get record by rowid and return the {record, range_id} pair
+    4. Repeat the above steps until we've exhausted the list of ranges we're
+       scanning.
+
+  Buffer space management considerations
+  --------------------------------------
+  With regards to buffer/memory management, MRR interface specifies that 
+   - SQL layer provides multi_range_read_init() with buffer of certain size.
+   - MRR implementation may use (i.e. have at its disposal till the end of 
+     the MRR scan) all of the buffer, or return the unused end of the buffer 
+     to SQL layer.
+
+  DS-MRR needs buffer in order to accumulate and sort rowids and/or keys. When
+  we need to accumulate/sort only keys (or only rowids), it is fairly trivial.
+
+  When we need to accumulate/sort both keys and rowids, efficient buffer use
+  gets complicated. We need to:
+   - First, accumulate keys and sort them
+   - Then use the keys (smaller values go first) to obtain rowids. A key is not
+     needed after we've got matching rowids for it.
+   - Make sure that rowids are accumulated at the front of the buffer, so that we
+     can return the end part of the buffer to SQL layer, should there be too
+     few rowid values to occupy the buffer.
+
+  All of these goals are achieved by using the following scheme:
+
+     |                    |   We get an empty buffer from SQL layer.   
+
+     |                  *-|    
+     |               *----|   First, we fill the buffer with keys. Key_buffer
+     |            *-------|   part grows from end of the buffer space to start
+     |         *----------|   (In this picture, the buffer is big enough to
+     |      *-------------|    accommodate all keys and even have some space left)
+
+     |      *=============|   We want to do key-ordered index scan, so we sort
+                              the keys
+
+     |-x      *===========|   Then we use the keys to get rowids. Rowids are
+     |----x      *========|   stored from start of buffer space towards the end.
+     |--------x     *=====|   The part of the buffer occupied with keys
+     |------------x   *===|   gradually frees up space for rowids. In this
+     |--------------x   *=|   picture we run out of keys before we've run out
+     |----------------x   |   of buffer space (it can be other way as well).
+
+     |================x   |   Then we sort the rowids.
+                     
+     |                |~~~|   The unused part of the buffer is at the end, so
+                              we can return it to the SQL layer.
+
+     |================*       Sorted rowids are then used to read table records 
+                              in disk order
+
+*/
+
+class DsMrr_impl
+{
+public:
+  typedef void (handler::*range_check_toggle_func_t)(bool on);
+  /* Only secondary_file is set here; init() supplies primary_file and table */
+  DsMrr_impl()
+    : secondary_file(NULL) {};
+  /* Remember the owner handler and its table */
+  void init(handler *h_arg, TABLE *table_arg)
+  {
+    primary_file= h_arg;
+    table= table_arg;
+  }
+  int dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+                 void *seq_init_param, uint n_ranges, uint mode,
+                 HANDLER_BUFFER *buf);
+  void dsmrr_close();
+  int dsmrr_next(range_id_t *range_info);
+  /* NOTE(review): these look like the multi_range_read_info*() backends */
+  ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts,
+                     uint *bufsz, uint *flags, COST_VECT *cost);
+
+  ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                            void *seq_init_param, uint n_ranges, uint *bufsz,
+                            uint *flags, COST_VECT *cost);
+
+  int dsmrr_explain_info(uint mrr_mode, char *str, size_t size);
+private:
+  /* Buffer to store (key, range_id) pairs */
+  Lifo_buffer *key_buffer;
+
+  /*
+    The "owner" handler object (the one that is expected to "own" this object
+    and call its functions).
+  */
+  handler *primary_file;
+  TABLE *table; /* Always equal to primary_file->table */
+
+  /*
+    Secondary handler object. (created when needed, we need it when we need
+    to run both index scan and rnd_pos() scan at the same time)
+  */
+  handler *secondary_file;
+
+  uint keyno; /* index we're running the scan on */
+  /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
+  bool is_mrr_assoc;
+  /* Reader objects are kept by value, so none is allocated on the heap */
+  Mrr_reader_factory reader_factory;
+  /* NOTE(review): presumably the top-level reader dsmrr_next() pulls from */
+  Mrr_reader *strategy;
+  bool strategy_exhausted;
+  /* NOTE(review): presumably the index reader that feeds 'strategy' */
+  Mrr_index_reader *index_strategy;
+
+  /* The whole buffer space that we're using */
+  uchar *full_buf;
+  uchar *full_buf_end;
+
+  /*
+    When using both rowid and key buffers: the boundary between key and rowid
+    parts of the buffer. This is the "original" value, actual memory ranges
+    used by key and rowid parts may be different because of dynamic space
+    reallocation between them.
+  */
+  uchar *rowid_buffer_end;
+
+  /*
+    One of the following two is used for key buffer: forward is used when
+    we only need key buffer, backward is used when we need both key and rowid
+    buffers.
+  */
+  Forward_lifo_buffer forward_key_buf;
+  Backward_lifo_buffer backward_key_buf;
+
+  /*
+    Buffer to store (rowid, range_id) pairs, or just rowids if
+    is_mrr_assoc==FALSE
+  */
+  Forward_lifo_buffer rowid_buffer;
+
+  bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
+                       COST_VECT *cost);
+  bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
+                               uint *buffer_size, COST_VECT *cost);
+  bool check_cpk_scan(THD *thd, uint keyno, uint mrr_flags);
+
+  bool setup_buffer_sharing(uint key_size_in_keybuf, key_part_map key_tuple_map);
+
+  /* Buffer_manager instance and static callbacks matching its pointers */
+  Buffer_manager buf_manager;
+  static void redistribute_buffer_space(void *dsmrr_arg);
+  static void reset_buffer_sizes(void *dsmrr_arg);
+  static void do_nothing(void *dsmrr_arg);
+
+  Lifo_buffer* get_key_buffer() { return key_buffer; }
+
+  friend class Key_value_records_iterator;
+  friend class Mrr_ordered_index_reader;
+  friend class Mrr_ordered_rndpos_reader;
+  /* NOTE(review): presumably create / destroy secondary_file - confirm */
+  int  setup_two_handlers();
+  void close_second_handler();
+};
+
+/**
+  @} (end of group DS-MRR declarations)
+*/
+
diff --git a/sql/my_decimal.cc b/sql/my_decimal.cc
index 85872f52194..39be927fd27 100644
--- a/sql/my_decimal.cc
+++ b/sql/my_decimal.cc
@@ -21,6 +21,10 @@
 #include "sql_class.h"                          // THD
 #endif
 
+#define DIG_BASE     1000000000 /* 10^DIG_PER_DEC1: base of one buf[] word */
+#define DIG_PER_DEC1 9          /* decimal digits held by one buf[] word */
+#define ROUND_UP(X)  (((X)+DIG_PER_DEC1-1)/DIG_PER_DEC1) /* digits -> words */
+
 #ifndef MYSQL_CLIENT
 /**
   report result of decimal operation.
@@ -34,21 +38,20 @@
     result
 */
 
-int decimal_operation_results(int result)
+int decimal_operation_results(int result, const char *value, const char *type)
 {
   switch (result) {
   case E_DEC_OK:
     break;
   case E_DEC_TRUNCATED:
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-			WARN_DATA_TRUNCATED, ER(WARN_DATA_TRUNCATED),
-			"", (long)-1);
+			ER_DATA_TRUNCATED, ER(ER_DATA_TRUNCATED),
+			value, type);
     break;
   case E_DEC_OVERFLOW:
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                        ER_TRUNCATED_WRONG_VALUE,
-                        ER(ER_TRUNCATED_WRONG_VALUE),
-			"DECIMAL", "");
+                        ER_DATA_OVERFLOW, ER(ER_DATA_OVERFLOW),
+			value, type);
     break;
   case E_DEC_DIV_ZERO:
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
@@ -56,9 +59,8 @@ int decimal_operation_results(int result)
     break;
   case E_DEC_BAD_NUM:
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-			ER_TRUNCATED_WRONG_VALUE_FOR_FIELD,
-			ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
-			"decimal", "", "", (long)-1);
+			ER_BAD_DATA, ER(ER_BAD_DATA),
+			value, type);
     break;
   case E_DEC_OOM:
     my_error(ER_OUT_OF_RESOURCES, MYF(0));
@@ -266,20 +268,69 @@ int str2my_decimal(uint mask, const char *from, uint length,
 }
 
 
+/**
+  converts a decimal into a pair of integers - for integer and fractional parts
+
+  special version, for decimals representing number of seconds.
+  integer part cannot be larger than 1e18 (otherwise it's an overflow).
+  fractional part is microseconds.
+*/
+bool my_decimal2seconds(const my_decimal *d, ulonglong *sec, ulong *microsec)
+{
+  /* Index of the lowest integer word in d->buf, or -1 if intg is zero */
+  int last_int= d->intg ? (d->intg - 1) / DIG_PER_DEC1 : -1;
+
+  /* Seconds come from the two lowest integer words */
+  if (last_int < 0)
+    *sec= 0;
+  else
+  {
+    *sec= d->buf[last_int];
+    if (last_int > 0)
+      *sec+= static_cast<longlong>(d->buf[last_int - 1]) * DIG_BASE;
+  }
+
+  /* First fractional word: drop its last three digits to get microseconds */
+  if (d->frac)
+    *microsec= static_cast<longlong>(d->buf[last_int + 1]) / (DIG_BASE/1000000);
+  else
+    *microsec= 0;
+
+  /* A non-zero word above those two means overflow: saturate the seconds */
+  for (int idx= 0; idx < last_int - 1; idx++)
+    if (d->buf[idx])
+    {
+      *sec= LONGLONG_MAX;
+      break;
+    }
+  return d->sign();
+}
+
+
+/**
+  converts (sign, seconds, microseconds) into the decimal *d and returns d
+*/
+my_decimal *seconds2my_decimal(bool sign,
+                               ulonglong sec, ulong microsec, my_decimal *d)
+{
+  d->init();
+  ulonglong2decimal(sec, d); // cannot fail; unsigned, so no sign misreading
+  if (microsec)
+  {
+    d->buf[(d->intg-1) / DIG_PER_DEC1 + 1]= microsec * (DIG_BASE/1000000);
+    d->frac= 6; /* microseconds occupy 6 fractional digits */
+  }
+  ((decimal_t *)d)->sign= sign;
+  return d;
+}
+
+
 my_decimal *date2my_decimal(MYSQL_TIME *ltime, my_decimal *dec)
 {
-  longlong date;
-  date = (ltime->year*100L + ltime->month)*100L + ltime->day;
+  longlong date= (ltime->year*100L + ltime->month)*100L + ltime->day; // YYYYMMDD
   if (ltime->time_type > MYSQL_TIMESTAMP_DATE)
     date= ((date*100L + ltime->hour)*100L+ ltime->minute)*100L + ltime->second;
-  if (int2my_decimal(E_DEC_FATAL_ERROR, ltime->neg ? -date : date, FALSE, dec))
-    return dec;
-  if (ltime->second_part)
-  {
-    dec->buf[(dec->intg-1) / 9 + 1]= ltime->second_part * 1000;
-    dec->frac= 6;
-  }
-  return dec;
+  return seconds2my_decimal(ltime->neg, date, ltime->second_part, dec);
 }
 
 
@@ -294,12 +345,37 @@ void my_decimal_trim(ulong *precision, uint *scale)
 }
 
 
+/*
+  Round a decimal half-up and convert it to a (unsigned) longlong. An error
+  selected by 'mask' is reported together with the original decimal value.
+*/
+int my_decimal2int(uint mask, const decimal_t *d, bool unsigned_flag,
+		   longlong *l)
+{
+  int res;
+  my_decimal rounded;
+  /* decimal_round can return only E_DEC_TRUNCATED */
+  decimal_round(d, &rounded, 0, HALF_UP);
+  res= (unsigned_flag ?
+        decimal2ulonglong(&rounded, (ulonglong *) l) :
+        decimal2longlong(&rounded, l));
+  if (res & mask)
+  {
+    /* +1: decimal2string() counts the terminating '\0' in the buffer size */
+    char buff[DECIMAL_MAX_STR_LENGTH + 1];
+    int length= sizeof(buff);
+    decimal2string(d, buff, &length, 0, 0, 0);
+    decimal_operation_results(res, buff,
+                              unsigned_flag ? "UNSIGNED INT" :
+                              "INT");
+  }
+  return res;
+}
+
+
 #ifndef DBUG_OFF
 /* routines for debugging print */
 
-#define DIG_PER_DEC1 9
-#define ROUND_UP(X)  (((X)+DIG_PER_DEC1-1)/DIG_PER_DEC1)
-
 /* print decimal */
 void
 print_decimal(const my_decimal *dec)
@@ -340,7 +416,6 @@ const char *dbug_decimal_as_string(char *buff, const my_decimal *val)
   return buff;
 }
 
-#endif /*DBUG_OFF*/
-
 
+#endif /*DBUG_OFF*/
 #endif /*MYSQL_CLIENT*/
diff --git a/sql/my_decimal.h b/sql/my_decimal.h
index 548d5ea3a53..e0c47029940 100644
--- a/sql/my_decimal.h
+++ b/sql/my_decimal.h
@@ -1,4 +1,5 @@
-/* Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2005, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -34,38 +35,12 @@
 
 C_MODE_START
 #include <decimal.h>
+#include <my_decimal_limits.h>
 C_MODE_END
 
 class String;
 typedef struct st_mysql_time MYSQL_TIME;
 
-#define DECIMAL_LONGLONG_DIGITS 22
-#define DECIMAL_LONG_DIGITS 10
-#define DECIMAL_LONG3_DIGITS 8
-
-/** maximum length of buffer in our big digits (uint32). */
-#define DECIMAL_BUFF_LENGTH 9
-
-/* the number of digits that my_decimal can possibly contain */
-#define DECIMAL_MAX_POSSIBLE_PRECISION (DECIMAL_BUFF_LENGTH * 9)
-
-
-/**
-  maximum guaranteed precision of number in decimal digits (number of our
-  digits * number of decimal digits in one our big digit - number of decimal
-  digits in one our big digit decreased by 1 (because we always put decimal
-  point on the border of our big digits))
-*/
-#define DECIMAL_MAX_PRECISION (DECIMAL_MAX_POSSIBLE_PRECISION - 8*2)
-#define DECIMAL_MAX_SCALE 30
-#define DECIMAL_NOT_SPECIFIED 31
-
-/**
-  maximum length of string representation (number of maximum decimal
-  digits + 1 position for sign + 1 position for decimal point, no terminator)
-*/
-#define DECIMAL_MAX_STR_LENGTH (DECIMAL_MAX_POSSIBLE_PRECISION + 2)
-
 /**
   maximum size of packet length.
 */
@@ -159,9 +134,10 @@ bool str_set_decimal(uint mask, const my_decimal *val, uint fixed_prec,
 extern my_decimal decimal_zero;
 
 #ifndef MYSQL_CLIENT
-int decimal_operation_results(int result);
+int decimal_operation_results(int result, const char *value, const char *type);
 #else
-inline int decimal_operation_results(int result)
+inline int decimal_operation_results(int result, const char *value,
+                                     const char *type)
 {
   return result;
 }
@@ -183,7 +159,7 @@ inline void max_internal_decimal(my_decimal *to)
 inline int check_result(uint mask, int result)
 {
   if (result & mask)
-    decimal_operation_results(result);
+    decimal_operation_results(result, "", "DECIMAL");
   return result;
 }
 
@@ -331,21 +307,16 @@ int my_decimal2string(uint mask, const my_decimal *d, uint fixed_prec,
 		      uint fixed_dec, char filler, String *str);
 #endif
 
-inline
-int my_decimal2int(uint mask, const my_decimal *d, my_bool unsigned_flag,
-		   longlong *l)
-{
-  my_decimal rounded;
-  /* decimal_round can return only E_DEC_TRUNCATED */
-  decimal_round(d, &rounded, 0, HALF_UP);
-  return check_result(mask, (unsigned_flag ?
-			     decimal2ulonglong(&rounded, (ulonglong *)l) :
-			     decimal2longlong(&rounded, l)));
-}
+bool my_decimal2seconds(const my_decimal *d, ulonglong *sec, ulong *microsec);
+/* The reverse: build *d from a sign, whole seconds and microseconds */
+my_decimal *seconds2my_decimal(bool sign, ulonglong sec, ulong microsec,
+                               my_decimal *d);
 
+int my_decimal2int(uint mask, const decimal_t *d, bool unsigned_flag,
+		   longlong *l);
 
 inline
-int my_decimal2double(uint, const my_decimal *d, double *result)
+int my_decimal2double(uint, const decimal_t *d, double *result)
 {
   /* No need to call check_result as this will always succeed */
   return decimal2double(d, result);
@@ -390,6 +361,16 @@ int int2my_decimal(uint mask, longlong i, my_bool unsigned_flag, my_decimal *d)
 			     longlong2decimal(i, d)));
 }
 
+/* Copy decimal_t 'from' into 'to'; only from->len words exist in from->buf */
+inline
+void decimal2my_decimal(decimal_t *from, my_decimal *to)
+{
+  DBUG_ASSERT(to->len >= from->len);
+  to->intg= from->intg;
+  to->frac= from->frac;
+  to->sign(from->sign);
+  memcpy(to->buf, from->buf, from->len*sizeof(decimal_digit_t));
+}
 
 inline
 void my_decimal_neg(decimal_t *arg)
@@ -452,7 +433,6 @@ int my_decimal_mod(uint mask, my_decimal *res, const my_decimal *a,
                                    res);
 }
 
-
 /**
   @return
     -1 if a<b, 1 if a>b and 0 if a==b
diff --git a/sql/mysql_install_db.cc b/sql/mysql_install_db.cc
new file mode 100644
index 00000000000..086dc292dec
--- /dev/null
+++ b/sql/mysql_install_db.cc
@@ -0,0 +1,637 @@
+/* Copyright (C) 2010-2011 Monty Program Ab & Vladislav Vaintroub
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/*
+  mysql_install_db creates a new database instance (optionally as service)
+  on Windows.
+*/
+#define DONT_DEFINE_VOID
+#include <my_global.h>
+#include <my_getopt.h>
+#include <my_sys.h>
+#include <m_string.h>
+
+#include <windows.h>
+#include <assert.h>
+#include <shellapi.h>
+#include <accctrl.h>
+#include <aclapi.h>
+
+#define USAGETEXT \
+"mysql_install_db.exe  Ver 1.00 for Windows\n" \
+"Copyright (C) 2010-2011 Monty Program Ab & Vladislav Vaintroub\n" \
+"This software comes with ABSOLUTELY NO WARRANTY. This is free software,\n" \
+"and you are welcome to modify and redistribute it under the GPL v2 license\n" \
+"Usage: mysql_install_db.exe [OPTIONS]\n" \
+"OPTIONS:"
+
+extern "C" const char mysql_bootstrap_sql[];
+/* Option values and the paths main() derives from them. */
+char default_os_user[]= "NT AUTHORITY\\NetworkService";
+static int create_db_instance();
+static my_bool opt_silent; /* my_bool: bound to a GET_BOOL option */
+static char datadir_buffer[FN_REFLEN];
+static char mysqld_path[FN_REFLEN];
+static char *opt_datadir;
+static char *opt_service;
+static char *opt_password;
+static int  opt_port;
+static char *opt_socket;
+static char *opt_os_user;
+static char *opt_os_password;
+static my_bool opt_default_user;
+static my_bool opt_allow_remote_root_access;
+static my_bool opt_skip_networking;
+static my_bool verbose_errors;
+
+/* Command line options; main() passes this table to handle_options(). */
+static struct my_option my_long_options[]=
+{
+  {"help", '?', "Display this help message and exit.", 0, 0, 0, GET_NO_ARG,
+   NO_ARG, 0, 0, 0, 0, 0, 0},
+  {"datadir", 'd', "Data directory of the new database",
+  &opt_datadir, &opt_datadir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+  {"service", 'S', "Name of the Windows service",
+  &opt_service, &opt_service, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+  {"password", 'p', "Root password",
+  &opt_password, &opt_password, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+  {"port", 'P', "mysql port",
+  &opt_port, &opt_port, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+  {"socket", 'W', 
+  "named pipe name (if missing, it will be set the same as service)",
+  &opt_socket, &opt_socket, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+  {"default-user", 'D', "Create default user",
+  &opt_default_user, &opt_default_user, 0 , GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0},
+  {"allow-remote-root-access", 'R', 
+  "Allows remote access from network for user root",
+  &opt_allow_remote_root_access, &opt_allow_remote_root_access, 0 , GET_BOOL, 
+  OPT_ARG, 0, 0, 0, 0, 0, 0},
+  {"skip-networking", 'N', "Do not use TCP connections, use pipe instead",
+  &opt_skip_networking, &opt_skip_networking, 0 , GET_BOOL, OPT_ARG, 0, 0, 0, 0,
+  0, 0},
+  {"silent", 's', "Print less information", &opt_silent,
+   &opt_silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+  {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+
+/* handle_options() callback: '?' prints the usage text and exits. */
+static my_bool
+get_one_option(int optid,
+               const struct my_option *opt __attribute__ ((unused)),
+               char *argument __attribute__ ((unused)))
+{
+  DBUG_ENTER("get_one_option");
+  if (optid == '?')
+  {
+    printf("%s\n", USAGETEXT);
+    my_print_help(my_long_options);
+    exit(0);
+  }
+  DBUG_RETURN(0);
+}
+
+
+/* Report a fatal error on stderr and terminate the program with status 1. */
+static void die(const char *fmt, ...)
+{
+  va_list ap;
+  DBUG_ENTER("die");
+
+  /* Print the error message */
+  va_start(ap, fmt);
+  fputs("FATAL ERROR: ", stderr);
+  vfprintf(stderr, fmt, ap);
+  va_end(ap);
+  fputc('\n', stderr);
+  /* main() enables the hints only after the options have been parsed. */
+  if (verbose_errors)
+    fputs(
+   "http://kb.askmonty.org/v/installation-issues-on-windows contains some help\n"
+   "for solving the most common problems.  If this doesn't help you, please\n"
+   "leave a comment in the knowledge base or file a bug report at\n"
+   "https://bugs.launchpad.net/maria", stderr);
+  fflush(stderr);
+  my_end(0);
+  exit(1);
+}
+
+
+/* printf-style progress message on stdout; suppressed when opt_silent is set. */
+static void verbose(const char *fmt, ...)
+{
+  if (!opt_silent)
+  {
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(stdout, fmt, ap);
+    va_end(ap);
+    /* Terminate the line and push it out immediately. */
+    fputc('\n', stdout);
+    fflush(stdout);
+  }
+}
+
+
+int main(int argc, char **argv)   /* 0 on success; failures exit via die() */
+{
+  int error;
+  DWORD len;
+  char *p;
+
+  MY_INIT(argv[0]);
+  /* mysqld.exe is expected in the same directory as this executable. */
+  len= GetModuleFileName(NULL, mysqld_path, sizeof(mysqld_path));
+  if (len == 0 || len >= sizeof(mysqld_path))
+    die("Cannot get the path of this executable (%u)", GetLastError());
+  p= strrchr(mysqld_path, FN_LIBCHAR);
+  if (p)
+    strmake(p, "\\mysqld.exe", sizeof(mysqld_path) - 1 - (size_t)(p - mysqld_path));
+
+  if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
+    exit(error);
+  if (!opt_datadir)
+  {
+    my_print_help(my_long_options);
+    die("parameter --datadir=# is mandatory");
+  }
+
+  /* Print some help on errors */
+  verbose_errors= TRUE;
+
+  if (!opt_os_user)
+  {
+    opt_os_user= default_os_user;
+    opt_os_password= NULL;
+  }
+  /* Workaround WiX bug (strip possible quote character at the end of path) */
+  size_t datadir_len= strlen(opt_datadir);
+  if (datadir_len > 0 && opt_datadir[datadir_len-1] == '"')
+    opt_datadir[datadir_len-1]= 0;
+
+  /* Work with an absolute path: the current directory is changed later. */
+  len= GetFullPathName(opt_datadir, sizeof(datadir_buffer), datadir_buffer, NULL);
+  if (len == 0 || len >= sizeof(datadir_buffer))
+    die("Cannot resolve data directory '%s' (%u)", opt_datadir, GetLastError());
+  opt_datadir= datadir_buffer;
+
+  if (create_db_instance())
+  {
+    die("database creation failed");
+  }
+
+  /* Reached only on success: every failure path above ends in die(). */
+  printf("Creation of the database was successful\n");
+  return 0;
+}
+
+
+
+/**
+  Replace every backslash in the NUL-terminated string s with a forward
+  slash, in place.
+*/
+static void convert_slashes(char *s)
+{
+  char *pos= s;
+  while ((pos= strchr(pos, '\\')))
+    *pos++= '/';
+}
+
+
+/**
+  Calculate basedir from the mysqld.exe path.
+  Basedir is assumed to be one level up from the directory that contains
+  mysqld.exe; the result uses forward slashes.
+  E.g. basedir for C:\my\bin\mysqld.exe would be C:/my
+*/
+static void get_basedir(char *basedir, int size, const char *mysqld_path)
+{
+  strcpy_s(basedir, size,  mysqld_path);
+  convert_slashes(basedir);
+  /* Cut the last two path components: the file name, then its directory. */
+  for (int level= 0; level < 2; level++)
+  {
+    char *sep= strrchr(basedir, '/');
+    if (!sep)
+      break;
+    *sep= 0;
+  }
+}
+
+
+/**
+  Format the command line for mysqld --bootstrap into cmdline (my_snprintf
+  is given the limit size-1) and return cmdline.
+  The string is passed to popen, hence the quotes around the whole string
+  in addition to the quotes around parameters that may contain spaces.
+*/
+static char *init_bootstrap_command_line(char *cmdline, size_t size)
+{
+  char basedir[MAX_PATH];
+  get_basedir(basedir, sizeof(basedir), mysqld_path);
+
+  my_snprintf(cmdline, size-1, 
+    "\"\"%s\" --no-defaults --bootstrap"
+    " \"--language=%s\\share\\english\""
+    " --basedir=. --datadir=. --default-storage-engine=myisam"
+    " --max_allowed_packet=9M --loose-skip-innodb --loose-skip-pbxt"
+    " --net-buffer-length=16k\"", mysqld_path, basedir);
+  return cmdline;
+}
+
+
+/**
+  Create my.ini in the current directory (this is assumed to be the
+  data directory as well) with [mysqld] and [client] sections built from
+  the command line options. Returns 0 on success, 1 if the file could not
+  be written completely; other failures terminate the program via die().
+*/
+static int create_myini()
+{
+  char path_buf[MAX_PATH];
+  DWORD path_len;
+
+  verbose("Creating my.ini file");   /* unlike printf, this honors --silent */
+
+  /* Without a valid current directory the datadir= line would be garbage. */
+  path_len= GetCurrentDirectory(sizeof(path_buf), path_buf);
+  if (path_len == 0 || path_len >= sizeof(path_buf))
+    die("Cannot get current directory (%u)", GetLastError());
+
+  /* Create ini file. */
+  FILE *myini= fopen("my.ini","wt");
+  if (!myini)
+    die("Cannot create my.ini in data directory");
+
+  /* Write out server settings. */
+  fprintf(myini, "[mysqld]\n");
+  convert_slashes(path_buf);
+  fprintf(myini, "datadir=%s\n", path_buf);
+  if (opt_skip_networking)
+  {
+    fprintf(myini,"skip-networking\n");
+    /* No pipe name given: fall back to the service name (may be NULL). */
+    if (!opt_socket)
+      opt_socket= opt_service;
+  }
+
+  /* The named pipe is enabled when a pipe name is known or TCP is disabled. */
+  if ((opt_socket && opt_socket[0]) || opt_skip_networking)
+    fprintf(myini,"enable-named-pipe\n");
+
+  if (opt_socket && opt_socket[0])
+    fprintf(myini, "socket=%s\n", opt_socket);
+  if (opt_port)
+    fprintf(myini,"port=%d\n", opt_port);
+
+  /* Write out client settings. */
+  fprintf(myini, "[client]\n");
+
+  /* Used for named pipes */
+  if (opt_socket && opt_socket[0])
+    fprintf(myini,"socket=%s\n",opt_socket);
+  if (opt_skip_networking)
+    fprintf(myini,"protocol=pipe\n");
+  else if (opt_port)
+    fprintf(myini,"port=%d\n",opt_port);
+
+  /* Buffered write errors surface at fclose(); do not report them as success. */
+  if (fclose(myini))
+    return 1;
+  return 0;
+}
+
+/* SQL that create_db_instance() sends to mysqld after the bootstrap script. */
+static const char update_root_passwd_part1[]=
+  "UPDATE mysql.user SET Password = PASSWORD('";
+static const char update_root_passwd_part2[]=
+  "') where User='root';\n";
+static const char remove_default_user_cmd[]= 
+  "DELETE FROM mysql.user where User='';\n";
+static const char allow_remote_root_access_cmd[]=
+  "CREATE TEMPORARY TABLE tmp_user LIKE user;\n"
+  "INSERT INTO tmp_user SELECT * from user where user='root' "
+    " AND host='localhost';\n"
+  "UPDATE tmp_user SET host='%';\n"
+  "INSERT INTO user SELECT * FROM tmp_user;\n"
+  "DROP TABLE tmp_user;\n";
+static const char end_of_script[]="-- end.";
+
+/*
+  Register service opt_service (mysqld.exe with --defaults-file=<datadir>\my.ini).
+  Returns 0; failures to open the SCM or create the service exit via die().
+*/
+static int register_service()
+{
+  char buf[3*MAX_PATH +32]; /* path to mysqld.exe, to my.ini, service name */
+  SC_HANDLE sc_manager, sc_service;
+  size_t datadir_len= strlen(opt_datadir);
+  const char *backslash_after_datadir= "\\";
+
+  if (datadir_len && opt_datadir[datadir_len-1] == '\\')
+    backslash_after_datadir= "";
+
+  verbose("Registering service '%s'", opt_service);
+  my_snprintf(buf, sizeof(buf)-1,
+    "\"%s\" \"--defaults-file=%s%smy.ini\" \"%s\"" ,  mysqld_path, opt_datadir,
+    backslash_after_datadir, opt_service);
+
+  /* Get a handle to the SCM database. */
+  sc_manager= OpenSCManager( NULL, NULL, SC_MANAGER_ALL_ACCESS);
+  if (!sc_manager)
+    die("OpenSCManager failed (%u)\n", GetLastError());
+
+  /* Create the service. */
+  sc_service= CreateService(sc_manager, opt_service,  opt_service,
+    SERVICE_ALL_ACCESS, SERVICE_WIN32_OWN_PROCESS, SERVICE_AUTO_START,
+    SERVICE_ERROR_NORMAL, buf, NULL, NULL, NULL, opt_os_user, opt_os_password);
+  if (!sc_service)
+  {
+    /* Save the error first: CloseServiceHandle() may overwrite it. */
+    DWORD create_error= GetLastError();
+    CloseServiceHandle(sc_manager);
+    die("CreateService failed (%u)", create_error);
+  }
+
+  SERVICE_DESCRIPTION sd= { "MariaDB database server" };
+  ChangeServiceConfig2(sc_service, SERVICE_CONFIG_DESCRIPTION, &sd);
+  CloseServiceHandle(sc_service);
+  CloseServiceHandle(sc_manager);
+  return 0;
+}
+
+
+/* Delete directory dir with its contents via SHFileOperation; result is ignored. */
+static void clean_directory(const char *dir)
+{
+  /* strmake() returns the first NUL; limit MAX_PATH keeps the 2nd NUL inside dir2 */
+  char dir2[MAX_PATH+2];
+  *(strmake(dir2, dir, MAX_PATH)+1)= 0;
+
+  SHFILEOPSTRUCT fileop;
+  fileop.hwnd= NULL;    /* no status display */
+  fileop.wFunc= FO_DELETE;  /* delete operation */
+  fileop.pFrom= dir2;  /* source file name as double null terminated string */
+  fileop.pTo= NULL;    /* no destination needed */
+  fileop.fFlags= FOF_NOCONFIRMATION|FOF_SILENT;  /* do not prompt the user */
+  fileop.fAnyOperationsAborted= FALSE;
+  fileop.lpszProgressTitle= NULL;
+  fileop.hNameMappings= NULL;
+
+  SHFileOperation(&fileop);
+}
+
+
+/*
+  Define directory permission to have inheritable all access for a user
+  (defined as username or group string or as SID).
+  dir: directory to change; os_user: account name, NULL = current user.
+  Returns -1 if dir cannot be opened or its DACL cannot be read, 1 if the
+  SID of a predefined service account cannot be created, 0 otherwise.
+  Handles and API-allocated buffers are released on every path.
+*/
+static int set_directory_permissions(const char *dir, const char *os_user)
+{
+  struct{
+        TOKEN_USER tokenUser;
+        BYTE buffer[SECURITY_MAX_SID_SIZE];
+  } tokenInfoBuffer;
+  int ret= 0;
+  ACL* pOldDACL= NULL;
+  ACL* pNewDACL= NULL;
+  SECURITY_DESCRIPTOR* pSD= NULL;
+  EXPLICIT_ACCESS ea={0};
+  WELL_KNOWN_SID_TYPE wellKnownSidType = WinNullSid;
+  PSID pSid= NULL;
+
+  HANDLE hDir= CreateFile(dir,READ_CONTROL|WRITE_DAC,0,NULL,OPEN_EXISTING,
+    FILE_FLAG_BACKUP_SEMANTICS,NULL);
+  if (hDir == INVALID_HANDLE_VALUE)
+    return -1;
+
+  if (GetSecurityInfo(hDir, SE_FILE_OBJECT , DACL_SECURITY_INFORMATION,NULL, NULL,
+    &pOldDACL, NULL, (void**)&pSD) != ERROR_SUCCESS)
+  {
+    ret= -1;   /* without the old DACL the existing entries would be lost */
+    goto end;
+  }
+
+  if (os_user)
+  {
+    /* Check for 3 predefined service users
+       They might have localized names in non-English Windows, thus they need
+       to be handled using well-known SIDs.
+    */
+    if (stricmp(os_user, "NT AUTHORITY\\NetworkService") == 0)
+      wellKnownSidType= WinNetworkServiceSid;
+    else if (stricmp(os_user, "NT AUTHORITY\\LocalService") == 0)
+      wellKnownSidType= WinLocalServiceSid;
+    else if (stricmp(os_user, "NT AUTHORITY\\LocalSystem") == 0)
+      wellKnownSidType= WinLocalSystemSid;
+
+    if (wellKnownSidType != WinNullSid)
+    {
+      DWORD size= SECURITY_MAX_SID_SIZE;
+      pSid= (PSID)tokenInfoBuffer.buffer;
+      if (!CreateWellKnownSid(wellKnownSidType, NULL, pSid,
+        &size))
+      {
+        ret= 1;
+        goto end;
+      }
+      ea.Trustee.TrusteeForm= TRUSTEE_IS_SID;
+      ea.Trustee.ptstrName= (LPTSTR)pSid;
+    }
+    else
+    {
+      ea.Trustee.TrusteeForm= TRUSTEE_IS_NAME;
+      ea.Trustee.ptstrName= (LPSTR)os_user;
+    }
+  }
+  else
+  {
+    HANDLE token;
+    if (OpenProcessToken(GetCurrentProcess(),TOKEN_QUERY, &token))
+    {
+      DWORD length= (DWORD) sizeof(tokenInfoBuffer);
+      if (GetTokenInformation(token, TokenUser, &tokenInfoBuffer,
+        length, &length))
+        pSid= tokenInfoBuffer.tokenUser.User.Sid;
+      CloseHandle(token);   /* the SID data lives in tokenInfoBuffer */
+    }
+    if (!pSid)
+      goto end;
+    ea.Trustee.TrusteeForm= TRUSTEE_IS_SID;
+    ea.Trustee.ptstrName= (LPTSTR)pSid;
+  }
+  ea.grfAccessMode= GRANT_ACCESS;
+  ea.grfAccessPermissions= GENERIC_ALL;
+  ea.grfInheritance= CONTAINER_INHERIT_ACE|OBJECT_INHERIT_ACE;
+  ea.Trustee.TrusteeType= TRUSTEE_IS_UNKNOWN;
+  if (SetEntriesInAcl(1,&ea,pOldDACL,&pNewDACL) == ERROR_SUCCESS && pNewDACL)
+  {
+    SetSecurityInfo(hDir,SE_FILE_OBJECT,DACL_SECURITY_INFORMATION,NULL, NULL,
+      pNewDACL, NULL);
+  }
+
+end:
+  if (pSD != NULL)
+    LocalFree((HLOCAL) pSD);
+  if (pNewDACL != NULL)
+    LocalFree((HLOCAL) pNewDACL);
+  CloseHandle(hDir);
+  return ret;
+}
+
+
+/*
+  Give directory permissions for special service user NT SERVICE\servicename;
+  this user is available only on Win7 (version 6.1) and later.
+  Does nothing when no service name was given.
+*/
+void grant_directory_permissions_to_service()
+{
+  char service_user[MAX_PATH+ 12];
+  OSVERSIONINFO info;
+  info.dwOSVersionInfoSize= sizeof(info);
+  GetVersionEx(&info);
+  /* Parenthesized: && binds tighter than ||, opt_service must always be checked */
+  if (opt_service &&
+      (info.dwMajorVersion > 6 ||
+       (info.dwMajorVersion == 6 && info.dwMinorVersion > 0)))
+  {
+    my_snprintf(service_user,sizeof(service_user), "NT SERVICE\\%s", opt_service);
+    set_directory_permissions(opt_datadir, service_user);
+  }
+}
+
+
+/*
+  Create database instance (including registering as service etc).
+  Returns 0 on success. On failure the data directory is removed again
+  and a non-zero value is returned.
+*/
+static int create_db_instance()
+{
+  int ret= 0;
+  char cwd[MAX_PATH];
+  DWORD cwd_len= MAX_PATH;
+  char cmdline[3*MAX_PATH];
+  FILE *in;
+
+  verbose("Running bootstrap");
+
+  GetCurrentDirectory(cwd_len, cwd);
+  CreateDirectory(opt_datadir, NULL); /*ignore error, it might already exist */
+
+  if (!SetCurrentDirectory(opt_datadir))
+    die("Cannot set current directory to '%s'\n",opt_datadir);
+
+  CreateDirectory("mysql",NULL);
+  CreateDirectory("test", NULL);
+
+  /* Grant access to the current user and default_os_user (runs services). */
+  set_directory_permissions(opt_datadir, NULL);
+  set_directory_permissions(opt_datadir, default_os_user);
+
+  /* Do mysqld --bootstrap. */
+  init_bootstrap_command_line(cmdline, sizeof(cmdline));
+
+  in= popen(cmdline, "wt");
+  if (!in)
+  {
+    /* ret is still 0 here: without this a failed popen would report success */
+    verbose("ERROR: Cannot start mysqld for bootstrap");
+    ret= 1;
+    goto end;
+  }
+
+  /* Select the system database, then write the bootstrap script to stdin. */
+  if (fwrite("use mysql;\n",11,1, in) != 1 ||
+      fwrite(mysql_bootstrap_sql, strlen(mysql_bootstrap_sql), 1, in) != 1)
+  {
+    verbose("ERROR: Cannot write to mysqld's stdin");
+    pclose(in);   /* do not leave the pipe and the child process behind */
+    ret= 1;
+    goto end;
+  }
+
+  /* Remove default user, if requested. */
+  if (!opt_default_user)
+  {
+    verbose("Removing default user");
+    fputs(remove_default_user_cmd, in);
+    fflush(in);
+  }
+
+  if (opt_allow_remote_root_access)
+  {
+     verbose("Allowing remote access for user root");
+     fputs(allow_remote_root_access_cmd,in);
+     fflush(in);
+  }
+
+  /* Change root password if requested. */
+  if (opt_password)
+  {
+    verbose("Changing root password");
+    fputs(update_root_passwd_part1, in);
+    /* The password ends up in a quoted SQL literal: escape ' and \ */
+    for (const char *c= opt_password; *c; c++)
+    {
+      if (*c == '\'' || *c == '\\')
+        fputc('\\', in);
+      fputc(*c, in);
+    }
+    fputs(update_root_passwd_part2, in);
+    fflush(in);
+  }
+
+  /* Bootstrap chokes if the last command on stdin ends with a newline, so a
+     dummy comment without a trailing newline is sent last. */
+  fputs(end_of_script, in);
+  fflush(in);
+
+  /* Check if bootstrap has completed successfully. */
+  ret= pclose(in);
+  if (ret)
+  {
+    verbose("mysqld returned error %d in pclose",ret);
+    goto end;
+  }
+
+  /* Create my.ini file in data directory.*/
+  ret= create_myini();
+  if (ret)
+    goto end;
+
+  /* Register service if requested. */
+  if (opt_service && opt_service[0])
+  {
+    ret= register_service();
+    grant_directory_permissions_to_service();
+    if (ret)
+      goto end;
+  }
+
+end:
+  if (ret)
+  {
+    SetCurrentDirectory(cwd);
+    clean_directory(opt_datadir);
+  }
+  return ret;
+}
diff --git a/sql/mysql_upgrade_service.cc b/sql/mysql_upgrade_service.cc
new file mode 100644
index 00000000000..db916101eb1
--- /dev/null
+++ b/sql/mysql_upgrade_service.cc
@@ -0,0 +1,522 @@
+/* Copyright (C) 2010-2011 Monty Program Ab & Vladislav Vaintroub
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/*
+  mysql_upgrade_service upgrades mysql service on Windows.
+  It changes service definition to point to the new mysqld.exe, restarts the 
+  server and runs mysql_upgrade
+*/
+
+#define DONT_DEFINE_VOID
+#include <process.h>
+#include <my_global.h>
+#include <my_getopt.h>
+#include <my_sys.h>
+#include <m_string.h>
+#include <mysql_version.h>
+#include <winservice.h>
+
+#include <windows.h>
+
+/* We're using version APIs */
+#pragma comment(lib, "version")
+
+#define USAGETEXT /* banner printed by get_one_option() for --help */ \
+"mysql_upgrade_service.exe  Ver 1.00 for Windows\n" \
+"Copyright (C) 2010-2011 Monty Program Ab & Vladislav Vaintroub\n" \
+"This software comes with ABSOLUTELY NO WARRANTY. This is free software,\n" \
+"and you are welcome to modify and redistribute it under the GPL v2 license\n" \
+"Usage: mysql_upgrade_service.exe [OPTIONS]\n" \
+"OPTIONS:"
+
+static char mysqld_path[MAX_PATH];       /* the three tool paths are built in main() */
+static char mysqladmin_path[MAX_PATH];
+static char mysqlupgrade_path[MAX_PATH];
+
+static char defaults_file_param[MAX_PATH + 16]; /*--defaults-file=<path> */
+static char logfile_path[MAX_PATH];      /* set by run_tool() when the log is created */
+static char *opt_service;
+static SC_HANDLE service;
+static SC_HANDLE scm;
+HANDLE mysqld_process; // mysqld.exe started for upgrade
+DWORD initial_service_state= -1; // -1 until stop_mysqld_service() has queried the state
+HANDLE logfile_handle; // log file used as stdout/stderr of the tools run_tool() starts
+
+/*
+  Startup and shutdown timeouts, in seconds.
+  Maybe they can be made parameters.
+*/
+static unsigned int startup_timeout= 60;
+static unsigned int shutdown_timeout= 60;
+
+static struct my_option my_long_options[]=   /* passed to handle_options() in main() */
+{
+  {"help", '?', "Display this help message and exit.", 0, 0, 0, GET_NO_ARG,
+   NO_ARG, 0, 0, 0, 0, 0, 0},
+  {"service", 'S', "Name of the existing Windows service",
+  &opt_service, &opt_service, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+  {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+
+
+/* handle_options() callback: '?' prints the usage text and exits. */
+static my_bool
+get_one_option(int optid,
+               const struct my_option *opt __attribute__ ((unused)),
+               char *argument __attribute__ ((unused)))
+{
+  DBUG_ENTER("get_one_option");
+  if (optid == '?')
+  {
+    printf("%s\n", USAGETEXT);
+    my_print_help(my_long_options);
+    exit(0);
+  }
+  DBUG_RETURN(0);
+}
+
+
+
+/* printf-style progress message: written to stdout, newline added, flushed. */
+static void log(const char *fmt, ...)
+{
+  va_list ap;
+  va_start(ap, fmt);
+  vfprintf(stdout, fmt, ap);
+  va_end(ap);
+  putc('\n', stdout);
+  fflush(stdout);
+}
+
+
+/* Print a fatal error to stderr, release what was started (service,
+   mysqld.exe process, open handles) and exit with code 1. */
+static void die(const char *fmt, ...)
+{
+  va_list args;
+  DBUG_ENTER("die");
+
+  /* Print the error message */
+  va_start(args, fmt);
+  fprintf(stderr, "FATAL ERROR: ");
+  vfprintf(stderr, fmt, args);
+  va_end(args);
+  if (logfile_path[0])
+  {
+    /* Start on a new line: most callers pass messages without trailing \n */
+    fprintf(stderr, "\nAdditional information can be found in the log file %s",
+      logfile_path);
+  }
+  fputc('\n', stderr);
+  fflush(stderr);   /* the message went to stderr, not stdout */
+
+  /*
+    Stop service that we started, if it was not initially running at
+    program start.
+  */
+  if (initial_service_state != -1 && initial_service_state != SERVICE_RUNNING)
+  {
+    SERVICE_STATUS service_status;
+    ControlService(service, SERVICE_CONTROL_STOP, &service_status);
+  }
+
+  if (scm)
+    CloseServiceHandle(scm);
+  if (service)
+    CloseServiceHandle(service);
+  /* Stop mysqld.exe, if it was started for upgrade */
+  if (mysqld_process)
+    TerminateProcess(mysqld_process, 3);
+  if (logfile_handle)
+    CloseHandle(logfile_handle);
+  my_end(0);
+  exit(1);
+}
+
+
+/*
+  spawn-like function to run subprocesses.
+  The output (stdout and stderr) of the subprocess is redirected to the log
+  file, which is created on the first call.
+
+  Typical usage could be something like
+  run_tool(P_NOWAIT, "cmd.exe", "/c" , "echo", "foo", NULL)
+
+  @param wait_flag  P_WAIT or P_NOWAIT
+  @param program    program to run
+  The rest of the parameters are strings building the command line; the
+  list is terminated by NULL.
+
+  @return intptr containing either process handle, if P_NOWAIT is used
+  or return code of the process (if P_WAIT is used)
+*/
+static intptr_t run_tool(int wait_flag, const char *program,...)
+{
+  static char cmdline[32*1024];
+  char *end;
+  va_list args;
+  va_start(args, program);
+  if (!program)
+    die("Invalid call to run_tool");
+  end= strxmov(cmdline, "\"", program, "\"", NullS);
+
+  for(;;)
+  {
+    char *param= va_arg(args,char *);
+    if(!param)
+      break;
+    end= strxmov(end, " \"", param, "\"", NullS);
+  }
+  va_end(args);
+
+  /* Create output file if not already done */
+  if (!logfile_handle)
+  {
+    char tmpdir[FN_REFLEN];
+    SECURITY_ATTRIBUTES attr= {0};
+    attr.nLength= sizeof(attr);
+    attr.bInheritHandle= TRUE;   /* child processes write to this handle */
+    GetTempPath(FN_REFLEN, tmpdir);
+    sprintf_s(logfile_path, "%s\\mysql_upgrade_service.%s.log", tmpdir,
+      opt_service);
+    /* CREATE_ALWAYS: TRUNCATE_EXISTING fails when the file does not exist */
+    logfile_handle= CreateFile(logfile_path, GENERIC_WRITE,  FILE_SHARE_READ,
+      &attr, CREATE_ALWAYS, 0, NULL);
+    if (logfile_handle == INVALID_HANDLE_VALUE)
+    {
+      DWORD open_error= GetLastError();
+      logfile_handle= NULL;   /* die() must not close an invalid handle */
+      die("Cannot open log file %s, windows error %u", logfile_path, open_error);
+    }
+  }
+
+  /* Start child process */
+  STARTUPINFO si= {0};
+  si.cb= sizeof(si);
+  si.hStdInput= GetStdHandle(STD_INPUT_HANDLE);
+  si.hStdError= logfile_handle;
+  si.hStdOutput= logfile_handle;
+  si.dwFlags= STARTF_USESTDHANDLES;
+  PROCESS_INFORMATION pi;
+  if (!CreateProcess(NULL, cmdline, NULL,
+       NULL, TRUE, 0, NULL, NULL, &si, &pi))
+    die("CreateProcess failed (commandline %s)", cmdline);
+  CloseHandle(pi.hThread);
+
+  if (wait_flag == P_NOWAIT)
+  {
+    /* Do not wait for process to complete, return handle. */
+    return (intptr_t)pi.hProcess;
+  }
+
+  /* Wait for process to complete. */
+  if (WaitForSingleObject(pi.hProcess, INFINITE) != WAIT_OBJECT_0)
+    die("WaitForSingleObject() failed");
+  DWORD exit_code;
+  if (!GetExitCodeProcess(pi.hProcess, &exit_code))
+    die("GetExitCodeProcess() failed");
+  CloseHandle(pi.hProcess);   /* only a P_NOWAIT caller receives the handle */
+  return (intptr_t)exit_code;
+}
+
+/* Stop the service and poll until it reports SERVICE_STOPPED; die() on timeout. */
+void stop_mysqld_service()
+{
+  DWORD needed;
+  SERVICE_STATUS_PROCESS ssp;
+  int timeout= shutdown_timeout*1000; /* remaining wait, in milliseconds */
+  for(;;)
+  {
+    if (!QueryServiceStatusEx(service, SC_STATUS_PROCESS_INFO,
+          (LPBYTE)&ssp, 
+          sizeof(SERVICE_STATUS_PROCESS),
+          &needed))
+    {
+      die("QueryServiceStatusEx failed (%u)\n", GetLastError()); 
+    }
+
+    /*
+      Remember the initial state of the service (first iteration only);
+      main() and die() consult it later.
+    */
+    if(initial_service_state == -1)
+      initial_service_state= ssp.dwCurrentState;
+
+    switch(ssp.dwCurrentState)
+    {
+      case SERVICE_STOPPED:
+        return;
+      case SERVICE_RUNNING:
+        if(!ControlService(service, SERVICE_CONTROL_STOP, 
+             (SERVICE_STATUS *)&ssp))
+            die("ControlService failed, error %u\n", GetLastError());
+      case SERVICE_START_PENDING: /* SERVICE_RUNNING falls through to wait here */
+      case SERVICE_STOP_PENDING:
+        if(timeout < 0)
+          die("Service does not stop after %d seconds timeout",shutdown_timeout);
+        Sleep(100);
+        timeout -= 100;
+        break;
+      default:
+        die("Unexpected service state %d",ssp.dwCurrentState);
+    }
+  }
+}
+
+
+/*
+  Shutdown mysql server. Not using mysqladmin, since
+  our --skip-grant-tables do not work anymore after mysql_upgrade
+  that does "flush privileges". Instead, the shutdown event  is set.
+*/
+void initiate_mysqld_shutdown()
+{
+  char event_name[32];
+  DWORD pid= GetProcessId(mysqld_process);
+  /* DWORD is unsigned long: %d would print large process ids as negative */
+  sprintf_s(event_name, "MySQLShutdown%lu", pid);
+  HANDLE shutdown_handle= OpenEvent(EVENT_MODIFY_STATE, FALSE, event_name);
+  if(!shutdown_handle)
+  {
+    die("OpenEvent() failed for shutdown event");
+  }
+
+  if(!SetEvent(shutdown_handle))
+    die("SetEvent() failed");
+  CloseHandle(shutdown_handle);   /* our handle is not needed once the event is set */
+}
+
+
+/*
+  Change service configuration (binPath) to point to mysqld from
+  this installation, after adjusting the ini file of the service (datadir,
+  basedir, default-character-set). Opens the global scm and service
+  handles; every failure terminates the program via die().
+*/
+static void change_service_config()
+{
+  char default_character_set[64];
+  char buf[MAX_PATH];
+  char commandline[3*MAX_PATH + 19];
+  int i;
+
+  scm= OpenSCManager(NULL, NULL, SC_MANAGER_ALL_ACCESS);
+  if(!scm)
+    die("OpenSCManager failed with %u", GetLastError());
+  service= OpenService(scm, opt_service, SERVICE_ALL_ACCESS);
+  if (!service)
+    die("OpenService failed with %u", GetLastError());
+
+  BYTE config_buffer[8*1024];
+  LPQUERY_SERVICE_CONFIGW config= (LPQUERY_SERVICE_CONFIGW)config_buffer;
+  DWORD size= sizeof(config_buffer);
+  DWORD needed;
+  if (!QueryServiceConfigW(service, config, size, &needed))
+    die("QueryServiceConfig failed with %u", GetLastError());
+
+  mysqld_service_properties props;
+  if (get_mysql_service_properties(config->lpBinaryPathName, &props))
+  {
+    die("Not a valid MySQL service");
+  }
+
+  int my_major= MYSQL_VERSION_ID/10000;
+  int my_minor= (MYSQL_VERSION_ID %10000)/100;
+  int my_patch= MYSQL_VERSION_ID%100;
+
+  if(my_major < props.version_major ||
+    (my_major == props.version_major && my_minor < props.version_minor))
+  {
+    die("Can not downgrade, the service is currently running as version %d.%d.%d"
+      ", my version is %d.%d.%d", props.version_major, props.version_minor,
+      props.version_patch, my_major, my_minor, my_patch);
+  }
+
+  if(props.inifile[0] == 0)
+  {
+    /*
+      Weird case, no --defaults-file in service definition, need to create one.
+    */
+    sprintf_s(props.inifile, MAX_PATH, "%s\\my.ini", props.datadir);
+  }
+
+  /*
+    Write datadir to my.ini, after converting  backslashes to
+    unix style slashes.
+  */
+  strcpy_s(buf, MAX_PATH, props.datadir);
+  for(i= 0; buf[i]; i++)
+  {
+    if (buf[i] == '\\')
+      buf[i]= '/';
+  }
+  WritePrivateProfileString("mysqld", "datadir",buf, props.inifile);
+
+  /*
+    Remove basedir from defaults file, otherwise the service wont come up in
+    the new version, and will complain about mismatched message file.
+  */
+  WritePrivateProfileString("mysqld", "basedir",NULL, props.inifile);
+
+  /*
+    Replace default-character-set  with character-set-server, to avoid
+    "default-character-set is deprecated and will be replaced ..."
+    message. The service's ini file (props.inifile) is read and updated.
+  */
+  default_character_set[0]= 0;
+  GetPrivateProfileString("mysqld", "default-character-set", NULL,
+    default_character_set, sizeof(default_character_set), props.inifile);
+  if (default_character_set[0])
+  {
+    WritePrivateProfileString("mysqld", "default-character-set", NULL,
+      props.inifile);
+    WritePrivateProfileString("mysqld", "character-set-server",
+      default_character_set, props.inifile);
+  }
+
+  sprintf_s(defaults_file_param,"--defaults-file=%s", props.inifile);
+  sprintf_s(commandline, "\"%s\" \"%s\" \"%s\"", mysqld_path,
+   defaults_file_param, opt_service);
+  if (!ChangeServiceConfig(service, SERVICE_NO_CHANGE, SERVICE_NO_CHANGE,
+         SERVICE_NO_CHANGE, commandline, NULL, NULL, NULL, NULL, NULL, NULL))
+  {
+    die("ChangeServiceConfig failed with %u", GetLastError());
+  }
+
+}
+
+
+int main(int argc, char **argv)   /* runs the 8 phases; failures exit via die() */
+{
+  int error;
+  MY_INIT(argv[0]);
+  char bindir[FN_REFLEN];
+  char *p;
+
+  /* Parse options */
+  if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
+    die("");
+  if (!opt_service)
+    die("--service=# parameter is mandatory");
+
+  /*
+    Get full path to mysqld, we need it when changing service configuration.
+    Assume installation layout, i.e mysqld.exe, mysqladmin.exe, mysql_upgrade.exe
+    and mysql_upgrade_service.exe are in the same directory.
+  */
+  GetModuleFileName(NULL, bindir, FN_REFLEN);
+  p= strrchr(bindir, FN_LIBCHAR);
+  if(p)
+    *p= 0;
+  sprintf_s(mysqld_path, "%s\\mysqld.exe", bindir);
+  sprintf_s(mysqladmin_path, "%s\\mysqladmin.exe", bindir);
+  sprintf_s(mysqlupgrade_path, "%s\\mysql_upgrade.exe", bindir);
+
+  char *paths[]= {mysqld_path, mysqladmin_path, mysqlupgrade_path};
+  for(int i= 0; i< 3;i++)
+  {
+    if(GetFileAttributes(paths[i]) == INVALID_FILE_ATTRIBUTES)
+      die("File %s does not exist", paths[i]);
+  }
+
+  /*
+    Messages written on stdout should not be buffered,  GUI upgrade program
+    reads them from pipe and uses as progress indicator.
+  */
+  setvbuf(stdout, NULL, _IONBF, 0);
+
+  log("Phase 1/8: Changing service configuration");
+  change_service_config();
+
+  log("Phase 2/8: Stopping service");
+  stop_mysqld_service();
+
+  /*
+    Start mysqld.exe as non-service skipping privileges (so we do not
+    care about the password). But disable networking and enable pipe
+    for communication, for security reasons.
+  */
+  char socket_param[FN_REFLEN];
+  sprintf_s(socket_param,"--socket=mysql_upgrade_service_%d",
+    GetCurrentProcessId());
+
+  log("Phase 3/8: Starting mysqld for upgrade");
+  mysqld_process= (HANDLE)run_tool(P_NOWAIT, mysqld_path,
+    defaults_file_param, "--skip-networking",  "--skip-grant-tables",
+    "--enable-named-pipe",  socket_param, NULL);
+
+  if (mysqld_process == INVALID_HANDLE_VALUE)
+  {
+    die("Cannot start mysqld.exe process, errno=%d", errno);
+  }
+
+  log("Phase 4/8: Waiting for startup to complete");
+  DWORD start_duration_ms= 0;
+  for(;;)
+  {
+    if (WaitForSingleObject(mysqld_process, 0) != WAIT_TIMEOUT)
+      die("mysqld.exe did not start");
+
+    if (run_tool(P_WAIT, mysqladmin_path, "--protocol=pipe",
+      socket_param, "ping",  NULL) == 0)
+    {
+      break;
+    }
+    if (start_duration_ms > startup_timeout*1000)
+      die("Server did not come up in %d seconds",startup_timeout);
+    Sleep(500);
+    start_duration_ms+= 500;
+  }
+
+  log("Phase 5/8: Running mysql_upgrade");
+  int upgrade_err= (int) run_tool(P_WAIT,  mysqlupgrade_path,
+    "--protocol=pipe", "--force",  socket_param,
+    NULL);
+
+  if (upgrade_err)
+    die("mysql_upgrade failed with error code %d\n", upgrade_err);
+
+  log("Phase 6/8: Initiating server shutdown");
+  initiate_mysqld_shutdown();
+
+  log("Phase 7/8: Waiting for shutdown to complete");
+  if (WaitForSingleObject(mysqld_process, shutdown_timeout*1000)
+      != WAIT_OBJECT_0)
+  {
+    /* Shutdown takes too long */
+    die("mysqld does not shutdown.");
+  }
+  CloseHandle(mysqld_process);
+  mysqld_process= NULL;
+
+  log("Phase 8/8: Starting service%s",
+    (initial_service_state == SERVICE_RUNNING)?"":" (skipped)");
+  /* A failed restart must not be reported as a successful upgrade. */
+  if (initial_service_state == SERVICE_RUNNING &&
+      !StartService(service, NULL, NULL))
+  {
+    die("StartService failed with %u", GetLastError());
+  }
+
+  log("Service '%s' successfully upgraded.\nLog file is written to %s",
+    opt_service, logfile_path);
+  CloseServiceHandle(service);
+  CloseServiceHandle(scm);
+  if (logfile_handle)
+    CloseHandle(logfile_handle);
+  my_end(0);
+  exit(0);
+}
+\ No newline at end of file
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 0f5087c6ccf..53f7cfe944f 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -1,4 +1,5 @@
 /* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -50,6 +51,8 @@
 #include "derror.h"       // init_errmessage
 #include "des_key_file.h" // load_des_key_file
 #include "sql_manager.h"  // stop_handle_manager, start_handle_manager
+#include "sql_expression_cache.h" // subquery_cache_miss, subquery_cache_hit
+
 #include <m_ctype.h>
 #include <my_dir.h>
 #include <my_bit.h>
@@ -57,6 +60,7 @@
 #include "rpl_mi.h"
 #include "sql_repl.h"
 #include "rpl_filter.h"
+#include "client_settings.h"
 #include "repl_failsafe.h"
 #include <sql_common.h>
 #include <my_stacktrace.h>
@@ -66,6 +70,7 @@
 #include "sql_audit.h"
 #include "probes_mysql.h"
 #include "scheduler.h"
+#include <waiting_threads.h>
 #include "debug_sync.h"
 #include "sql_callback.h"
 
@@ -98,9 +103,9 @@
 
 #define mysqld_charset &my_charset_latin1
 
-/* We have HAVE_purify below as this speeds up the shutdown of MySQL */
+/* We have HAVE_valgrind below as this speeds up the shutdown of MySQL */
 
-#if defined(HAVE_DEC_3_2_THREADS) || defined(SIGNALS_DONT_BREAK_READ) || defined(HAVE_purify) && defined(__linux__)
+#if defined(HAVE_DEC_3_2_THREADS) || defined(SIGNALS_DONT_BREAK_READ) || defined(HAVE_valgrind) && defined(__linux__)
 #define HAVE_CLOSE_SERVER_SOCK 1
 #endif
 
@@ -189,6 +194,10 @@ typedef fp_except fp_except_t;
 # endif
 #endif
 
+#ifndef HAVE_FCNTL
+#define fcntl(X,Y,Z) 0
+#endif
+
 extern "C" my_bool reopen_fstreams(const char *filename,
                                    FILE *outstream, FILE *errstream);
 
@@ -289,14 +298,15 @@ static my_bool opt_autocommit; ///< for --autocommit command-line option
 /*
   Used with --help for detailed option
 */
-static my_bool opt_help= 0, opt_verbose= 0;
+static my_bool opt_verbose= 0;
 
-arg_cmp_func Arg_comparator::comparator_matrix[5][2] =
+arg_cmp_func Arg_comparator::comparator_matrix[6][2] =
 {{&Arg_comparator::compare_string,     &Arg_comparator::compare_e_string},
  {&Arg_comparator::compare_real,       &Arg_comparator::compare_e_real},
  {&Arg_comparator::compare_int_signed, &Arg_comparator::compare_e_int},
  {&Arg_comparator::compare_row,        &Arg_comparator::compare_e_row},
- {&Arg_comparator::compare_decimal,    &Arg_comparator::compare_e_decimal}};
+ {&Arg_comparator::compare_decimal,    &Arg_comparator::compare_e_decimal},
+ {&Arg_comparator::compare_datetime,   &Arg_comparator::compare_e_datetime}};
 
 /* static variables */
 
@@ -323,16 +333,17 @@ static PSI_rwlock_key key_rwlock_openssl;
 #endif
 #endif /* HAVE_PSI_INTERFACE */
 
+#undef SAFEMALLOC
+
 /* the default log output is log tables */
 static bool lower_case_table_names_used= 0;
 static bool max_long_data_size_used= false;
 static bool volatile select_thread_in_use, signal_thread_in_use;
-/* See Bug#56666 and Bug#56760 */;
-volatile bool ready_to_exit;
+static volatile bool ready_to_exit;
 static my_bool opt_debugging= 0, opt_external_locking= 0, opt_console= 0;
 static my_bool opt_short_log_format= 0;
+static my_bool opt_sync= 0;
 static uint kill_cached_threads, wake_thread;
-static ulong killed_threads;
 static ulong max_used_connections;
 static volatile ulong cached_thread_count= 0;
 static char *mysqld_user, *mysqld_chroot;
@@ -346,16 +357,15 @@ char *default_storage_engine;
 static char compiled_default_collation_name[]= MYSQL_DEFAULT_COLLATION_NAME;
 static I_List<THD> thread_cache;
 static bool binlog_format_used= false;
-
 LEX_STRING opt_init_connect, opt_init_slave;
-
 static mysql_cond_t COND_thread_cache, COND_flush_thread_cache;
 
 /* Global variables */
 
 bool opt_bin_log, opt_ignore_builtin_innodb= 0;
-my_bool opt_log, opt_slow_log;
+my_bool opt_log, opt_slow_log, debug_assert_if_crashed_table= 0, opt_help= 0;
 ulonglong log_output_options;
+my_bool opt_userstat_running, opt_thread_alarm;
 my_bool opt_log_queries_not_using_indexes= 0;
 bool opt_error_log= IF_WIN(1,0);
 bool opt_disable_networking=0, opt_skip_show_db=0;
@@ -367,6 +377,7 @@ my_bool locked_in_memory;
 bool opt_using_transactions;
 bool volatile abort_loop;
 bool volatile shutdown_in_progress;
+uint volatile global_disable_checkpoint;
 /*
   True if the bootstrap thread is running. Protected by LOCK_thread_count,
   just like thread_count.
@@ -386,7 +397,7 @@ bool in_bootstrap= FALSE;
    @brief 'grant_option' is used to indicate if privileges needs
    to be checked, in which case the lock, LOCK_grant, is used
    to protect access to the grant table.
-   @note This flag is dropped in 5.1 
+   @note This flag is dropped in 5.1
    @see grant_init()
  */
 bool volatile grant_option;
@@ -398,14 +409,9 @@ my_bool opt_local_infile, opt_slave_compressed_protocol;
 my_bool opt_safe_user_create = 0;
 my_bool opt_show_slave_auth_info;
 my_bool opt_log_slave_updates= 0;
+my_bool opt_replicate_annotate_rows_events= 0;
 char *opt_slave_skip_errors;
 
-/**
-  compatibility option:
-    - index usage hints (USE INDEX without a FOR clause) behave as in 5.0
-*/
-my_bool old_mode;
-
 /*
   Legacy global handlerton. These will be removed (please do not add more).
 */
@@ -421,6 +427,7 @@ my_bool opt_secure_auth= 0;
 char* opt_secure_file_priv;
 my_bool opt_log_slow_admin_statements= 0;
 my_bool opt_log_slow_slave_statements= 0;
+my_bool opt_query_cache_strip_comments = 0;
 my_bool lower_case_file_system= 0;
 my_bool opt_large_pages= 0;
 my_bool opt_super_large_pages= 0;
@@ -440,11 +447,14 @@ my_bool opt_noacl;
 my_bool sp_automatic_privileges= 1;
 
 ulong opt_binlog_rows_event_max_size;
+my_bool opt_master_verify_checksum= 0;
+my_bool opt_slave_sql_verify_checksum= 1;
 const char *binlog_format_names[]= {"MIXED", "STATEMENT", "ROW", NullS};
 #ifdef HAVE_INITGROUPS
 static bool calling_initgroups= FALSE; /**< Used in SIGSEGV handler. */
 #endif
 uint mysqld_port, test_flags, select_errors, dropping_tables, ha_open_options;
+uint mysqld_extra_port;
 uint mysqld_port_timeout;
 ulong delay_key_write_options;
 uint protocol_version;
@@ -480,11 +490,16 @@ ulong specialflag=0;
 ulong binlog_cache_use= 0, binlog_cache_disk_use= 0;
 ulong binlog_stmt_cache_use= 0, binlog_stmt_cache_disk_use= 0;
 ulong max_connections, max_connect_errors;
+ulong extra_max_connections;
+ulonglong denied_connections;
+my_decimal decimal_zero;
+
 /*
   Maximum length of parameter value which can be set through
   mysql_send_long_data() call.
 */
 ulong max_long_data_size;
+
 /**
   Limit of the total number of prepared statements in the server.
   Is necessary to protect the server against out-of-memory attacks.
@@ -545,9 +560,8 @@ const double log_10[] = {
 time_t server_start_time, flush_status_time;
 
 char mysql_home[FN_REFLEN], pidfile_name[FN_REFLEN], system_time_zone[30];
-char default_logfile_name[FN_REFLEN];
 char *default_tz_name;
-char log_error_file[FN_REFLEN], glob_hostname[FN_REFLEN];
+char log_error_file[FN_REFLEN], glob_hostname[FN_REFLEN], *opt_log_basename;
 char mysql_real_data_home[FN_REFLEN],
      lc_messages_dir[FN_REFLEN], reg_ext[FN_EXTLEN],
      mysql_charsets_dir[FN_REFLEN],
@@ -575,7 +589,6 @@ const char *in_left_expr_name= "<left expr>";
 const char *in_additional_cond= "<IN COND>";
 const char *in_having_cond= "<IN HAVING>";
 
-my_decimal decimal_zero;
 /* classes for comparation parsing/processing */
 Eq_creator eq_creator;
 Ne_creator ne_creator;
@@ -617,12 +630,16 @@ pthread_key(MEM_ROOT**,THR_MALLOC);
 pthread_key(THD*, THR_THD);
 mysql_mutex_t LOCK_thread_count;
 mysql_mutex_t
-  LOCK_status, LOCK_error_log, LOCK_uuid_generator,
+  LOCK_status, LOCK_error_log, LOCK_short_uuid_generator,
   LOCK_delayed_insert, LOCK_delayed_status, LOCK_delayed_create,
   LOCK_crypt,
   LOCK_global_system_variables,
   LOCK_user_conn, LOCK_slave_list, LOCK_active_mi,
   LOCK_connection_count, LOCK_error_messages;
+
+mysql_mutex_t LOCK_stats, LOCK_global_user_client_stats,
+              LOCK_global_table_stats, LOCK_global_index_stats;
+
 /**
   The below lock protects access to two global server variables:
   max_prepared_stmt_count and prepared_stmt_count. These variables
@@ -657,10 +674,11 @@ char *opt_logname, *opt_slow_logname;
 /* Static variables */
 
 static bool kill_in_progress, segfaulted;
+static my_bool opt_stack_trace;
 static my_bool opt_bootstrap, opt_myisam_log;
 static int cleanup_done;
 static ulong opt_specialflag;
-static char *opt_update_logname, *opt_binlog_index_name;
+static char *opt_binlog_index_name;
 char *mysql_home_ptr, *pidfile_name_ptr;
 /** Initial command line arguments (count), after load_defaults().*/
 static int defaults_argc;
@@ -682,6 +700,269 @@ static char *opt_bin_logname;
 int orig_argc;
 char **orig_argv;
 
+#ifdef HAVE_PSI_INTERFACE
+#ifdef HAVE_MMAP
+PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active, key_LOCK_pool;
+#endif /* HAVE_MMAP */
+
+#ifdef HAVE_OPENSSL
+PSI_mutex_key key_LOCK_des_key_file;
+#endif /* HAVE_OPENSSL */
+
+PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_prep_xids,
+  key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi,
+  key_LOCK_connection_count, key_LOCK_crypt, key_LOCK_delayed_create,
+  key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log,
+  key_LOCK_gdl, key_LOCK_global_system_variables,
+  key_LOCK_manager,
+  key_LOCK_prepared_stmt_count,
+  key_LOCK_rpl_status, key_LOCK_server_started, key_LOCK_status,
+  key_LOCK_system_variables_hash, key_LOCK_table_share, key_LOCK_thd_data,
+  key_LOCK_user_conn, key_LOCK_uuid_short_generator, key_LOG_LOCK_log,
+  key_master_info_data_lock, key_master_info_run_lock,
+  key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock,
+  key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
+  key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
+  key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count,
+  key_PARTITION_LOCK_auto_inc;
+PSI_mutex_key key_RELAYLOG_LOCK_index;
+
+PSI_mutex_key key_LOCK_stats,
+  key_LOCK_global_user_client_stats, key_LOCK_global_table_stats,
+  key_LOCK_global_index_stats,
+  key_LOCK_wakeup_ready;
+
+PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered;
+/* Name and flags for each mutex key; registered in init_server_psi_keys(). */
+static PSI_mutex_info all_server_mutexes[]=
+{
+#ifdef HAVE_MMAP
+  { &key_PAGE_lock, "PAGE::lock", 0},
+  { &key_LOCK_sync, "TC_LOG_MMAP::LOCK_sync", 0},
+  { &key_LOCK_active, "TC_LOG_MMAP::LOCK_active", 0},
+  { &key_LOCK_pool, "TC_LOG_MMAP::LOCK_pool", 0},
+#endif /* HAVE_MMAP */
+
+#ifdef HAVE_OPENSSL
+  { &key_LOCK_des_key_file, "LOCK_des_key_file", PSI_FLAG_GLOBAL},
+#endif /* HAVE_OPENSSL */
+
+  { &key_BINLOG_LOCK_index, "MYSQL_BIN_LOG::LOCK_index", 0},
+  { &key_BINLOG_LOCK_prep_xids, "MYSQL_BIN_LOG::LOCK_prep_xids", 0},
+  { &key_RELAYLOG_LOCK_index, "MYSQL_RELAY_LOG::LOCK_index", 0},
+  { &key_delayed_insert_mutex, "Delayed_insert::mutex", 0},
+  { &key_hash_filo_lock, "hash_filo::lock", 0},
+  { &key_LOCK_active_mi, "LOCK_active_mi", PSI_FLAG_GLOBAL},
+  { &key_LOCK_connection_count, "LOCK_connection_count", PSI_FLAG_GLOBAL},
+  { &key_LOCK_crypt, "LOCK_crypt", PSI_FLAG_GLOBAL},
+  { &key_LOCK_delayed_create, "LOCK_delayed_create", PSI_FLAG_GLOBAL},
+  { &key_LOCK_delayed_insert, "LOCK_delayed_insert", PSI_FLAG_GLOBAL},
+  { &key_LOCK_delayed_status, "LOCK_delayed_status", PSI_FLAG_GLOBAL},
+  { &key_LOCK_error_log, "LOCK_error_log", PSI_FLAG_GLOBAL},
+  { &key_LOCK_gdl, "LOCK_gdl", PSI_FLAG_GLOBAL},
+  { &key_LOCK_global_system_variables, "LOCK_global_system_variables", PSI_FLAG_GLOBAL},
+  { &key_LOCK_manager, "LOCK_manager", PSI_FLAG_GLOBAL},
+  { &key_LOCK_prepared_stmt_count, "LOCK_prepared_stmt_count", PSI_FLAG_GLOBAL},
+  { &key_LOCK_rpl_status, "LOCK_rpl_status", PSI_FLAG_GLOBAL},
+  { &key_LOCK_server_started, "LOCK_server_started", PSI_FLAG_GLOBAL},
+  { &key_LOCK_status, "LOCK_status", PSI_FLAG_GLOBAL},
+  { &key_LOCK_system_variables_hash, "LOCK_system_variables_hash", PSI_FLAG_GLOBAL},
+  { &key_LOCK_table_share, "LOCK_table_share", PSI_FLAG_GLOBAL},
+  { &key_LOCK_stats, "LOCK_stats", PSI_FLAG_GLOBAL},
+  { &key_LOCK_global_user_client_stats, "LOCK_global_user_client_stats", PSI_FLAG_GLOBAL},
+  { &key_LOCK_global_table_stats, "LOCK_global_table_stats", PSI_FLAG_GLOBAL},
+  { &key_LOCK_global_index_stats, "LOCK_global_index_stats", PSI_FLAG_GLOBAL},
+  { &key_LOCK_wakeup_ready, "THD::LOCK_wakeup_ready", 0},
+  { &key_LOCK_thd_data, "THD::LOCK_thd_data", 0},
+  { &key_LOCK_user_conn, "LOCK_user_conn", PSI_FLAG_GLOBAL},
+  { &key_LOCK_uuid_short_generator, "LOCK_uuid_short_generator", PSI_FLAG_GLOBAL},
+  { &key_LOG_LOCK_log, "LOG::LOCK_log", 0},
+  { &key_master_info_data_lock, "Master_info::data_lock", 0},
+  { &key_master_info_run_lock, "Master_info::run_lock", 0},
+  { &key_mutex_slave_reporting_capability_err_lock, "Slave_reporting_capability::err_lock", 0},
+  { &key_relay_log_info_data_lock, "Relay_log_info::data_lock", 0},
+  { &key_relay_log_info_log_space_lock, "Relay_log_info::log_space_lock", 0},
+  { &key_relay_log_info_run_lock, "Relay_log_info::run_lock", 0},
+  { &key_structure_guard_mutex, "Query_cache::structure_guard_mutex", 0},
+  { &key_TABLE_SHARE_LOCK_ha_data, "TABLE_SHARE::LOCK_ha_data", 0},
+  { &key_LOCK_error_messages, "LOCK_error_messages", PSI_FLAG_GLOBAL},
+  { &key_LOCK_prepare_ordered, "LOCK_prepare_ordered", PSI_FLAG_GLOBAL},
+  { &key_LOCK_commit_ordered, "LOCK_commit_ordered", PSI_FLAG_GLOBAL},
+  { &key_LOG_INFO_lock, "LOG_INFO::lock", 0},
+  { &key_LOCK_thread_count, "LOCK_thread_count", PSI_FLAG_GLOBAL},
+  { &key_PARTITION_LOCK_auto_inc, "HA_DATA_PARTITION::LOCK_auto_inc", 0}
+};
+
+PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger,
+  key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave,
+  key_rwlock_LOCK_system_variables_hash, key_rwlock_query_cache_query_lock;
+/* Name and flags for each rwlock key; registered in init_server_psi_keys(). */
+static PSI_rwlock_info all_server_rwlocks[]=
+{
+#if defined (HAVE_OPENSSL) && !defined(HAVE_YASSL)
+  { &key_rwlock_openssl, "CRYPTO_dynlock_value::lock", 0},
+#endif
+  { &key_rwlock_LOCK_grant, "LOCK_grant", PSI_FLAG_GLOBAL},
+  { &key_rwlock_LOCK_logger, "LOGGER::LOCK_logger", 0},
+  { &key_rwlock_LOCK_sys_init_connect, "LOCK_sys_init_connect", PSI_FLAG_GLOBAL},
+  { &key_rwlock_LOCK_sys_init_slave, "LOCK_sys_init_slave", PSI_FLAG_GLOBAL},
+  { &key_rwlock_LOCK_system_variables_hash, "LOCK_system_variables_hash", PSI_FLAG_GLOBAL},
+  { &key_rwlock_query_cache_query_lock, "Query_cache_query::lock", 0}
+};
+
+#ifdef HAVE_MMAP
+PSI_cond_key key_PAGE_cond, key_COND_active, key_COND_pool, key_COND_queue_busy;
+#endif /* HAVE_MMAP */
+
+PSI_cond_key key_BINLOG_COND_prep_xids, key_BINLOG_update_cond,
+  key_COND_cache_status_changed, key_COND_manager,
+  key_COND_rpl_status, key_COND_server_started,
+  key_delayed_insert_cond, key_delayed_insert_cond_client,
+  key_item_func_sleep_cond, key_master_info_data_cond,
+  key_master_info_start_cond, key_master_info_stop_cond,
+  key_relay_log_info_data_cond, key_relay_log_info_log_space_cond,
+  key_relay_log_info_start_cond, key_relay_log_info_stop_cond,
+  key_TABLE_SHARE_cond, key_user_level_lock_cond,
+  key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache;
+PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready;
+/* NOTE(review): key_COND_queue_busy has no entry below - confirm intent. */
+static PSI_cond_info all_server_conds[]=
+{
+#if (defined(_WIN32) || defined(HAVE_SMEM)) && !defined(EMBEDDED_LIBRARY)
+  { &key_COND_handler_count, "COND_handler_count", PSI_FLAG_GLOBAL},
+#endif /* _WIN32 || HAVE_SMEM && !EMBEDDED_LIBRARY */
+#ifdef HAVE_MMAP
+  { &key_PAGE_cond, "PAGE::cond", 0},
+  { &key_COND_active, "TC_LOG_MMAP::COND_active", 0},
+  { &key_COND_pool, "TC_LOG_MMAP::COND_pool", 0},
+#endif /* HAVE_MMAP */
+  { &key_BINLOG_COND_prep_xids, "MYSQL_BIN_LOG::COND_prep_xids", 0},
+  { &key_BINLOG_update_cond, "MYSQL_BIN_LOG::update_cond", 0},
+  { &key_RELAYLOG_update_cond, "MYSQL_RELAY_LOG::update_cond", 0},
+  { &key_COND_wakeup_ready, "THD::COND_wakeup_ready", 0},
+  { &key_COND_cache_status_changed, "Query_cache::COND_cache_status_changed", 0},
+  { &key_COND_manager, "COND_manager", PSI_FLAG_GLOBAL},
+  { &key_COND_rpl_status, "COND_rpl_status", PSI_FLAG_GLOBAL},
+  { &key_COND_server_started, "COND_server_started", PSI_FLAG_GLOBAL},
+  { &key_delayed_insert_cond, "Delayed_insert::cond", 0},
+  { &key_delayed_insert_cond_client, "Delayed_insert::cond_client", 0},
+  { &key_item_func_sleep_cond, "Item_func_sleep::cond", 0},
+  { &key_master_info_data_cond, "Master_info::data_cond", 0},
+  { &key_master_info_start_cond, "Master_info::start_cond", 0},
+  { &key_master_info_stop_cond, "Master_info::stop_cond", 0},
+  { &key_relay_log_info_data_cond, "Relay_log_info::data_cond", 0},
+  { &key_relay_log_info_log_space_cond, "Relay_log_info::log_space_cond", 0},
+  { &key_relay_log_info_start_cond, "Relay_log_info::start_cond", 0},
+  { &key_relay_log_info_stop_cond, "Relay_log_info::stop_cond", 0},
+  { &key_TABLE_SHARE_cond, "TABLE_SHARE::cond", 0},
+  { &key_user_level_lock_cond, "User_level_lock::cond", 0},
+  { &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL},
+  { &key_COND_thread_cache, "COND_thread_cache", PSI_FLAG_GLOBAL},
+  { &key_COND_flush_thread_cache, "COND_flush_thread_cache", PSI_FLAG_GLOBAL}
+};
+
+PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert,
+  key_thread_handle_manager, key_thread_main,
+  key_thread_one_connection, key_thread_signal_hand;
+/* Name and flags for each thread key; registered in init_server_psi_keys(). */
+static PSI_thread_info all_server_threads[]=
+{
+#if (defined(_WIN32) || defined(HAVE_SMEM)) && !defined(EMBEDDED_LIBRARY)
+  { &key_thread_handle_con_namedpipes, "con_named_pipes", PSI_FLAG_GLOBAL},
+#endif /* _WIN32 || HAVE_SMEM && !EMBEDDED_LIBRARY */
+
+#if defined(HAVE_SMEM) && !defined(EMBEDDED_LIBRARY)
+  { &key_thread_handle_con_sharedmem, "con_shared_mem", PSI_FLAG_GLOBAL},
+#endif /* HAVE_SMEM && !EMBEDDED_LIBRARY */
+
+#if (defined(_WIN32) || defined(HAVE_SMEM)) && !defined(EMBEDDED_LIBRARY)
+  { &key_thread_handle_con_sockets, "con_sockets", PSI_FLAG_GLOBAL},
+#endif /* _WIN32 || HAVE_SMEM && !EMBEDDED_LIBRARY */
+
+#ifdef __WIN__
+  { &key_thread_handle_shutdown, "shutdown", PSI_FLAG_GLOBAL},
+#endif /* __WIN__ */
+
+  { &key_thread_bootstrap, "bootstrap", PSI_FLAG_GLOBAL},
+  { &key_thread_delayed_insert, "delayed_insert", 0},
+  { &key_thread_handle_manager, "manager", PSI_FLAG_GLOBAL},
+  { &key_thread_main, "main", PSI_FLAG_GLOBAL},
+  { &key_thread_one_connection, "one_connection", 0},
+  { &key_thread_signal_hand, "signal_handler", PSI_FLAG_GLOBAL}
+};
+
+PSI_file_key key_file_binlog, key_file_binlog_index, key_file_casetest,
+  key_file_dbopt, key_file_des_key_file, key_file_ERRMSG, key_select_to_file,
+  key_file_fileparser, key_file_frm, key_file_global_ddl_log, key_file_load,
+  key_file_loadfile, key_file_log_event_data, key_file_log_event_info,
+  key_file_master_info, key_file_misc, key_file_partition,
+  key_file_pid, key_file_relay_log_info, key_file_send_file, key_file_tclog,
+  key_file_trg, key_file_trn, key_file_init;
+PSI_file_key key_file_query_log, key_file_slow_log;
+PSI_file_key key_file_relaylog, key_file_relaylog_index;
+/* Name and flags for each file key; registered in init_server_psi_keys(). */
+static PSI_file_info all_server_files[]=
+{
+  { &key_file_binlog, "binlog", 0},
+  { &key_file_binlog_index, "binlog_index", 0},
+  { &key_file_relaylog, "relaylog", 0},
+  { &key_file_relaylog_index, "relaylog_index", 0},
+  { &key_file_casetest, "casetest", 0},
+  { &key_file_dbopt, "dbopt", 0},
+  { &key_file_des_key_file, "des_key_file", 0},
+  { &key_file_ERRMSG, "ERRMSG", 0},
+  { &key_select_to_file, "select_to_file", 0},
+  { &key_file_fileparser, "file_parser", 0},
+  { &key_file_frm, "FRM", 0},
+  { &key_file_global_ddl_log, "global_ddl_log", 0},
+  { &key_file_load, "load", 0},
+  { &key_file_loadfile, "LOAD_FILE", 0},
+  { &key_file_log_event_data, "log_event_data", 0},
+  { &key_file_log_event_info, "log_event_info", 0},
+  { &key_file_master_info, "master_info", 0},
+  { &key_file_misc, "misc", 0},
+  { &key_file_partition, "partition", 0},
+  { &key_file_pid, "pid", 0},
+  { &key_file_query_log, "query_log", 0},
+  { &key_file_relay_log_info, "relay_log_info", 0},
+  { &key_file_send_file, "send_file", 0},
+  { &key_file_slow_log, "slow_log", 0},
+  { &key_file_tclog, "tclog", 0},
+  { &key_file_trg, "trigger_name", 0},
+  { &key_file_trn, "trigger", 0},
+  { &key_file_init, "init", 0}
+};
+
+/**
+  Register the server's performance schema keys (mutexes, rwlocks,
+  conditions, threads and files) under the "sql" category.
+*/
+void init_server_psi_keys(void)
+{
+  const char *const psi_category= "sql";
+
+  /* Nothing to register with when PSI_server is not set. */
+  if (!PSI_server)
+    return;
+
+  PSI_server->register_mutex(psi_category, all_server_mutexes,
+                             (int) array_elements(all_server_mutexes));
+
+  PSI_server->register_rwlock(psi_category, all_server_rwlocks,
+                              (int) array_elements(all_server_rwlocks));
+
+  PSI_server->register_cond(psi_category, all_server_conds,
+                            (int) array_elements(all_server_conds));
+
+  PSI_server->register_thread(psi_category, all_server_threads,
+                              (int) array_elements(all_server_threads));
+
+  PSI_server->register_file(psi_category, all_server_files,
+                            (int) array_elements(all_server_files));
+}
+
+#endif /* HAVE_PSI_INTERFACE */
+
 /*
   Since buffered_option_error_reporter is only used currently
   for parsing performance schema options, this code is not needed
@@ -845,13 +1126,12 @@ C_MODE_END
 #endif /* !EMBEDDED_LIBRARY */
 #endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */
 
-static my_socket unix_sock,ip_sock;
-struct rand_struct sql_rand; ///< used by sql_class.cc:THD::THD()
+static my_socket unix_sock, base_ip_sock, extra_ip_sock;
+struct my_rnd_struct sql_rand; ///< used by sql_class.cc:THD::THD()
 
 #ifndef EMBEDDED_LIBRARY
 struct passwd *user_info;
 static pthread_t select_thread;
-static uint thr_kill_signal;
 #endif
 
 /* OS specific variables */
@@ -890,7 +1170,7 @@ bool mysqld_embedded=1;
 static my_bool plugins_are_initialized= FALSE;
 
 #ifndef DBUG_OFF
-static const char* default_dbug_option;
+static const char* default_dbug_option, *current_dbug_option;
 #endif
 #ifdef HAVE_LIBWRAP
 const char *libwrapName= NULL;
@@ -911,6 +1191,10 @@ my_bool opt_use_ssl  = 0;
 char *opt_ssl_ca= NULL, *opt_ssl_capath= NULL, *opt_ssl_cert= NULL,
      *opt_ssl_cipher= NULL, *opt_ssl_key= NULL;
 
+static scheduler_functions thread_scheduler_struct, extra_thread_scheduler_struct;
+scheduler_functions *thread_scheduler= &thread_scheduler_struct,
+                    *extra_thread_scheduler= &extra_thread_scheduler_struct;
+
 #ifdef HAVE_OPENSSL
 #include <openssl/crypto.h>
 #ifndef HAVE_YASSL
@@ -936,7 +1220,7 @@ struct st_VioSSLFd *ssl_acceptor_fd;
   Number of currently active user connections. The variable is protected by
   LOCK_connection_count.
 */
-uint connection_count= 0;
+uint connection_count= 0, extra_connection_count= 0;
 
 /* Function declarations */
 
@@ -1020,7 +1304,7 @@ static void close_connections(void)
 	break;
     }
 #ifdef EXTRA_DEBUG
-    if (error != 0 && !count++)
+    if (error != 0 && error != ETIMEDOUT && !count++)
       sql_print_error("Got error %d from mysql_cond_timedwait", error);
 #endif
     close_server_sock();
@@ -1033,11 +1317,17 @@ static void close_connections(void)
   DBUG_PRINT("quit",("Closing sockets"));
   if (!opt_disable_networking )
   {
-    if (ip_sock != INVALID_SOCKET)
+    if (base_ip_sock != INVALID_SOCKET)
     {
-      (void) shutdown(ip_sock, SHUT_RDWR);
-      (void) closesocket(ip_sock);
-      ip_sock= INVALID_SOCKET;
+      (void) shutdown(base_ip_sock, SHUT_RDWR);
+      (void) closesocket(base_ip_sock);
+      base_ip_sock= INVALID_SOCKET;
+    }
+    if (extra_ip_sock != INVALID_SOCKET)
+    {
+      (void) shutdown(extra_ip_sock, SHUT_RDWR);
+      (void) closesocket(extra_ip_sock);
+      extra_ip_sock= INVALID_SOCKET;
     }
   }
 #ifdef _WIN32
@@ -1102,9 +1392,19 @@ static void close_connections(void)
       mysql_mutex_lock(&tmp->mysys_var->mutex);
       if (tmp->mysys_var->current_cond)
       {
-        mysql_mutex_lock(tmp->mysys_var->current_mutex);
-        mysql_cond_broadcast(tmp->mysys_var->current_cond);
-        mysql_mutex_unlock(tmp->mysys_var->current_mutex);
+        uint i;
+        for (i=0; i < 2; i++)
+        {
+          int ret= mysql_mutex_trylock(tmp->mysys_var->current_mutex);
+          mysql_cond_broadcast(tmp->mysys_var->current_cond);
+          if (!ret)
+          {
+            /* Thread has surely got the signal, unlock and abort */
+            mysql_mutex_unlock(tmp->mysys_var->current_mutex);
+            break;
+          }
+          sleep(1);
+        }
       }
       mysql_mutex_unlock(&tmp->mysys_var->mutex);
     }
@@ -1115,8 +1415,9 @@ static void close_connections(void)
   Events::deinit();
   end_slave();
 
-  if (thread_count)
-    sleep(2);					// Give threads time to die
+  /* Give threads time to die. */
+  for (int i= 0; thread_count && i < 100; i++)
+    my_sleep(20000);
 
   /*
     Force remaining threads to die by closing the connection to the client
@@ -1158,32 +1459,47 @@ static void close_connections(void)
   }
   mysql_mutex_unlock(&LOCK_thread_count);
 
-  close_active_mi();
   DBUG_PRINT("quit",("close_connections thread"));
   DBUG_VOID_RETURN;
 }
 
 
+#ifdef HAVE_CLOSE_SERVER_SOCK
+static void close_socket(my_socket sock, const char *info)
+{
+  DBUG_ENTER("close_socket");
+  /* shutdown() 'sock' unless it is INVALID_SOCKET; 'info' labels the trace */
+  if (sock != INVALID_SOCKET)
+  {
+    DBUG_PRINT("info", ("calling shutdown on %s socket", info));
+    (void) shutdown(sock, SHUT_RDWR);
+#if defined(__NETWARE__)
+    /*
+      The following code is disabled for normal systems as it causes MySQL
+      to hang on AIX 4.3 during shutdown
+    */
+    DBUG_PRINT("info", ("calling closesocket on %s socket", info));
+    (void) closesocket(sock);
+#endif
+  }
+  DBUG_VOID_RETURN;
+}
+#endif
+
+
 static void close_server_sock()
 {
 #ifdef HAVE_CLOSE_SERVER_SOCK
   DBUG_ENTER("close_server_sock");
-  my_socket tmp_sock;
-  tmp_sock=ip_sock;
-  if (tmp_sock != INVALID_SOCKET)
-  {
-    ip_sock=INVALID_SOCKET;
-    DBUG_PRINT("info",("calling shutdown on TCP/IP socket"));
-    (void) shutdown(tmp_sock, SHUT_RDWR);
-  }
-  tmp_sock=unix_sock;
-  if (tmp_sock != INVALID_SOCKET)
-  {
-    unix_sock=INVALID_SOCKET;
-    DBUG_PRINT("info",("calling shutdown on unix socket"));
-    (void) shutdown(tmp_sock, SHUT_RDWR);
+  /* shutdown() each server socket; close_socket() skips INVALID_SOCKET. */
+  close_socket(base_ip_sock, "TCP/IP");
+  close_socket(extra_ip_sock, "TCP/IP");
+  close_socket(unix_sock, "unix/IP");
+  /* unix_sock was passed by value above, so it still holds its old value. */
+  if (unix_sock != INVALID_SOCKET)
     (void) unlink(mysqld_unix_port);
-  }
+  base_ip_sock= extra_ip_sock= unix_sock= INVALID_SOCKET;
+
   DBUG_VOID_RETURN;
 #endif
 }
@@ -1276,21 +1592,21 @@ static void __cdecl kill_server(int sig_ptr)
   else
     sql_print_error(ER_DEFAULT(ER_GOT_SIGNAL),my_progname,sig); /* purecov: inspected */
 
-#if defined(HAVE_SMEM) && defined(__WIN__)    
-  /*    
-   Send event to smem_event_connect_request for aborting    
-   */    
+#ifdef HAVE_SMEM
+  /*
+    Send event to smem_event_connect_request for aborting
+  */
   if (opt_enable_shared_memory)
   {
-    if (!SetEvent(smem_event_connect_request))    
-    {      
+    if (!SetEvent(smem_event_connect_request))
+    {
       DBUG_PRINT("error",
                  ("Got error: %ld from SetEvent of smem_event_connect_request",
-                  GetLastError()));    
+                  GetLastError()));
     }
   }
-#endif  
-  
+#endif
+
   close_connections();
   if (sig != MYSQL_KILL_SIGNAL &&
       sig != 0)
@@ -1402,13 +1718,9 @@ static void mysqld_exit(int exit_code)
   mysql_audit_finalize();
   clean_up_mutexes();
   clean_up_error_log_mutex();
-#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE
-  /*
-    Bug#56666 needs to be fixed before calling:
-    shutdown_performance_schema();
-  */
-#endif
-  my_end(opt_endinfo ? MY_CHECK_ERROR | MY_GIVE_INFO : 0);
+  my_end((opt_endinfo ? MY_CHECK_ERROR | MY_GIVE_INFO : 0) | MY_DONT_FREE_DBUG);
+  shutdown_performance_schema();        // we do it as late as possible
+  DBUG_END();                           // but this - even later
   exit(exit_code); /* purecov: inspected */
 }
 
@@ -1420,6 +1732,7 @@ void clean_up(bool print_message)
   if (cleanup_done++)
     return; /* purecov: inspected */
 
+  close_active_mi();
   stop_handle_manager();
   release_ddl_log();
 
@@ -1467,19 +1780,25 @@ void clean_up(bool print_message)
   table_def_free();
   mdl_destroy();
   key_caches.delete_elements((void (*)(const char*, uchar*)) free_key_cache);
+  wt_end();
   multi_keycache_free();
+  sp_cache_end();
   free_status_vars();
   end_thr_alarm(1);			/* Free allocated memory */
   my_free_open_file_info();
   if (defaults_argv)
     free_defaults(defaults_argv);
   free_tmpdir(&mysql_tmpdir_list);
-  my_free(opt_bin_logname);
   bitmap_free(&temp_pool);
   free_max_user_conn();
+  free_global_user_stats();
+  free_global_client_stats();
+  free_global_table_stats();
+  free_global_index_stats();
 #ifdef HAVE_REPLICATION
   end_slave_list();
 #endif
+  my_uuid_end();
   delete binlog_filter;
   delete rpl_filter;
   end_ssl();
@@ -1498,12 +1817,13 @@ void clean_up(bool print_message)
     sql_print_information(ER_DEFAULT(ER_SHUTDOWN_COMPLETE),my_progname);
   cleanup_errmsgs();
   MYSQL_CALLBACK(thread_scheduler, end, ());
-  mysql_client_plugin_deinit();
+  mysql_library_end();
   finish_client_errs();
   (void) my_error_unregister(ER_ERROR_FIRST, ER_ERROR_LAST); // finish server errs
   DBUG_PRINT("quit", ("Error messages freed"));
   /* Tell main we are ready */
   logger.cleanup_end();
+  sys_var_end();
   my_atomic_rwlock_destroy(&global_query_id_lock);
   my_atomic_rwlock_destroy(&thread_running_lock);
   mysql_mutex_lock(&LOCK_thread_count);
@@ -1512,7 +1832,6 @@ void clean_up(bool print_message)
   /* do the broadcast inside the lock to ensure that my_end() is not called */
   mysql_cond_broadcast(&COND_thread_count);
   mysql_mutex_unlock(&LOCK_thread_count);
-  sys_var_end();
 
   /*
     The following lines may never be executed as the main thread may have
@@ -1537,15 +1856,16 @@ static void wait_for_signal_thread_to_end()
   */
   for (i= 0 ; i < 100 && signal_thread_in_use; i++)
   {
-    if (pthread_kill(signal_thread, MYSQL_KILL_SIGNAL) != ESRCH)
+    if (pthread_kill(signal_thread, MYSQL_KILL_SIGNAL) == ESRCH)
       break;
     my_sleep(100);				// Give it time to die
   }
 }
-
+#endif /*EMBEDDED_LIBRARY*/
 
 static void clean_up_mutexes()
 {
+  DBUG_ENTER("clean_up_mutexes");
   mysql_rwlock_destroy(&LOCK_grant);
   mysql_mutex_destroy(&LOCK_thread_count);
   mysql_mutex_destroy(&LOCK_status);
@@ -1556,32 +1876,40 @@ static void clean_up_mutexes()
   mysql_mutex_destroy(&LOCK_crypt);
   mysql_mutex_destroy(&LOCK_user_conn);
   mysql_mutex_destroy(&LOCK_connection_count);
+  mysql_mutex_destroy(&LOCK_stats);
+  mysql_mutex_destroy(&LOCK_global_user_client_stats);
+  mysql_mutex_destroy(&LOCK_global_table_stats);
+  mysql_mutex_destroy(&LOCK_global_index_stats);
 #ifdef HAVE_OPENSSL
   mysql_mutex_destroy(&LOCK_des_key_file);
 #ifndef HAVE_YASSL
   for (int i= 0; i < CRYPTO_num_locks(); ++i)
     mysql_rwlock_destroy(&openssl_stdlocks[i].lock);
   OPENSSL_free(openssl_stdlocks);
-#endif
-#endif
+#endif /* HAVE_YASSL */
+#endif /* HAVE_OPENSSL */
 #ifdef HAVE_REPLICATION
   mysql_mutex_destroy(&LOCK_rpl_status);
   mysql_cond_destroy(&COND_rpl_status);
-#endif
+#endif /* HAVE_REPLICATION */
   mysql_mutex_destroy(&LOCK_active_mi);
   mysql_rwlock_destroy(&LOCK_sys_init_connect);
   mysql_rwlock_destroy(&LOCK_sys_init_slave);
   mysql_mutex_destroy(&LOCK_global_system_variables);
   mysql_rwlock_destroy(&LOCK_system_variables_hash);
-  mysql_mutex_destroy(&LOCK_uuid_generator);
+  mysql_mutex_destroy(&LOCK_short_uuid_generator);
   mysql_mutex_destroy(&LOCK_prepared_stmt_count);
   mysql_mutex_destroy(&LOCK_error_messages);
   mysql_cond_destroy(&COND_thread_count);
   mysql_cond_destroy(&COND_thread_cache);
   mysql_cond_destroy(&COND_flush_thread_cache);
   mysql_cond_destroy(&COND_manager);
+  mysql_mutex_destroy(&LOCK_server_started);
+  mysql_cond_destroy(&COND_server_started);
+  mysql_mutex_destroy(&LOCK_prepare_ordered);
+  mysql_mutex_destroy(&LOCK_commit_ordered);
+  DBUG_VOID_RETURN;
 }
-#endif /*EMBEDDED_LIBRARY*/
 
 
 /****************************************************************************
@@ -1754,127 +2082,143 @@ static void set_root(const char *path)
 #endif
 }
 
+/**
+   Create a listening TCP socket on 'port' (calls unireg_abort(1) on failure)
+*/
 
-static void network_init(void)
+static my_socket activate_tcp_port(uint port)
 {
-#ifdef HAVE_SYS_UN_H
-  struct sockaddr_un	UNIXaddr;
-#endif
+  struct addrinfo *ai, *a;
+  struct addrinfo hints;
+  int error;
   int	arg;
-  int   ret;
-  uint  waited;
-  uint  this_wait;
-  uint  retry;
   char port_buf[NI_MAXSERV];
-  DBUG_ENTER("network_init");
-  LINT_INIT(ret);
-
-  if (MYSQL_CALLBACK_ELSE(thread_scheduler, init, (), 0))
-    unireg_abort(1);			/* purecov: inspected */
+  my_socket ip_sock= INVALID_SOCKET;
+  DBUG_ENTER("activate_tcp_port");
+  DBUG_PRINT("general",("IP Socket is %u",port));
 
-  set_ports();
+  bzero(&hints, sizeof (hints));
+  hints.ai_flags= AI_PASSIVE;
+  hints.ai_socktype= SOCK_STREAM;
+  hints.ai_family= AF_UNSPEC;
 
-  if (mysqld_port != 0 && !opt_disable_networking && !opt_bootstrap)
+  my_snprintf(port_buf, NI_MAXSERV, "%u", port);  /* port is unsigned */
+  error= getaddrinfo(my_bind_addr_str, port_buf, &hints, &ai);
+  if (error != 0)
   {
-    struct addrinfo *ai, *a;
-    struct addrinfo hints;
-    int error;
-    DBUG_PRINT("general",("IP Socket is %d",mysqld_port));
-
-    bzero(&hints, sizeof (hints));
-    hints.ai_flags= AI_PASSIVE;
-    hints.ai_socktype= SOCK_STREAM;
-    hints.ai_family= AF_UNSPEC;
-
-    my_snprintf(port_buf, NI_MAXSERV, "%d", mysqld_port);
-    error= getaddrinfo(my_bind_addr_str, port_buf, &hints, &ai);
-    if (error != 0)
-    {
-      DBUG_PRINT("error",("Got error: %d from getaddrinfo()", error));
-      sql_perror(ER_DEFAULT(ER_IPSOCK_ERROR));  /* purecov: tested */
-      unireg_abort(1);				/* purecov: tested */
-    }
+    DBUG_PRINT("error",("Got error: %d from getaddrinfo()", error));
+    sql_perror(ER_DEFAULT(ER_IPSOCK_ERROR));  /* purecov: tested */
+    unireg_abort(1);				/* purecov: tested */
+  }
 
-    for (a= ai; a != NULL; a= a->ai_next)
-    {
-      ip_sock= socket(a->ai_family, a->ai_socktype, a->ai_protocol);
-      if (ip_sock != INVALID_SOCKET)
-        break;
-    }
+  for (a= ai; a != NULL; a= a->ai_next)
+  {
+    ip_sock= socket(a->ai_family, a->ai_socktype, a->ai_protocol);
+    if (ip_sock != INVALID_SOCKET)
+      break;
+  }
 
-    if (ip_sock == INVALID_SOCKET)
-    {
-      DBUG_PRINT("error",("Got error: %d from socket()",socket_errno));
-      sql_perror(ER_DEFAULT(ER_IPSOCK_ERROR));  /* purecov: tested */
-      unireg_abort(1);				/* purecov: tested */
-    }
+  if (ip_sock == INVALID_SOCKET)
+  {
+    DBUG_PRINT("error",("Got error: %d from socket()",socket_errno));
+    sql_perror(ER_DEFAULT(ER_IPSOCK_ERROR));  /* purecov: tested */
+    unireg_abort(1);				/* purecov: tested */
+  }
 
 #ifndef __WIN__
-    /*
-      We should not use SO_REUSEADDR on windows as this would enable a
-      user to open two mysqld servers with the same TCP/IP port.
-    */
-    arg= 1;
-    (void) setsockopt(ip_sock,SOL_SOCKET,SO_REUSEADDR,(char*)&arg,sizeof(arg));
+  /*
+    We should not use SO_REUSEADDR on windows as this would enable a
+    user to open two mysqld servers with the same TCP/IP port.
+  */
+  arg= 1;
+  (void) setsockopt(ip_sock,SOL_SOCKET,SO_REUSEADDR,(char*)&arg,sizeof(arg));
 #endif /* __WIN__ */
 
 #ifdef IPV6_V6ONLY
-     /*
-       For interoperability with older clients, IPv6 socket should
-       listen on both IPv6 and IPv4 wildcard addresses.
-       Turn off IPV6_V6ONLY option.
+   /*
+     For interoperability with older clients, IPv6 socket should
+     listen on both IPv6 and IPv4 wildcard addresses.
+     Turn off IPV6_V6ONLY option.
 
-       NOTE: this will work starting from Windows Vista only.
-       On Windows XP dual stack is not available, so it will not
-       listen on the corresponding IPv4-address.
-     */
-    if (a->ai_family == AF_INET6)
-    {
-      arg= 0;
-      (void) setsockopt(ip_sock, IPPROTO_IPV6, IPV6_V6ONLY, (char*)&arg,
-                sizeof(arg));
-    }
+     NOTE: this will work starting from Windows Vista only.
+     On Windows XP dual stack is not available, so it will not
+     listen on the corresponding IPv4-address.
+   */
+  if (a->ai_family == AF_INET6)
+  {
+    arg= 0;
+    (void) setsockopt(ip_sock, IPPROTO_IPV6, IPV6_V6ONLY, (char*)&arg,
+              sizeof(arg));
+  }
 #endif
-    /*
-      Sometimes the port is not released fast enough when stopping and
-      restarting the server. This happens quite often with the test suite
-      on busy Linux systems. Retry to bind the address at these intervals:
-      Sleep intervals: 1, 2, 4,  6,  9, 13, 17, 22, ...
-      Retry at second: 1, 3, 7, 13, 22, 35, 52, 74, ...
-      Limit the sequence by mysqld_port_timeout (set --port-open-timeout=#).
-    */
-    for (waited= 0, retry= 1; ; retry++, waited+= this_wait)
-    {
-      if (((ret= bind(ip_sock, a->ai_addr, a->ai_addrlen)) >= 0 ) ||
-          (socket_errno != SOCKET_EADDRINUSE) ||
-          (waited >= mysqld_port_timeout))
-        break;
-      sql_print_information("Retrying bind on TCP/IP port %u", mysqld_port);
-      this_wait= retry * retry / 3 + 1;
-      sleep(this_wait);
-    }
-    freeaddrinfo(ai);
-    if (ret < 0)
-    {
-      DBUG_PRINT("error",("Got error: %d from bind",socket_errno));
-      sql_perror("Can't start server: Bind on TCP/IP port");
-      sql_print_error("Do you already have another mysqld server running on port: %d ?",mysqld_port);
-      unireg_abort(1);
-    }
-    if (listen(ip_sock,(int) back_log) < 0)
-    {
-      sql_perror("Can't start server: listen() on TCP/IP port");
-      sql_print_error("listen() on TCP/IP failed with error %d",
-		      socket_errno);
-      unireg_abort(1);
-    }
+  /*
+    Sometimes the port is not released fast enough when stopping and
+    restarting the server. This happens quite often with the test suite
+    on busy Linux systems. Retry to bind the address at these intervals:
+    Sleep intervals: 1, 2, 4,  6,  9, 13, 17, 22, ...
+    Retry at second: 1, 3, 7, 13, 22, 35, 52, 74, ...
+    Limit the sequence by mysqld_port_timeout (set --port-open-timeout=#).
+  */
+  int ret;
+  uint waited, retry, this_wait;
+  for (waited= 0, retry= 1; ; retry++, waited+= this_wait)
+  {
+    if (((ret= bind(ip_sock, a->ai_addr, a->ai_addrlen)) >= 0 ) ||
+        (socket_errno != SOCKET_EADDRINUSE) ||
+        (waited >= mysqld_port_timeout))
+      break;
+    sql_print_information("Retrying bind on TCP/IP port %u", port);
+    this_wait= retry * retry / 3 + 1;
+    sleep(this_wait);
+  }
+  freeaddrinfo(ai);
+  if (ret < 0)
+  {
+    char buff[100];
+    my_snprintf(buff, sizeof(buff), "Can't start server: Bind on TCP/IP port."
+                " Got error: %d", (int) socket_errno);
+    sql_perror(buff);
+    sql_print_error("Do you already have another mysqld server running on "
+                    "port: %u ?", port);
+    unireg_abort(1);
+  }
+  if (listen(ip_sock,(int) back_log) < 0)
+  {
+    sql_perror("Can't start server: listen() on TCP/IP port");
+    sql_print_error("listen() on TCP/IP failed with error %d",
+                    socket_errno);
+    unireg_abort(1);
   }
+  DBUG_RETURN(ip_sock);
+}
 
-#ifdef _WIN32
+static void network_init(void)
+{
+#ifdef HAVE_SYS_UN_H
+  struct sockaddr_un	UNIXaddr;
+#endif
+  int	arg;
+  DBUG_ENTER("network_init");
+
+  if (MYSQL_CALLBACK_ELSE(thread_scheduler, init, (), 0))
+    unireg_abort(1);			/* purecov: inspected */
+
+  set_ports();
+
+  if (!opt_disable_networking && !opt_bootstrap)
+  {
+    if (mysqld_port)
+      base_ip_sock= activate_tcp_port(mysqld_port);
+    if (mysqld_extra_port)
+      extra_ip_sock= activate_tcp_port(mysqld_extra_port);
+  }
+
+#ifdef __NT__
   /* create named pipe */
   if (Service.IsNT() && mysqld_unix_port[0] && !opt_bootstrap &&
       opt_enable_named_pipe)
   {
+
     strxnmov(pipe_name, sizeof(pipe_name)-1, "\\\\.\\pipe\\",
 	     mysqld_unix_port, NullS);
     bzero((char*) &saPipeSecurity, sizeof(saPipeSecurity));
@@ -1964,6 +2308,7 @@ static void network_init(void)
   DBUG_VOID_RETURN;
 }
 
+
 #endif /*!EMBEDDED_LIBRARY*/
 
 
@@ -1998,17 +2343,14 @@ void close_connection(THD *thd, uint sql_errno)
 #endif /* EMBEDDED_LIBRARY */
 
 
-/** Called when a thread is aborted. */
+/** SIGINT handler (debugging with --gdb): ^C takes mysqld down via kill_mysql */
 /* ARGSUSED */
-extern "C" sig_handler end_thread_signal(int sig __attribute__((unused)))
+extern "C" sig_handler end_mysqld_signal(int sig __attribute__((unused)))
 {
-  THD *thd=current_thd;
-  DBUG_ENTER("end_thread_signal");
-  if (thd && ! thd->bootstrap)
-  {
-    statistic_increment(killed_threads, &LOCK_status);
-    MYSQL_CALLBACK(thread_scheduler, end_thread, (thd,0)); /* purecov: inspected */
-  }
+  DBUG_ENTER("end_mysqld_signal");
+  /* Don't call kill_mysql() if signal thread is not running */
+  if (signal_thread_in_use)
+    kill_mysql();                          // Take down mysqld nicely
   DBUG_VOID_RETURN;				/* purecov: deadcode */
 }
 
@@ -2033,10 +2375,10 @@ void thd_cleanup(THD *thd)
     dec_connection_count()
 */
 
-void dec_connection_count()
+void dec_connection_count(THD *thd)
 {
   mysql_mutex_lock(&LOCK_connection_count);
-  --connection_count;
+  (*thd->scheduler->connection_count)--;  /* counter of thd's own scheduler */
   mysql_mutex_unlock(&LOCK_connection_count);
 }
 
@@ -2073,7 +2415,7 @@ void unlink_thd(THD *thd)
   DBUG_PRINT("enter", ("thd: 0x%lx", (long) thd));
 
   thd_cleanup(thd);
-  dec_connection_count();
+  dec_connection_count(thd);
   mysql_mutex_lock(&LOCK_thread_count);
   /*
     Used by binlog_reset_master.  It would be cleaner to use
@@ -2154,7 +2496,7 @@ static bool cache_thread()
         this thread for handling of new THD object/connection.
       */
       thd->mysys_var->abort= 0;
-      thd->thr_create_utime= my_micro_time();
+      thd->thr_create_utime= microsecond_interval_timer();
       threads.append(thd);
       return(1);
     }
@@ -2186,6 +2528,8 @@ bool one_thread_per_connection_end(THD *thd, bool put_in_cache)
 {
   DBUG_ENTER("one_thread_per_connection_end");
   unlink_thd(thd);
+  /* Mark that current_thd is not valid anymore */
+  my_pthread_setspecific_ptr(THR_THD,  0);
   if (put_in_cache)
     put_in_cache= cache_thread();
   mysql_mutex_unlock(&LOCK_thread_count);
@@ -2283,8 +2627,8 @@ static BOOL WINAPI console_event_handler( DWORD type )
 #ifdef DEBUG_UNHANDLED_EXCEPTION_FILTER
 #define DEBUGGER_ATTACH_TIMEOUT 120
 /*
-  Wait for debugger to attach and break into debugger. If debugger is not attached,
-  resume after timeout.
+  Wait for debugger to attach and break into debugger. If debugger is
+  not attached, resume after timeout.
 */
 static void wait_for_debugger(int timeout_sec)
 {
@@ -2409,11 +2753,15 @@ extern "C" char *my_demangle(const char *mangled_name, int *status)
 }
 #endif
 
+extern const char *optimizer_switch_names[];
 
 extern "C" sig_handler handle_segfault(int sig)
 {
   time_t curr_time;
   struct tm tm;
+#ifdef HAVE_STACKTRACE
+  THD *thd=current_thd;
+#endif
 
   /*
     Strictly speaking, one needs a mutex here
@@ -2432,13 +2780,13 @@ extern "C" sig_handler handle_segfault(int sig)
   curr_time= my_time(0);
   localtime_r(&curr_time, &tm);
 
-  fprintf(stderr,"\
-%02d%02d%02d %2d:%02d:%02d - mysqld got " SIGNAL_FMT " ;\n\
+  fprintf(stderr, "%02d%02d%02d %2d:%02d:%02d ",
+          tm.tm_year % 100, tm.tm_mon+1, tm.tm_mday,
+          tm.tm_hour, tm.tm_min, tm.tm_sec);
+  fprintf(stderr,"[ERROR] mysqld got " SIGNAL_FMT " ;\n\
 This could be because you hit a bug. It is also possible that this binary\n\
 or one of the libraries it was linked against is corrupt, improperly built,\n\
 or misconfigured. This error can also be caused by malfunctioning hardware.\n",
-          tm.tm_year % 100, tm.tm_mon+1, tm.tm_mday,
-          tm.tm_hour, tm.tm_min, tm.tm_sec,
 	  sig);
   fprintf(stderr, "\
 We will try our best to scrape up some info that will hopefully help diagnose\n\
@@ -2448,16 +2796,16 @@ and this may fail.\n\n");
           (ulong) dflt_key_cache->key_cache_mem_size);
   fprintf(stderr, "read_buffer_size=%ld\n", (long) global_system_variables.read_buff_size);
   fprintf(stderr, "max_used_connections=%lu\n", max_used_connections);
-  fprintf(stderr, "max_threads=%u\n", thread_scheduler->max_threads);
+  fprintf(stderr, "max_threads=%u\n", thread_scheduler->max_threads +
+          (uint) extra_max_connections);
   fprintf(stderr, "thread_count=%u\n", thread_count);
-  fprintf(stderr, "connection_count=%u\n", connection_count);
   fprintf(stderr, "It is possible that mysqld could use up to \n\
 key_buffer_size + (read_buffer_size + sort_buffer_size)*max_threads = %lu K\n\
 bytes of memory\n", ((ulong) dflt_key_cache->key_cache_mem_size +
 		     (global_system_variables.read_buff_size +
 		      global_system_variables.sortbuff_size) *
-		     thread_scheduler->max_threads +
-                     max_connections * sizeof(THD)) / 1024);
+		     (thread_scheduler->max_threads + extra_max_connections) +
+                     (max_connections + extra_max_connections)* sizeof(THD)) / 1024);
   fprintf(stderr, "Hope that's ok; if not, decrease some variables in the equation.\n\n");
 
 #if defined(HAVE_LINUXTHREADS)
@@ -2473,9 +2821,8 @@ the thread stack. Please read http://dev.mysql.com/doc/mysql/en/linux.html\n\n",
 #endif /* HAVE_LINUXTHREADS */
 
 #ifdef HAVE_STACKTRACE
-  THD *thd=current_thd;
 
-  if (!(test_flags & TEST_NO_STACKTRACE))
+  if (opt_stack_trace)
   {
     fprintf(stderr, "Thread pointer: 0x%lx\n", (long) thd);
     fprintf(stderr, "Attempting backtrace. You can use the following "
@@ -2508,10 +2855,20 @@ the thread stack. Please read http://dev.mysql.com/doc/mysql/en/linux.html\n\n",
     fprintf(stderr, "\nTrying to get some variables.\n"
                     "Some pointers may be invalid and cause the dump to abort.\n");
     fprintf(stderr, "Query (%p): ", thd->query());
-    my_safe_print_str(thd->query(), min(1024, thd->query_length()));
-    fprintf(stderr, "Connection ID (thread ID): %lu\n", (ulong) thd->thread_id);
+    my_safe_print_str(thd->query(), min(65536,thd->query_length()));
+    fprintf(stderr, "\nConnection ID (thread ID): %lu\n", (ulong) thd->thread_id);
     fprintf(stderr, "Status: %s\n", kreason);
-    fputc('\n', stderr);
+    fprintf(stderr, "Optimizer switch: ");
+
+    ulonglong optsw= global_system_variables.optimizer_switch;
+    for (uint i= 0; optimizer_switch_names[i+1]; i++, optsw >>= 1)
+    {
+      if (i)
+        fputc(',', stderr);
+      fprintf(stderr, "%s=%s",
+              optimizer_switch_names[i], optsw & 1 ? "on" : "off");
+    }
+    fprintf(stderr, "\n\n");
   }
   fprintf(stderr, "\
 The manual page at http://dev.mysql.com/doc/mysql/en/crashing.html contains\n\
@@ -2539,7 +2896,7 @@ You should either build a dynamically-linked binary, or force LinuxThreads\n\
 to be used with the LD_ASSUME_KERNEL environment variable. Please consult\n\
 the documentation for your distribution on how to do that.\n");
 #endif
-  
+
   if (locked_in_memory)
   {
     fprintf(stderr, "\n\
@@ -2561,8 +2918,11 @@ bugs.\n");
 #endif
 
 #ifndef __WIN__
-  /* On Windows, do not terminate, but pass control to exception filter */
+  /* Terminate */
   exit(1);
+#else
+  /* On Windows, do not terminate, but pass control to exception filter */
+  ;
 #endif
 }
 
@@ -2584,7 +2944,7 @@ static void init_signals(void)
 
   my_sigset(THR_SERVER_ALARM,print_signal_warning); // Should never be called!
 
-  if (!(test_flags & TEST_NO_STACKTRACE) || (test_flags & TEST_CORE_ON_SIGNAL))
+  if (opt_stack_trace || (test_flags & TEST_CORE_ON_SIGNAL))
   {
     sa.sa_flags = SA_RESETHAND | SA_NODEFER;
     sigemptyset(&sa.sa_mask);
@@ -2612,7 +2972,7 @@ static void init_signals(void)
   {
     /* Change limits so that we will get a core file */
     STRUCT_RLIMIT rl;
-    rl.rlim_cur = rl.rlim_max = RLIM_INFINITY;
+    rl.rlim_cur = rl.rlim_max = (rlim_t) RLIM_INFINITY;
     if (setrlimit(RLIMIT_CORE, &rl) && global_system_variables.log_warnings)
       sql_print_warning("setrlimit could not change the size of core files to 'infinity';  We may not be able to generate a core file on signals");
   }
@@ -2641,12 +3001,13 @@ static void init_signals(void)
     sigaddset(&set,THR_SERVER_ALARM);
   if (test_flags & TEST_SIGINT)
   {
-    my_sigset(thr_kill_signal, end_thread_signal);
-    // May be SIGINT
-    sigdelset(&set, thr_kill_signal);
+    /* Allow SIGINT to break mysqld. This is for debugging with --gdb */
+    my_sigset(SIGINT, end_mysqld_signal);
+    sigdelset(&set, SIGINT);
   }
   else
     sigaddset(&set,SIGINT);
+
   sigprocmask(SIG_SETMASK,&set,NULL);
   pthread_sigmask(SIG_SETMASK,&set,NULL);
   DBUG_VOID_RETURN;
@@ -2705,12 +3066,13 @@ pthread_handler_t signal_hand(void *arg __attribute__((unused)))
     This should actually be '+ max_number_of_slaves' instead of +10,
     but the +10 should be quite safe.
   */
-  init_thr_alarm(thread_scheduler->max_threads +
+  init_thr_alarm(thread_scheduler->max_threads + extra_max_connections +
 		 global_system_variables.max_insert_delayed_threads + 10);
-  if (thd_lib_detected != THD_LIB_LT && (test_flags & TEST_SIGINT))
+  if (test_flags & TEST_SIGINT)
   {
-    (void) sigemptyset(&set);			// Setup up SIGINT for debug
-    (void) sigaddset(&set,SIGINT);		// For debugging
+    /* Allow SIGINT to break mysqld. This is for debugging with --gdb */
+    (void) sigemptyset(&set);
+    (void) sigaddset(&set,SIGINT);
     (void) pthread_sigmask(SIG_UNBLOCK,&set,NULL);
   }
   (void) sigemptyset(&set);			// Setup up SIGINT for debug
@@ -2754,8 +3116,8 @@ pthread_handler_t signal_hand(void *arg __attribute__((unused)))
     {
       DBUG_PRINT("quit",("signal_handler: calling my_thread_end()"));
       my_thread_end();
-      signal_thread_in_use= 0;
       DBUG_LEAVE;                               // Must match DBUG_ENTER()
+      signal_thread_in_use= 0;
       pthread_exit(0);				// Safety
       return 0;                                 // Avoid compiler warnings
     }
@@ -2846,44 +3208,44 @@ extern "C" void my_message_sql(uint error, const char *str, myf MyFlags);
 void my_message_sql(uint error, const char *str, myf MyFlags)
 {
   THD *thd= current_thd;
+  MYSQL_ERROR::enum_warning_level level;  /* severity for raise_condition() */
+  sql_print_message_func func;            /* log function matching 'level' */
+
   DBUG_ENTER("my_message_sql");
   DBUG_PRINT("error", ("error: %u  message: '%s'", error, str));
 
   DBUG_ASSERT(str != NULL);
-  /*
-    An error should have a valid error number (!= 0), so it can be caught
-    in stored procedures by SQL exception handlers.
-    Calling my_error() with error == 0 is a bug.
-    Remaining known places to fix:
-    - storage/myisam/mi_create.c, my_printf_error()
-    TODO:
-    DBUG_ASSERT(error != 0);
-  */
+  DBUG_ASSERT(error != 0);                /* error == 0 is a caller bug */
 
-  if (error == 0)
+  mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_ERROR, error, str);
+  if (MyFlags & ME_JUST_INFO)
   {
-    /* At least, prevent new abuse ... */
-    DBUG_ASSERT(strncmp(str, "MyISAM table", 12) == 0);
-    error= ER_UNKNOWN_ERROR;
+    level= MYSQL_ERROR::WARN_LEVEL_NOTE;
+    func= sql_print_information;
+  }
+  else if (MyFlags & ME_JUST_WARNING)
+  {
+    level= MYSQL_ERROR::WARN_LEVEL_WARN;
+    func= sql_print_warning;
+  }
+  else
+  {
+    level= MYSQL_ERROR::WARN_LEVEL_ERROR;
+    func= sql_print_error;
   }
-
-  mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_ERROR, error, str);
 
   if (thd)
   {
     if (MyFlags & ME_FATALERROR)
       thd->is_fatal_error= 1;
-    (void) thd->raise_condition(error,
-                                NULL,
-                                MYSQL_ERROR::WARN_LEVEL_ERROR,
-                                str);
+    (void) thd->raise_condition(error, NULL, level, str);
   }
 
   /* When simulating OOM, skip writing to error log to avoid mtr errors */
   DBUG_EXECUTE_IF("simulate_out_of_memory", DBUG_VOID_RETURN;);
 
-  if (!thd || MyFlags & ME_NOREFRESH)
-    sql_print_error("%s: %s",my_progname,str); /* purecov: inspected */
+  if (!thd || (MyFlags & ME_NOREFRESH))
+    (*func)("%s: %s", my_progname_short, str); /* purecov: inspected */
   DBUG_VOID_RETURN;
 }
 
@@ -2926,7 +3288,10 @@ const char *load_default_groups[]= {
 #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
 "mysql_cluster",
 #endif
-"mysqld","server", MYSQL_BASE_VERSION, 0, 0};
+"mysqld", "server", MYSQL_BASE_VERSION,
+"mariadb", MARIADB_BASE_VERSION,
+"client-server",
+0, 0};
 
 #if defined(__WIN__) && !defined(EMBEDDED_LIBRARY)
 static const int load_default_groups_sz=
@@ -3061,6 +3426,7 @@ SHOW_VAR com_status_vars[]= {
   {"show_binlog_events",   (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOG_EVENTS]), SHOW_LONG_STATUS},
   {"show_binlogs",         (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOGS]), SHOW_LONG_STATUS},
   {"show_charsets",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CHARSETS]), SHOW_LONG_STATUS},
+  {"show_client_statistics",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CLIENT_STATS]), SHOW_LONG_STATUS},
   {"show_collations",      (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLLATIONS]), SHOW_LONG_STATUS},
   {"show_contributors",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CONTRIBUTORS]), SHOW_LONG_STATUS},
   {"show_create_db",       (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CREATE_DB]), SHOW_LONG_STATUS},
@@ -3082,6 +3448,7 @@ SHOW_VAR com_status_vars[]= {
   {"show_function_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS_FUNC]), SHOW_LONG_STATUS},
   {"show_grants",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_GRANTS]), SHOW_LONG_STATUS},
   {"show_keys",            (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_KEYS]), SHOW_LONG_STATUS},
+  {"show_index_statistics",     (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_INDEX_STATS]), SHOW_LONG_STATUS},
   {"show_master_status",   (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_MASTER_STAT]), SHOW_LONG_STATUS},
   {"show_open_tables",     (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_OPEN_TABLES]), SHOW_LONG_STATUS},
   {"show_plugins",         (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_PLUGINS]), SHOW_LONG_STATUS},
@@ -3098,9 +3465,11 @@ SHOW_VAR com_status_vars[]= {
   {"show_slave_status",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_STAT]), SHOW_LONG_STATUS},
   {"show_status",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS]), SHOW_LONG_STATUS},
   {"show_storage_engines", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STORAGE_ENGINES]), SHOW_LONG_STATUS},
+  {"show_table_statistics",     (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATS]), SHOW_LONG_STATUS},
   {"show_table_status",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATUS]), SHOW_LONG_STATUS},
   {"show_tables",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLES]), SHOW_LONG_STATUS},
   {"show_triggers",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TRIGGERS]), SHOW_LONG_STATUS},
+  {"show_user_statistics",      (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_USER_STATS]), SHOW_LONG_STATUS},
   {"show_variables",       (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_VARIABLES]), SHOW_LONG_STATUS},
   {"show_warnings",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_WARNS]), SHOW_LONG_STATUS},
   {"slave_start",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SLAVE_START]), SHOW_LONG_STATUS},
@@ -3126,23 +3495,11 @@ SHOW_VAR com_status_vars[]= {
   {NullS, NullS, SHOW_LONG}
 };
 
-/**
-  Create the name of the default general log file
-  
-  @param[IN] buff    Location for building new string.
-  @param[IN] log_ext The extension for the file (e.g .log)
-  @returns Pointer to a new string containing the name
-*/
-static inline char *make_default_log_name(char *buff,const char* log_ext)
-{
-  return make_log_name(buff, default_logfile_name, log_ext);
-}
-
 static int init_common_variables()
 {
-  char buff[FN_REFLEN];
   umask(((~my_umask) & 0666));
   my_decimal_set_zero(&decimal_zero); // set decimal_zero constant;
+
   tzset();			// Set tzname
 
   max_system_variables.pseudo_thread_id= (ulong)~0;
@@ -3206,17 +3563,23 @@ static int init_common_variables()
 
   if (gethostname(glob_hostname,sizeof(glob_hostname)) < 0)
   {
+    /*
+      gethostname() failed: use "localhost" as hostname ('show variables') and
+      "mysql" as default basename (e.g. pid file) if no --log-basename is given.
+    */
     strmake(glob_hostname, STRING_WITH_LEN("localhost"));
     sql_print_warning("gethostname failed, using '%s' as hostname",
-                      glob_hostname);
-    strmake(default_logfile_name, STRING_WITH_LEN("mysql"));
+                        glob_hostname);
+    opt_log_basename= const_cast<char *>("mysql");
   }
   else
-    strmake(default_logfile_name, glob_hostname, 
-	    sizeof(default_logfile_name)-5);
+    opt_log_basename= glob_hostname;
 
-  strmake(pidfile_name, default_logfile_name, sizeof(pidfile_name)-5);
-  strmov(fn_ext(pidfile_name),".pid");		// Add proper extension
+  if (!*pidfile_name)
+  {
+    strmake(pidfile_name, opt_log_basename, sizeof(pidfile_name)-5);
+    strmov(fn_ext(pidfile_name),".pid");		// Add proper extension
+  }
 
   /*
     The default-storage-engine entry in my_long_options should have a
@@ -3230,7 +3593,7 @@ static int init_common_variables()
     (except in the embedded server, where the default continues to
     be MyISAM)
   */
-#ifdef EMBEDDED_LIBRARY
+#ifndef WITH_INNOBASE_STORAGE_ENGINE
   default_storage_engine= const_cast<char *>("MyISAM");
 #else
   default_storage_engine= const_cast<char *>("InnoDB");
@@ -3357,7 +3720,8 @@ static int init_common_variables()
     uint files, wanted_files, max_open_files;
 
     /* MyISAM requires two file handles per table. */
-    wanted_files= 10+max_connections+table_cache_size*2;
+    wanted_files= (10 + max_connections + extra_max_connections +
+                   table_cache_size*2);
     /*
       We are trying to allocate no less than max_connections*5 file
       handles (i.e. we are trying to set the limit so that they will
@@ -3368,7 +3732,8 @@ static int init_common_variables()
       can't get max_connections*5 but still got no less than was
       requested (value of wanted_files).
     */
-    max_open_files= max(max(wanted_files, max_connections*5),
+    max_open_files= max(max(wanted_files,
+                            (max_connections + extra_max_connections)*5),
                         open_files_limit);
     files= my_set_max_open_files(max_open_files);
 
@@ -3414,7 +3779,7 @@ static int init_common_variables()
   if (init_errmessage())	/* Read error messages from file */
     return 1;
   init_client_errs();
-  mysql_client_plugin_init();
+  mysql_library_init(unused,unused,unused); /* for replication */
   lex_init();
   if (item_create_init())
     return 1;
@@ -3475,7 +3840,8 @@ static int init_common_variables()
   global_system_variables.collation_connection=  default_charset_info;
   global_system_variables.character_set_results= default_charset_info;
   global_system_variables.character_set_client=  default_charset_info;
-  if (!(character_set_filesystem= 
+
+  if (!(character_set_filesystem=
         get_charset_by_csname(character_set_filesystem_name,
                               MY_CS_PRIMARY, MYF(MY_WME))))
     return 1;
@@ -3490,29 +3856,22 @@ static int init_common_variables()
   global_system_variables.lc_time_names= my_default_lc_time_names;
 
   /* check log options and issue warnings if needed */
-  if (opt_log && opt_logname && !(log_output_options & LOG_FILE) &&
-      !(log_output_options & LOG_NONE))
+  if (opt_log && opt_logname && *opt_logname &&
+      !(log_output_options & (LOG_FILE | LOG_NONE)))
     sql_print_warning("Although a path was specified for the "
                       "--log option, log tables are used. "
                       "To enable logging to files use the --log-output option.");
 
-  if (opt_slow_log && opt_slow_logname && !(log_output_options & LOG_FILE)
-      && !(log_output_options & LOG_NONE))
+  if (opt_slow_log && opt_slow_logname && *opt_slow_logname &&
+      !(log_output_options & (LOG_FILE | LOG_NONE)))
     sql_print_warning("Although a path was specified for the "
                       "--log-slow-queries option, log tables are used. "
                       "To enable logging to files use the --log-output=file option.");
 
-#define FIX_LOG_VAR(VAR, ALT)                                   \
-  if (!VAR || !*VAR)                                            \
-  {                                                             \
-    my_free(VAR); /* it could be an allocated empty string "" */ \
-    VAR= my_strdup(ALT, MYF(0));                                \
-  }
-
-  FIX_LOG_VAR(opt_logname,
-              make_default_log_name(buff, ".log"));
-  FIX_LOG_VAR(opt_slow_logname,
-              make_default_log_name(buff, "-slow.log"));
+  if (!opt_logname || !*opt_logname)
+    make_default_log_name(&opt_logname, ".log", false);
+  if (!opt_slow_logname || !*opt_slow_logname)
+    make_default_log_name(&opt_slow_logname, "-slow.log", false);
 
 #if defined(ENABLED_DEBUG_SYNC)
   /* Initialize the debug sync facility. See debug_sync.cc. */
@@ -3557,8 +3916,7 @@ You should consider changing lower_case_table_names to 1 or 2",
     }
   }
   else if (lower_case_table_names == 2 &&
-           !(lower_case_file_system=
-             (test_if_case_insensitive(mysql_real_data_home) == 1)))
+           !(lower_case_file_system= (lower_case_file_system == 1)))
   {
     if (global_system_variables.log_warnings)
       sql_print_warning("lower_case_table_names was set to 2, even though your "
@@ -3569,8 +3927,7 @@ You should consider changing lower_case_table_names to 1 or 2",
   }
   else
   {
-    lower_case_file_system=
-      (test_if_case_insensitive(mysql_real_data_home) == 1);
+    lower_case_file_system= (lower_case_file_system == 1);
   }
 
   /* Reset table_alias_charset, now that lower_case_table_names is set. */
@@ -3605,10 +3962,22 @@ static int init_thread_environment()
                    &LOCK_prepared_stmt_count, MY_MUTEX_INIT_FAST);
   mysql_mutex_init(key_LOCK_error_messages,
                    &LOCK_error_messages, MY_MUTEX_INIT_FAST);
-  mysql_mutex_init(key_LOCK_uuid_generator,
-                   &LOCK_uuid_generator, MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(key_LOCK_uuid_short_generator,
+                   &LOCK_short_uuid_generator, MY_MUTEX_INIT_FAST);
   mysql_mutex_init(key_LOCK_connection_count,
                    &LOCK_connection_count, MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(key_LOCK_stats, &LOCK_stats, MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(key_LOCK_global_user_client_stats,
+                   &LOCK_global_user_client_stats, MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(key_LOCK_global_table_stats,
+                   &LOCK_global_table_stats, MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(key_LOCK_global_index_stats,
+                   &LOCK_global_index_stats, MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(key_LOCK_prepare_ordered, &LOCK_prepare_ordered,
+                   MY_MUTEX_INIT_SLOW);
+  mysql_mutex_init(key_LOCK_commit_ordered, &LOCK_commit_ordered,
+                   MY_MUTEX_INIT_SLOW);
+
 #ifdef HAVE_OPENSSL
   mysql_mutex_init(key_LOCK_des_key_file,
                    &LOCK_des_key_file, MY_MUTEX_INIT_FAST);
@@ -3660,20 +4029,20 @@ static int init_thread_environment()
 
 #if defined(HAVE_OPENSSL) && !defined(HAVE_YASSL)
 static unsigned long openssl_id_function()
-{ 
+{
   return (unsigned long) pthread_self();
-} 
+}
 
 
 static openssl_lock_t *openssl_dynlock_create(const char *file, int line)
-{ 
+{
   openssl_lock_t *lock= new openssl_lock_t;
   mysql_rwlock_init(key_rwlock_openssl, &lock->lock);
   return lock;
 }
 
 
-static void openssl_dynlock_destroy(openssl_lock_t *lock, const char *file, 
+static void openssl_dynlock_destroy(openssl_lock_t *lock, const char *file,
 				    int line)
 {
   mysql_rwlock_destroy(&lock->lock);
@@ -3693,7 +4062,7 @@ static void openssl_lock_function(int mode, int n, const char *file, int line)
 }
 
 
-static void openssl_lock(int mode, openssl_lock_t *lock, const char *file, 
+static void openssl_lock(int mode, openssl_lock_t *lock, const char *file,
 			 int line)
 {
   int err;
@@ -3718,7 +4087,7 @@ static void openssl_lock(int mode, openssl_lock_t *lock, const char *file,
     sql_print_error("Fatal: OpenSSL interface problem (mode=0x%x)", mode);
     abort();
   }
-  if (err) 
+  if (err)
   {
     sql_print_error("Fatal: can't %s OpenSSL lock", what);
     abort();
@@ -3774,6 +4143,32 @@ static void end_ssl()
 #endif /* HAVE_OPENSSL */
 }
 
+#ifdef _WIN32
+#include <werapi.h>
+/**
+  Ask Windows Error Reporting to attach a file to any crash report it builds.
+
+  WerRegisterFile() is looked up at run time, so on Windows versions whose
+  kernel32 does not export it (pre-Vista) this function does nothing.
+
+  @param file  path of the file to register, as a multi-byte string
+*/
+static void add_file_to_crash_report(char *file)
+{
+  typedef HRESULT (WINAPI *wer_register_file_t)(PCWSTR, WER_REGISTER_FILE_TYPE,
+                                                DWORD);
+  wchar_t wide_path[MAX_PATH+1]= {0};
+
+  wer_register_file_t register_file= (wer_register_file_t)
+    GetProcAddress(GetModuleHandle("kernel32"), "WerRegisterFile");
+  if (!register_file)
+    return;                                     /* API not available */
+
+  /* WER wants a wide-character path; skip registration if conversion fails */
+  if (mbstowcs(wide_path, file, MAX_PATH) != (size_t)-1)
+    register_file(wide_path, WerRegFileTypeOther, WER_FILE_ANONYMOUS_DATA);
+}
+#endif
 
 static int init_server_components()
 {
@@ -3789,12 +4184,14 @@ static int init_server_components()
   query_cache_set_min_res_unit(query_cache_min_res_unit);
   query_cache_init();
   query_cache_resize(query_cache_size);
-  randominit(&sql_rand,(ulong) server_start_time,(ulong) server_start_time/2);
+  my_rnd_init(&sql_rand,(ulong) server_start_time,(ulong) server_start_time/2);
   setup_fpu();
   init_thr_lock();
+  my_uuid_init((ulong) (my_rnd(&sql_rand))*12345,12345);
 #ifdef HAVE_REPLICATION
   init_slave_list();
 #endif
+  wt_init();
 
   /* Setup logs */
 
@@ -3829,9 +4226,16 @@ static int init_server_components()
 
       if (!res)
         setbuf(stderr, NULL);
+
+#ifdef _WIN32
+      /* Add error log to windows crash reporting. */
+      add_file_to_crash_report(log_error_file);
+#endif
     }
   }
 
+  /* set up the hook before initializing plugins which may use it */
+  error_handler_hook= my_message_sql;
   proc_info_hook= set_thd_proc_info;
 
 #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE
@@ -3933,8 +4337,7 @@ a file name for --log-bin-index option", opt_binlog_index_name);
     }
     if (ln == buf)
     {
-      my_free(opt_bin_logname);
-      opt_bin_logname=my_strdup(buf, MYF(0));
+      opt_bin_logname= my_once_strdup(buf, MYF(MY_WME));
     }
     if (mysql_bin_log.open_index_file(opt_binlog_index_name, ln, TRUE))
     {
@@ -3943,7 +4346,10 @@ a file name for --log-bin-index option", opt_binlog_index_name);
   }
 
   /* call ha_init_key_cache() on all key caches to init them */
-  process_key_caches(&ha_init_key_cache);
+  process_key_caches(&ha_init_key_cache, 0);
+
+  init_global_table_stats();
+  init_global_index_stats();
 
   /* Allow storage engine to give real error messages */
   if (ha_init_errors())
@@ -3989,8 +4395,7 @@ a file name for --log-bin-index option", opt_binlog_index_name);
 
     if (remaining_argc > 1)
     {
-      fprintf(stderr, "%s: Too many arguments (first extra is '%s').\n"
-              "Use --verbose --help to get a list of available options\n",
+      fprintf(stderr, "%s: Too many arguments (first extra is '%s').\n",
               my_progname, remaining_argv[1]);
       unireg_abort(1);
     }
@@ -4010,7 +4415,6 @@ a file name for --log-bin-index option", opt_binlog_index_name);
     unireg_abort(1);
   }
 
-#ifdef WITH_CSV_STORAGE_ENGINE
   if (opt_bootstrap)
     log_output_options= LOG_FILE;
   else
@@ -4044,10 +4448,6 @@ a file name for --log-bin-index option", opt_binlog_index_name);
     logger.set_handlers(LOG_FILE, opt_slow_log ? log_output_options:LOG_NONE,
                         opt_log ? log_output_options:LOG_NONE);
   }
-#else
-  logger.set_handlers(LOG_FILE, opt_slow_log ? LOG_FILE:LOG_NONE,
-                      opt_log ? LOG_FILE:LOG_NONE);
-#endif
 
   /*
     Set the default storage engine
@@ -4079,9 +4479,18 @@ a file name for --log-bin-index option", opt_binlog_index_name);
       Need to unlock as global_system_variables.table_plugin
       was acquired during plugin_init()
     */
+    mysql_mutex_lock(&LOCK_global_system_variables);
     plugin_unlock(0, global_system_variables.table_plugin);
     global_system_variables.table_plugin= plugin;
+    mysql_mutex_unlock(&LOCK_global_system_variables);
   }
+#if defined(WITH_ARIA_STORAGE_ENGINE) && defined(USE_ARIA_FOR_TMP_TABLES)
+  if (!ha_storage_engine_is_enabled(maria_hton) && !opt_bootstrap)
+  {
+    sql_print_error("Aria engine is not enabled or did not start. The Aria engine must be enabled to continue as mysqld was configured with --with-aria-tmp-tables");
+    unireg_abort(1);
+  }
+#endif
 
   tc_log= (total_ha_2pc > 1 ? (opt_bin_log  ?
                                (TC_LOG *) &mysql_bin_log :
@@ -4140,6 +4549,8 @@ a file name for --log-bin-index option", opt_binlog_index_name);
 
   init_max_user_conn();
   init_update_queries();
+  init_global_user_stats();
+  init_global_client_stats();
   DBUG_RETURN(0);
 }
 
@@ -4213,7 +4624,7 @@ static void handle_connections_methods()
       handler_count--;
     }
   }
-#endif 
+#endif
 
   while (handler_count > 0)
     mysql_cond_wait(&COND_handler_count, &LOCK_thread_count);
@@ -4395,13 +4806,6 @@ int mysqld_main(int argc, char **argv)
 
   init_error_log_mutex();
 
-  /* Set signal used to kill MySQL */
-#if defined(SIGUSR2)
-  thr_kill_signal= thd_lib_detected == THD_LIB_LT ? SIGINT : SIGUSR2;
-#else
-  thr_kill_signal= SIGINT;
-#endif
-
   /* Initialize audit interface globals. Audit plugins are inited later. */
   mysql_audit_initialize();
 
@@ -4489,13 +4893,14 @@ int mysqld_main(int argc, char **argv)
 
 #ifndef DBUG_OFF
   test_lc_time_sz();
+  srand((uint) time(NULL)); 
 #endif
 
   /*
     We have enough space for fiddling with the argv, continue
   */
   check_data_home(mysql_real_data_home);
-  if (my_setwd(mysql_real_data_home,MYF(MY_WME)) && !opt_help)
+  if (my_setwd(mysql_real_data_home, opt_help ? 0 : MYF(MY_WME)) && !opt_help)
     unireg_abort(1);				/* purecov: inspected */
 
   if ((user_info= check_user(mysqld_user)))
@@ -4545,7 +4950,6 @@ int mysqld_main(int argc, char **argv)
     init signals & alarm
     After this we can't quit by a simple unireg_abort
   */
-  error_handler_hook= my_message_sql;
   start_signal_handler();				// Creates pidfile
 
   if (mysql_rm_tmp_tables() || acl_init(opt_noacl) ||
@@ -4617,7 +5021,13 @@ int mysqld_main(int argc, char **argv)
   {
     select_thread_in_use= 0;                    // Allow 'kill' to work
     bootstrap(mysql_stdin);
-    unireg_abort(bootstrap_error ? 1 : 0);
+    if (!kill_in_progress)
+      unireg_abort(bootstrap_error ? 1 : 0);
+    else
+    {
+      sleep(2);                                 // Wait for kill
+      exit(0);
+    }
   }
   if (opt_init_file && *opt_init_file)
   {
@@ -4625,6 +5035,12 @@ int mysqld_main(int argc, char **argv)
       unireg_abort(1);
   }
 
+  /*
+    We must have LOCK_open before LOCK_global_system_variables because
+    LOCK_open is held while sql_plugin.cc::intern_sys_var_ptr() is called.
+  */
+  mysql_mutex_record_order(&LOCK_open, &LOCK_global_system_variables);
+
   create_shutdown_thread();
   start_handle_manager();
 
@@ -4651,7 +5067,7 @@ int mysqld_main(int argc, char **argv)
 #endif /* _WIN32 || HAVE_SMEM */
 
   /* (void) pthread_attr_destroy(&connection_attrib); */
-  
+
   DBUG_PRINT("quit",("Exiting main thread"));
 
 #ifndef __WIN__
@@ -4693,7 +5109,6 @@ int mysqld_main(int argc, char **argv)
       CloseHandle(hEventShutdown);
   }
 #endif
-  clean_up(1);
   mysqld_exit(0);
 }
 
@@ -4742,10 +5157,8 @@ static char *add_quoted_string(char *to, const char *from, char *to_end)
   @param file_path		Path to this program
   @param startup_option	Startup option to mysqld
 
-  @retval
-    0		option handled
-  @retval
-    1		Could not handle option
+  @retval 0	option handled
+  @retval 1	Could not handle option
 */
 
 static bool
@@ -4999,7 +5412,7 @@ void handle_connection_in_main_thread(THD *thd)
   thread_cache_size=0;			// Safety
   threads.append(thd);
   mysql_mutex_unlock(&LOCK_thread_count);
-  thd->start_utime= my_micro_time();
+  thd->start_utime= microsecond_interval_timer();
   do_handle_one_connection(thd);
 }
 
@@ -5025,7 +5438,7 @@ void create_thread_to_handle_connection(THD *thd)
     thread_created++;
     threads.append(thd);
     DBUG_PRINT("info",(("creating thread %lu"), thd->thread_id));
-    thd->prior_thr_create_utime= thd->start_utime= my_micro_time();
+    thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer();
     if ((error= mysql_thread_create(key_thread_one_connection,
                                     &thd->real_id, &connection_attrib,
                                     handle_one_connection,
@@ -5040,7 +5453,7 @@ void create_thread_to_handle_connection(THD *thd)
       mysql_mutex_unlock(&LOCK_thread_count);
 
       mysql_mutex_lock(&LOCK_connection_count);
-      --connection_count;
+      (*thd->scheduler->connection_count)--;
       mysql_mutex_unlock(&LOCK_connection_count);
 
       statistic_increment(aborted_connects,&LOCK_status);
@@ -5085,20 +5498,22 @@ static void create_new_thread(THD *thd)
 
   mysql_mutex_lock(&LOCK_connection_count);
 
-  if (connection_count >= max_connections + 1 || abort_loop)
+  if (*thd->scheduler->connection_count >=
+      *thd->scheduler->max_connections + 1|| abort_loop)
   {
     mysql_mutex_unlock(&LOCK_connection_count);
 
     DBUG_PRINT("error",("Too many connections"));
     close_connection(thd, ER_CON_COUNT_ERROR);
+    statistic_increment(denied_connections, &LOCK_status);
     delete thd;
     DBUG_VOID_RETURN;
   }
 
-  ++connection_count;
+  ++*thd->scheduler->connection_count;
 
-  if (connection_count > max_used_connections)
-    max_used_connections= connection_count;
+  if (connection_count + extra_connection_count > max_used_connections)
+    max_used_connections= connection_count + extra_connection_count;
 
   mysql_mutex_unlock(&LOCK_connection_count);
 
@@ -5115,7 +5530,7 @@ static void create_new_thread(THD *thd)
 
   thread_count++;
 
-  MYSQL_CALLBACK(thread_scheduler, add_connection, (thd));
+  MYSQL_CALLBACK(thd->scheduler, add_connection, (thd));
 
   DBUG_VOID_RETURN;
 }
@@ -5127,10 +5542,11 @@ inline void kill_broken_server()
 {
   /* hack to get around signals ignored in syscalls for problem OS's */
   if (unix_sock == INVALID_SOCKET ||
-      (!opt_disable_networking && ip_sock == INVALID_SOCKET))
+      (!opt_disable_networking && base_ip_sock == INVALID_SOCKET))
   {
     select_thread_in_use = 0;
     /* The following call will never return */
+    DBUG_PRINT("general", ("killing server because socket is closed"));
     kill_server((void*) MYSQL_KILL_SIGNAL);
   }
 }
@@ -5150,49 +5566,39 @@ void handle_connections_sockets()
   THD *thd;
   struct sockaddr_storage cAddr;
   int ip_flags=0,socket_flags=0,flags=0,retval;
+  int extra_ip_flags=0;
   st_vio *vio_tmp;
 #ifdef HAVE_POLL
   int socket_count= 0;
-  struct pollfd fds[2]; // for ip_sock and unix_sock
+  struct pollfd fds[3]; // for ip_sock, unix_sock and extra_ip_sock
+#define setup_fds(X)                    \
+    fds[socket_count].fd= X;            \
+    fds[socket_count].events= POLLIN;   \
+    socket_count++
 #else
   fd_set readFDs,clientFDs;
-  uint max_used_connection= (uint) (max(ip_sock,unix_sock)+1);
+  uint max_used_connection= (uint)
+    max(max(base_ip_sock, unix_sock), extra_ip_sock) + 1;
+#define setup_fds(X)    FD_SET(X,&clientFDs)
+  FD_ZERO(&clientFDs);
 #endif
 
   DBUG_ENTER("handle_connections_sockets");
 
-  (void) ip_flags;
-  (void) socket_flags;
-
-#ifndef HAVE_POLL
-  FD_ZERO(&clientFDs);
-#endif
-
-  if (ip_sock != INVALID_SOCKET)
+  if (base_ip_sock != INVALID_SOCKET)
   {
-#ifdef HAVE_POLL
-    fds[socket_count].fd= ip_sock;
-    fds[socket_count].events= POLLIN;
-    socket_count++;
-#else
-    FD_SET(ip_sock,&clientFDs);
-#endif    
-#ifdef HAVE_FCNTL
-    ip_flags = fcntl(ip_sock, F_GETFL, 0);
-#endif
+    setup_fds(base_ip_sock);
+    ip_flags = fcntl(base_ip_sock, F_GETFL, 0);
+  }
+  if (extra_ip_sock != INVALID_SOCKET)
+  {
+    setup_fds(extra_ip_sock);
+    extra_ip_flags = fcntl(extra_ip_sock, F_GETFL, 0);
   }
 #ifdef HAVE_SYS_UN_H
-#ifdef HAVE_POLL
-  fds[socket_count].fd= unix_sock;
-  fds[socket_count].events= POLLIN;
-  socket_count++;
-#else
-  FD_SET(unix_sock,&clientFDs);
-#endif
-#ifdef HAVE_FCNTL
+  setup_fds(unix_sock);
   socket_flags=fcntl(unix_sock, F_GETFL, 0);
 #endif
-#endif
 
   DBUG_PRINT("general",("Waiting for connections."));
   MAYBE_BROKEN_SYSCALL;
@@ -5230,26 +5636,26 @@ void handle_connections_sockets()
       if (fds[i].revents & POLLIN)
       {
         sock= fds[i].fd;
-#ifdef HAVE_FCNTL
         flags= fcntl(sock, F_GETFL, 0);
-#else
-        flags= 0;
-#endif // HAVE_FCNTL
         break;
       }
     }
 #else  // HAVE_POLL
-#ifdef HAVE_SYS_UN_H
-    if (FD_ISSET(unix_sock,&readFDs))
+    if (FD_ISSET(base_ip_sock,&readFDs))
     {
-      sock = unix_sock;
-      flags= socket_flags;
+      sock=  base_ip_sock;
+      flags= ip_flags;
     }
     else
-#endif // HAVE_SYS_UN_H
+    if (FD_ISSET(extra_ip_sock,&readFDs))
     {
-      sock = ip_sock;
-      flags= ip_flags;
+      sock=  extra_ip_sock;
+      flags= extra_ip_flags;
+    }
+    else
+    {
+      sock = unix_sock;
+      flags= socket_flags;
     }
 #endif // HAVE_POLL
 
@@ -5296,7 +5702,7 @@ void handle_connections_sockets()
 
 #ifdef HAVE_LIBWRAP
     {
-      if (sock == ip_sock)
+      if (sock == base_ip_sock || sock == extra_ip_sock)
       {
 	struct request_info req;
 	signal(SIGCHLD, SIG_DFL);
@@ -5377,6 +5783,11 @@ void handle_connections_sockets()
     if (sock == unix_sock)
       thd->security_ctx->host=(char*) my_localhost;
 
+    if (sock == extra_ip_sock)
+    {
+      thd->extra_port= 1;
+      thd->scheduler= extra_thread_scheduler;
+    }
     create_new_thread(thd);
   }
   DBUG_VOID_RETURN;
@@ -5838,9 +6249,12 @@ struct my_option my_long_options[]=
    &disconnect_slave_event_count, &disconnect_slave_event_count,
    0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
 #endif /* HAVE_REPLICATION */
+#ifdef HAVE_STACKTRACE
+  {"stack-trace", 0 , "Print a symbolic stack trace on failure",
+   &opt_stack_trace, &opt_stack_trace, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
+#endif /* HAVE_STACKTRACE */
   {"exit-info", 'T', "Used for debugging. Use at your own risk.", 0, 0, 0,
    GET_LONG, OPT_ARG, 0, 0, 0, 0, 0, 0},
-
   {"external-locking", 0, "Use system (external) locking (disabled by "
    "default).  With this option enabled you can run myisamchk to test "
    "(not repair) tables while the MySQL server is running. Disable with "
@@ -5873,11 +6287,22 @@ struct my_option my_long_options[]=
   {"log", 'l', "Log connections and queries to file (deprecated option, use "
    "--general-log/--general-log-file instead).", &opt_logname, &opt_logname,
    0, GET_STR_ALLOC, OPT_ARG, 0, 0, 0, 0, 0, 0},
+  {"log-basename", OPT_LOG_BASENAME,
+   "Basename for all log files and the .pid file. This sets all log file "
+   "names at once (in 'datadir') and is normally the only option you need "
+   "for specifying log files. Sets names for --log-bin, --log-bin-index, "
+   "--relay-log, --relay-log-index, --general-log-file, "
+   "--log-slow-query-log-file, --log-error-file, and --pid-file",
+   &opt_log_basename, &opt_log_basename, 0, GET_STR, REQUIRED_ARG,
+   0, 0, 0, 0, 0, 0},
   {"log-bin", OPT_BIN_LOG,
-   "Log update queries in binary format. Optional (but strongly recommended "
-   "to avoid replication problems if server's hostname changes) argument "
-   "should be the chosen location for the binary log files.",
-   &opt_bin_logname, &opt_bin_logname, 0, GET_STR_ALLOC,
+   "Log update queries in binary format. Optional argument should be name for "
+   "binary log. If not given "
+   "datadir/'log-basename'-bin or 'datadir'/mysql-bin will be used (the later if "
+   "--log-basename is not specified). We strongly recommend to use either "
+   "--log-basename or specify a filename to ensure that replication doesn't "
+   "stop if the real hostname of the computer changes'.",
+   &opt_bin_logname, &opt_bin_logname, 0, GET_STR,
    OPT_ARG, 0, 0, 0, 0, 0, 0},
   {"log-bin-index", 0,
    "File that holds the names for last binary log files.",
@@ -5899,9 +6324,10 @@ struct my_option my_long_options[]=
   &opt_log_slow_slave_statements, &opt_log_slow_slave_statements,
   0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
   {"log-slow-queries", OPT_SLOW_QUERY_LOG,
-   "Log slow queries to a table or log file. Defaults logging to table "
-   "mysql.slow_log or hostname-slow.log if --log-output=file is used. "
-   "Must be enabled to activate other slow log options. "
+   "Enable logging of slow queries (longer than --long-query-time) to log file "
+   "or table. Optional argument is a file name for the slow log. If not given, "
+   "'log-basename'-slow.log will be used. Use --log-output=TABLE if you want "
+   "to have the log in the table 'mysql.slow_log'. "
    "Deprecated option, use --slow-query-log/--slow-query-log-file instead.",
    &opt_slow_logname, &opt_slow_logname, 0, GET_STR_ALLOC, OPT_ARG,
    0, 0, 0, 0, 0, 0},
@@ -5913,12 +6339,13 @@ struct my_option my_long_options[]=
 #ifdef HAVE_MMAP
   {"log-tc-size", 0, "Size of transaction coordinator log.",
    &opt_tc_log_size, &opt_tc_log_size, 0, GET_ULONG,
-   REQUIRED_ARG, TC_LOG_MIN_SIZE, TC_LOG_MIN_SIZE, ULONG_MAX, 0,
+   REQUIRED_ARG, TC_LOG_MIN_SIZE, TC_LOG_MIN_SIZE, (longlong) ULONG_MAX, 0,
    TC_LOG_PAGE_SIZE, 0},
 #endif
   {"master-info-file", 0,
    "The location and name of the file that remembers the master and where "
-   "the I/O replication thread is in the master's binlogs.",
+   "the I/O replication thread is in the master's binlogs. Defaults to "
+   "master.info",
    &master_info_file, &master_info_file, 0, GET_STR,
    REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
   {"master-retry-count", 0,
@@ -6013,6 +6440,9 @@ struct my_option my_long_options[]=
    "Show user and password in SHOW SLAVE HOSTS on this master.",
    &opt_show_slave_auth_info, &opt_show_slave_auth_info, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+  {"skip-bdb", OPT_DEPRECATED_OPTION,
+   "Deprecated option; Exist only for compatiblity with old my.cnf files",
+   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
 #ifndef DISABLE_GRANT_OPTIONS
   {"skip-grant-tables", 0,
    "Start without grant tables. This gives all users FULL ACCESS to all tables.",
@@ -6021,14 +6451,9 @@ struct my_option my_long_options[]=
 #endif
   {"skip-host-cache", OPT_SKIP_HOST_CACHE, "Don't cache host names.", 0, 0, 0,
    GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
-  {"skip-new", OPT_SKIP_NEW, "Don't use new, possibly wrong routines.",
-   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
   {"skip-slave-start", 0,
    "If set, slave is not autostarted.", &opt_skip_slave_start,
    &opt_skip_slave_start, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
-  {"skip-stack-trace", OPT_SKIP_STACK_TRACE,
-   "Don't print a stack trace on failure.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0,
-   0, 0, 0, 0},
   {"skip-thread-priority", OPT_SKIP_PRIOR,
    "Don't give threads different priorities. This option is deprecated "
    "because it has no effect; the implied behavior is already the default.",
@@ -6058,7 +6483,10 @@ struct my_option my_long_options[]=
      purify. These are not suppressed: instead we disable symlinks
      option if compiled with valgrind support.
    */
-   IF_PURIFY(0,1), 0, 0, 0, 0, 0},
+   IF_VALGRIND(0,1), 0, 0, 0, 0, 0},
+  {"sync_sys", 0,
+   "Enable system sync calls. Disable only when running tests or debugging!",
+   &opt_sync, &opt_sync, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
   {"sysdate-is-now", 0,
    "Non-default option to alias SYSDATE() to NOW() to make it safe-replicable. "
    "Since 5.0, SYSDATE() returns a `dynamic' value different for different "
@@ -6106,6 +6534,12 @@ struct my_option my_long_options[]=
   {"table_cache", 0, "Deprecated; use --table-open-cache instead.",
    &table_cache_size, &table_cache_size, 0, GET_ULONG,
    REQUIRED_ARG, TABLE_OPEN_CACHE_DEFAULT, 1, 512*1024L, 0, 1, 0},
+#ifndef DBUG_OFF
+  {"debug-assert-if-crashed-table", 0,
+   "Do an assert in handler::print_error() if we get a crashed table",
+   &debug_assert_if_crashed_table, &debug_assert_if_crashed_table,
+   0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+#endif
   {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
 };
 
@@ -6409,7 +6843,7 @@ static int show_ssl_ctx_get_session_cache_mode(THD *thd, SHOW_VAR *var, char *bu
 }
 
 /*
-   Functions relying on SSL 
+   Functions relying on SSL
    Note: In the show_ssl_* functions, we need to check if we have a
          valid vio-object since this isn't always true, specifically
          when session_status or global_status is requested from
@@ -6503,6 +6937,46 @@ static int show_ssl_get_cipher_list(THD *thd, SHOW_VAR *var, char *buff)
 
 #endif /* HAVE_OPENSSL && !EMBEDDED_LIBRARY */
 
+/**
+  SHOW STATUS callback for "Key": publishes default key cache statistics.
+  Lays out a statistics snapshot followed by a SHOW_VAR array inside buff and
+  points var at that array (SHOW_ARRAY), terminated by an entry with name 0.
+
+  @param thd   unused
+  @param var   [out] set to SHOW_ARRAY over the entries built in buff
+  @param buff  caller's buffer; asserted to hold SHOW_VAR_FUNC_BUFF_SIZE bytes
+  @retval 0    always
+*/
+static int show_default_keycache(THD *thd, SHOW_VAR *var, char *buff)
+{
+  struct st_data {
+    KEY_CACHE_STATISTICS stats;
+    SHOW_VAR var[9];                    /* 8 counters + name==0 terminator */
+  } *data= (st_data *)buff;
+  SHOW_VAR *v= data->var;
+
+  var->type= SHOW_ARRAY;
+  var->value= (char*)v;
+  get_key_cache_statistics(dflt_key_cache, 0, &data->stats);
+
+#define set_one_keycache_var(X,Y)       \
+  v->name= X;                           \
+  v->type= SHOW_LONGLONG;               \
+  v->value= (char*)&data->stats.Y;      \
+  v++;
+  set_one_keycache_var("blocks_not_flushed", blocks_changed);
+  set_one_keycache_var("blocks_unused",      blocks_unused);
+  set_one_keycache_var("blocks_used",        blocks_used);
+  set_one_keycache_var("blocks_warm",        blocks_warm);
+  set_one_keycache_var("read_requests",      read_requests);
+  set_one_keycache_var("reads",              reads);
+  set_one_keycache_var("write_requests",     write_requests);
+  set_one_keycache_var("writes",             writes);
+#undef set_one_keycache_var
+  v->name= 0;                           /* end-of-array marker, now in var[8] */
+  DBUG_ASSERT((char*)(v+1) <= buff + SHOW_VAR_FUNC_BUFF_SIZE);
+  return 0;
+}
+
 
 /*
   Variables shown by SHOW STATUS in alphabetical order
@@ -6511,21 +6985,26 @@ static int show_ssl_get_cipher_list(THD *thd, SHOW_VAR *var, char *buff)
 SHOW_VAR status_vars[]= {
   {"Aborted_clients",          (char*) &aborted_threads,        SHOW_LONG},
   {"Aborted_connects",         (char*) &aborted_connects,       SHOW_LONG},
+  {"Access_denied_errors",     (char*) offsetof(STATUS_VAR, access_denied_errors), SHOW_LONG_STATUS},
   {"Binlog_cache_disk_use",    (char*) &binlog_cache_disk_use,  SHOW_LONG},
   {"Binlog_cache_use",         (char*) &binlog_cache_use,       SHOW_LONG},
   {"Binlog_stmt_cache_disk_use",(char*) &binlog_stmt_cache_disk_use,  SHOW_LONG},
   {"Binlog_stmt_cache_use",    (char*) &binlog_stmt_cache_use,       SHOW_LONG},
+  {"Busy_time",                (char*) offsetof(STATUS_VAR, busy_time), SHOW_DOUBLE_STATUS},
   {"Bytes_received",           (char*) offsetof(STATUS_VAR, bytes_received), SHOW_LONGLONG_STATUS},
   {"Bytes_sent",               (char*) offsetof(STATUS_VAR, bytes_sent), SHOW_LONGLONG_STATUS},
+  {"Binlog_bytes_written",     (char*) offsetof(STATUS_VAR, binlog_bytes_written), SHOW_LONGLONG_STATUS},
   {"Com",                      (char*) com_status_vars, SHOW_ARRAY},
   {"Compression",              (char*) &show_net_compression, SHOW_FUNC},
   {"Connections",              (char*) &thread_id,              SHOW_LONG_NOFLUSH},
+  {"Cpu_time",                 (char*) offsetof(STATUS_VAR, cpu_time), SHOW_DOUBLE_STATUS},
   {"Created_tmp_disk_tables",  (char*) offsetof(STATUS_VAR, created_tmp_disk_tables), SHOW_LONG_STATUS},
   {"Created_tmp_files",	       (char*) &my_tmp_file_created,	SHOW_LONG},
   {"Created_tmp_tables",       (char*) offsetof(STATUS_VAR, created_tmp_tables), SHOW_LONG_STATUS},
   {"Delayed_errors",           (char*) &delayed_insert_errors,  SHOW_LONG},
   {"Delayed_insert_threads",   (char*) &delayed_insert_threads, SHOW_LONG_NOFLUSH},
   {"Delayed_writes",           (char*) &delayed_insert_writes,  SHOW_LONG},
+  {"Empty_queries",            (char*) offsetof(STATUS_VAR, empty_queries), SHOW_LONG_STATUS},
   {"Flush_commands",           (char*) &refresh_version,        SHOW_LONG_NOFLUSH},
   {"Handler_commit",           (char*) offsetof(STATUS_VAR, ha_commit_count), SHOW_LONG_STATUS},
   {"Handler_delete",           (char*) offsetof(STATUS_VAR, ha_delete_count), SHOW_LONG_STATUS},
@@ -6543,13 +7022,9 @@ SHOW_VAR status_vars[]= {
   {"Handler_savepoint_rollback",(char*) offsetof(STATUS_VAR, ha_savepoint_rollback_count), SHOW_LONG_STATUS},
   {"Handler_update",           (char*) offsetof(STATUS_VAR, ha_update_count), SHOW_LONG_STATUS},
   {"Handler_write",            (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS},
-  {"Key_blocks_not_flushed",   (char*) offsetof(KEY_CACHE, global_blocks_changed), SHOW_KEY_CACHE_LONG},
-  {"Key_blocks_unused",        (char*) offsetof(KEY_CACHE, blocks_unused), SHOW_KEY_CACHE_LONG},
-  {"Key_blocks_used",          (char*) offsetof(KEY_CACHE, blocks_used), SHOW_KEY_CACHE_LONG},
-  {"Key_read_requests",        (char*) offsetof(KEY_CACHE, global_cache_r_requests), SHOW_KEY_CACHE_LONGLONG},
-  {"Key_reads",                (char*) offsetof(KEY_CACHE, global_cache_read), SHOW_KEY_CACHE_LONGLONG},
-  {"Key_write_requests",       (char*) offsetof(KEY_CACHE, global_cache_w_requests), SHOW_KEY_CACHE_LONGLONG},
-  {"Key_writes",               (char*) offsetof(KEY_CACHE, global_cache_write), SHOW_KEY_CACHE_LONGLONG},
+  {"Handler_tmp_update",       (char*) offsetof(STATUS_VAR, ha_tmp_update_count), SHOW_LONG_STATUS},
+  {"Handler_tmp_write",        (char*) offsetof(STATUS_VAR, ha_tmp_write_count), SHOW_LONG_STATUS},
+  {"Key",                      (char*) &show_default_keycache, SHOW_FUNC},
   {"Last_query_cost",          (char*) offsetof(STATUS_VAR, last_query_cost), SHOW_DOUBLE_STATUS},
   {"Max_used_connections",     (char*) &max_used_connections,  SHOW_LONG},
   {"Not_flushed_delayed_rows", (char*) &delayed_rows_in_use,    SHOW_LONG_NOFLUSH},
@@ -6561,6 +7036,9 @@ SHOW_VAR status_vars[]= {
   {"Opened_tables",            (char*) offsetof(STATUS_VAR, opened_tables), SHOW_LONG_STATUS},
   {"Opened_table_definitions", (char*) offsetof(STATUS_VAR, opened_shares), SHOW_LONG_STATUS},
   {"Prepared_stmt_count",      (char*) &show_prepared_stmt_count, SHOW_FUNC},
+  {"Rows_sent",                (char*) offsetof(STATUS_VAR, rows_sent), SHOW_LONGLONG_STATUS},
+  {"Rows_read",                (char*) offsetof(STATUS_VAR, rows_read), SHOW_LONGLONG_STATUS},
+  {"Rows_tmp_read",            (char*) offsetof(STATUS_VAR, rows_tmp_read), SHOW_LONGLONG_STATUS},
 #ifdef HAVE_QUERY_CACHE
   {"Qcache_free_blocks",       (char*) &query_cache.free_memory_blocks, SHOW_LONG_NOFLUSH},
   {"Qcache_free_memory",       (char*) &query_cache.free_memory, SHOW_LONG_NOFLUSH},
@@ -6621,6 +7099,13 @@ SHOW_VAR status_vars[]= {
   {"Ssl_version",              (char*) &show_ssl_get_version, SHOW_FUNC},
 #endif
 #endif /* HAVE_OPENSSL */
+  {"Syncs",                    (char*) &my_sync_count,          SHOW_LONG_NOFLUSH},
+  /*
+    The expression cache is currently used only for caching subqueries, so
+    its status variables are named Subquery_cache*.
+  */
+  {"Subquery_cache_hit",       (char*) &subquery_cache_hit,     SHOW_LONG},
+  {"Subquery_cache_miss",      (char*) &subquery_cache_miss,    SHOW_LONG},
   {"Table_locks_immediate",    (char*) &locks_immediate,        SHOW_LONG},
   {"Table_locks_waited",       (char*) &locks_waited,           SHOW_LONG},
 #ifdef HAVE_MMAP
@@ -6768,13 +7253,17 @@ static int mysql_init_variables(void)
   /* Things reset to zero */
   opt_skip_slave_start= opt_reckless_slave = 0;
   mysql_home[0]= pidfile_name[0]= log_error_file[0]= 0;
+#if defined(HAVE_REALPATH) && !defined(HAVE_valgrind) && !defined(HAVE_BROKEN_REALPATH)
+  /*  We can only test for sub paths if my_symlink.c is using realpath */
   myisam_test_invalid_symlink= test_if_data_home_dir;
+#endif
   opt_log= opt_slow_log= 0;
   opt_bin_log= 0;
   opt_disable_networking= opt_skip_show_db=0;
   opt_skip_name_resolve= 0;
   opt_ignore_builtin_innodb= 0;
-  opt_logname= opt_update_logname= opt_binlog_index_name= opt_slow_logname= 0;
+  opt_logname= opt_binlog_index_name= opt_slow_logname= 0;
+  opt_log_basename= 0;
   opt_tc_log_file= (char *)"tc.log";      // no hostname in tc_log file name !
   opt_secure_auth= 0;
   opt_bootstrap= opt_myisam_log= 0;
@@ -6791,6 +7280,7 @@ static int mysql_init_variables(void)
   abort_loop= select_thread_in_use= signal_thread_in_use= 0;
   ready_to_exit= shutdown_in_progress= grant_option= 0;
   aborted_threads= aborted_connects= 0;
+  subquery_cache_miss= subquery_cache_hit= 0;
   delayed_insert_threads= delayed_insert_writes= delayed_rows_in_use= 0;
   delayed_insert_errors= thread_created= 0;
   specialflag= 0;
@@ -6816,7 +7306,7 @@ static int mysql_init_variables(void)
   character_set_filesystem= &my_charset_bin;
 
   opt_specialflag= SPECIAL_ENGLISH;
-  unix_sock= ip_sock= INVALID_SOCKET;
+  unix_sock= base_ip_sock= extra_ip_sock= INVALID_SOCKET;
   mysql_home_ptr= mysql_home;
   pidfile_name_ptr= pidfile_name;
   log_error_file_ptr= log_error_file;
@@ -6837,6 +7327,7 @@ static int mysql_init_variables(void)
     sql_print_error("Cannot allocate the keycache");
     return 1;
   }
+
   /* set key_cache_hash.default_value = dflt_key_cache */
   multi_keycache_init();
 
@@ -6862,6 +7353,7 @@ static int mysql_init_variables(void)
 #ifndef DBUG_OFF
   default_dbug_option=IF_WIN("d:t:i:O,\\mysqld.trace",
 			     "d:t:i:o,/tmp/mysqld.trace");
+  current_dbug_option= default_dbug_option;
 #endif
   opt_error_log= IF_WIN(1,0);
 #ifdef ENABLED_PROFILING
@@ -6956,12 +7448,27 @@ mysqld_get_one_option(int optid,
                       char *argument)
 {
   switch(optid) {
-  case '#':
 #ifndef DBUG_OFF
-    DBUG_SET_INITIAL(argument ? argument : default_dbug_option);
-#endif
+  case '#':
+    if (!argument)
+      argument= (char*) default_dbug_option;
+    if (argument[0] == '0' && !argument[1])
+    {
+      DEBUGGER_OFF;
+      break;
+    }
+    DEBUGGER_ON;
+    if (argument[0] == '1' && !argument[1])
+      break;
+    DBUG_SET_INITIAL(argument);
     opt_endinfo=1;				/* unireg: memory allocation */
     break;
+#endif
+  case OPT_DEPRECATED_OPTION:
+    sql_print_warning("'%s' is deprecated. It does nothing and exists only "
+                      "for compatiblity with old my.cnf files.",
+                      opt->name);
+    break;
   case 'a':
     global_system_variables.sql_mode= MODE_ANSI;
     global_system_variables.tx_isolation= ISO_SERIALIZABLE;
@@ -7019,6 +7526,34 @@ mysqld_get_one_option(int optid,
   case (int) OPT_BIN_LOG:
     opt_bin_log= test(argument != disabled_my_option);
     break;
+  case (int) OPT_LOG_BASENAME:
+  {
+    if (opt_log_basename[0] == 0 || strchr(opt_log_basename, FN_EXTCHAR) ||
+        strchr(opt_log_basename,FN_LIBCHAR))
+    {
+      sql_print_error("Wrong argument for --log-basename. It can't be empty or contain '.' or '" FN_DIRSEP "'");
+      return 1;
+    }
+    if (log_error_file_ptr != disabled_my_option)
+      log_error_file_ptr= opt_log_basename;
+
+    make_default_log_name(&opt_logname, ".log", false);
+    make_default_log_name(&opt_slow_logname, "-slow.log", false);
+    make_default_log_name(&opt_bin_logname, "-bin", true);
+    make_default_log_name(&opt_binlog_index_name, "-bin.index", true);
+    make_default_log_name(&opt_relay_logname, "-relay-bin", true);
+    make_default_log_name(&opt_relaylog_index_name, "-relay-bin.index", true);
+
+    pidfile_name_ptr= pidfile_name;
+    strmake(pidfile_name, argument, sizeof(pidfile_name)-5);
+    strmov(fn_ext(pidfile_name),".pid");
+
+    /* check for errors */
+    if (!opt_bin_logname || !opt_relaylog_index_name || ! opt_logname ||
+        ! opt_slow_logname || !pidfile_name_ptr)
+      return 1;                                 // out of memory error
+    break;
+  }
 #ifdef HAVE_REPLICATION
   case (int)OPT_REPLICATE_IGNORE_DB:
   {
@@ -7032,6 +7567,7 @@ mysqld_get_one_option(int optid,
   }
   case (int)OPT_REPLICATE_REWRITE_DB:
   {
+    /* See also OPT_REWRITE_DB handling in client/mysqlbinlog.cc */
     char* key = argument,*p, *val;
 
     if (!(p= strstr(argument, "->")))
@@ -7112,23 +7648,14 @@ mysqld_get_one_option(int optid,
     WARN_DEPRECATED(NULL, 7, 0, "--log-slow-queries", "'--slow-query-log'/'--slow-query-log-file'");
     opt_slow_log= 1;
     break;
-  case (int) OPT_SKIP_NEW:
-    opt_specialflag|= SPECIAL_NO_NEW_FUNC;
-    delay_key_write_options= DELAY_KEY_WRITE_NONE;
-    myisam_concurrent_insert=0;
-    myisam_recover_options= HA_RECOVER_OFF;
-    sp_automatic_privileges=0;
-    my_use_symdir=0;
-    ha_open_options&= ~(HA_OPEN_ABORT_IF_CRASHED | HA_OPEN_DELAY_KEY_WRITE);
-#ifdef HAVE_QUERY_CACHE
-    query_cache_size=0;
-#endif
-    break;
   case (int) OPT_SAFE:
-    opt_specialflag|= SPECIAL_SAFE_MODE;
-    delay_key_write_options= DELAY_KEY_WRITE_NONE;
+    opt_specialflag|= SPECIAL_SAFE_MODE | SPECIAL_NO_NEW_FUNC;
+    delay_key_write_options= (uint) DELAY_KEY_WRITE_NONE;
     myisam_recover_options= HA_RECOVER_DEFAULT;
     ha_open_options&= ~(HA_OPEN_DELAY_KEY_WRITE);
+#ifdef HAVE_QUERY_CACHE
+    query_cache_size=0;
+#endif
     break;
   case (int) OPT_SKIP_PRIOR:
     opt_specialflag|= SPECIAL_NO_PRIOR;
@@ -7146,9 +7673,6 @@ mysqld_get_one_option(int optid,
   case (int) OPT_WANT_CORE:
     test_flags |= TEST_CORE_ON_SIGNAL;
     break;
-  case (int) OPT_SKIP_STACK_TRACE:
-    test_flags|=TEST_NO_STACKTRACE;
-    break;
   case (int) OPT_BIND_ADDRESS:
     {
       struct addrinfo *res_lst, hints;    
@@ -7182,7 +7706,7 @@ mysqld_get_one_option(int optid,
     server_id_supplied = 1;
     break;
   case OPT_ONE_THREAD:
-    thread_handling= SCHEDULER_ONE_THREAD_PER_CONNECTION;
+    thread_handling= SCHEDULER_NO_THREADS;
     break;
   case OPT_LOWER_CASE_TABLE_NAMES:
     lower_case_table_names_used= 1;
@@ -7225,7 +7749,6 @@ mysqld_get_one_option(int optid,
     break;
   case OPT_MAX_LONG_DATA_SIZE:
     max_long_data_size_used= true;
-    WARN_DEPRECATED(NULL, 5, 6, "--max_long_data_size", "'--max_allowed_packet'");
     break;
   }
   return 0;
@@ -7247,6 +7770,7 @@ mysql_getopt_value(const char *keyname, uint key_length,
   case OPT_KEY_CACHE_BLOCK_SIZE:
   case OPT_KEY_CACHE_DIVISION_LIMIT:
   case OPT_KEY_CACHE_AGE_THRESHOLD:
+  case OPT_KEY_CACHE_PARTITIONS:
   {
     KEY_CACHE *key_cache;
     if (!(key_cache= get_or_create_key_cache(keyname, key_length)))
@@ -7264,6 +7788,8 @@ mysql_getopt_value(const char *keyname, uint key_length,
       return &key_cache->param_division_limit;
     case OPT_KEY_CACHE_AGE_THRESHOLD:
       return &key_cache->param_age_threshold;
+    case OPT_KEY_CACHE_PARTITIONS:
+      return (uchar**) &key_cache->param_partitions;
     }
   }
   }
@@ -7311,7 +7837,7 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
        opt < my_long_options + array_elements(my_long_options) - 1;
        opt++)
     insert_dynamic(&all_options, (uchar*) opt);
-  sys_var_add_options(&all_options, sys_var::PARSE_NORMAL);
+  sys_var_add_options(&all_options, 0);
   add_terminator(&all_options);
 
   /* Skip unknown options so that they may be processed later by plugins */
@@ -7367,7 +7893,7 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
   }
 
   if (opt_disable_networking)
-    mysqld_port= 0;
+    mysqld_port= mysqld_extra_port= 0;
 
   if (opt_skip_show_db)
     opt_specialflag|= SPECIAL_SKIP_SHOW_DB;
@@ -7408,7 +7934,8 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
   if (opt_debugging)
   {
     /* Allow break with SIGINT, no core or stack trace */
-    test_flags|= TEST_SIGINT | TEST_NO_STACKTRACE;
+    test_flags|= TEST_SIGINT;
+    opt_stack_trace= 1;
     test_flags&= ~TEST_CORE_ON_SIGNAL;
   }
   /* Set global MyISAM variables from delay_key_write_options */
@@ -7430,6 +7957,8 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
     In most cases the global variables will not be used
   */
   my_disable_locking= myisam_single_user= test(opt_external_locking == 0);
+  my_disable_sync= opt_sync == 0;
+  my_disable_thr_alarm= opt_thread_alarm == 0;
   my_default_record_cache_size=global_system_variables.read_buff_size;
 
   global_system_variables.long_query_time= (ulonglong)
@@ -7447,12 +7976,19 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
     return 1;
 
 #ifdef EMBEDDED_LIBRARY
-  one_thread_scheduler();
+  one_thread_scheduler(thread_scheduler);
+  one_thread_scheduler(extra_thread_scheduler);
 #else
   if (thread_handling <= SCHEDULER_ONE_THREAD_PER_CONNECTION)
-    one_thread_per_connection_scheduler();
-  else                  /* thread_handling == SCHEDULER_NO_THREADS) */
-    one_thread_scheduler();
+    one_thread_per_connection_scheduler(thread_scheduler, &max_connections,
+                                        &connection_count);
+  else if (thread_handling == SCHEDULER_NO_THREADS)
+    one_thread_scheduler(thread_scheduler);
+  else
+    pool_of_threads_scheduler(thread_scheduler);  /* purecov: tested */
+  one_thread_per_connection_scheduler(extra_thread_scheduler,
+                                      &extra_max_connections,
+                                      &extra_connection_count);
 #endif
 
   global_system_variables.engine_condition_pushdown=
@@ -7563,9 +8099,7 @@ bool is_secure_file_path(char *path)
     /*
       The supplied file path might have been a file and not a directory.
     */
-    int length= (int)dirname_length(path);
-    if (length >= FN_REFLEN)
-      return FALSE;
+    size_t length= dirname_length(path);        // Guaranteed to be < FN_REFLEN
     memcpy(buff2, path, length);
     buff2[length]= '\0';
     if (length == 0 || my_realpath(buff1, buff2, 0))
@@ -7593,6 +8127,8 @@ bool is_secure_file_path(char *path)
 static int fix_paths(void)
 {
   char buff[FN_REFLEN],*pos;
+  DBUG_ENTER("fix_paths");
+
   convert_dirname(mysql_home,mysql_home,NullS);
   /* Resolve symlinks to allow 'mysql_home' to be a relative symlink */
   my_realpath(mysql_home,mysql_home,MYF(0));
@@ -7611,6 +8147,7 @@ static int fix_paths(void)
   (void) my_load_path(opt_plugin_dir, opt_plugin_dir_ptr ? opt_plugin_dir_ptr :
                                       get_relative_path(PLUGINDIR), mysql_home);
   opt_plugin_dir_ptr= opt_plugin_dir;
+  pidfile_name_ptr= pidfile_name;
 
   my_realpath(mysql_unpacked_real_data_home, mysql_real_data_home, MYF(0));
   mysql_unpacked_real_data_home_len= 
@@ -7637,7 +8174,7 @@ static int fix_paths(void)
   charsets_dir=mysql_charsets_dir;
 
   if (init_tmpdir(&mysql_tmpdir_list, opt_mysql_tmpdir))
-    return 1;
+    DBUG_RETURN(1);
   if (!opt_mysql_tmpdir)
     opt_mysql_tmpdir= mysql_tmpdir;
 #ifdef HAVE_REPLICATION
@@ -7647,7 +8184,7 @@ static int fix_paths(void)
   /*
     Convert the secure-file-priv option to system format, allowing
     a quick strcmp to check if read or write is in an allowed dir
-   */
+  */
   if (opt_secure_file_priv)
   {
     if (*opt_secure_file_priv == 0)
@@ -7662,7 +8199,7 @@ static int fix_paths(void)
       if (my_realpath(buff, opt_secure_file_priv, 0))
       {
         sql_print_warning("Failed to normalize the argument for --secure-file-priv.");
-        return 1;
+        DBUG_RETURN(1);
       }
       char *secure_file_real_path= (char *)my_malloc(FN_REFLEN, MYF(MY_FAE));
       convert_dirname(secure_file_real_path, buff, NullS);
@@ -7670,8 +8207,7 @@ static int fix_paths(void)
       opt_secure_file_priv= secure_file_real_path;
     }
   }
-  
-  return 0;
+  DBUG_RETURN(0);
 }
 
 /**
@@ -7679,12 +8215,9 @@ static int fix_paths(void)
 
   @param dir_name			Directory to test
 
-  @retval
-    -1  Don't know (Test failed)
-  @retval
-    0   File system is case sensitive
-  @retval
-    1   File system is case insensitive
+  @retval -1  Don't know (Test failed)
+  @retval  0   File system is case sensitive
+  @retval  1   File system is case insensitive
 */
 
 static int test_if_case_insensitive(const char *dir_name)
@@ -7703,7 +8236,8 @@ static int test_if_case_insensitive(const char *dir_name)
   if ((file= mysql_file_create(key_file_casetest,
                                buff, 0666, O_RDWR, MYF(0))) < 0)
   {
-    sql_print_warning("Can't create test file %s", buff);
+    if (!opt_help)
+      sql_print_warning("Can't create test file %s", buff);
     DBUG_RETURN(-1);
   }
   mysql_file_close(file, MYF(0));
@@ -7752,12 +8286,14 @@ void refresh_status(THD *thd)
 
   /* Reset thread's status variables */
   bzero((uchar*) &thd->status_var, sizeof(thd->status_var));
+  bzero((uchar*) &thd->org_status_var, sizeof(thd->org_status_var)); 
+  thd->start_bytes_received= 0;
 
   /* Reset some global variables */
   reset_status_vars();
 
   /* Reset the counters of all key caches (default and named). */
-  process_key_caches(reset_key_cache_counters);
+  process_key_caches(reset_key_cache_counters, 0);
   flush_status_time= time((time_t*) 0);
   mysql_mutex_unlock(&LOCK_status);
 
@@ -7792,258 +8328,3 @@ template class I_List<Statement>;
 template class I_List_iterator<Statement>;
 #endif
 
-#ifdef HAVE_PSI_INTERFACE
-#ifdef HAVE_MMAP
-PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active, key_LOCK_pool;
-#endif /* HAVE_MMAP */
-
-#ifdef HAVE_OPENSSL
-PSI_mutex_key key_LOCK_des_key_file;
-#endif /* HAVE_OPENSSL */
-
-PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_prep_xids,
-  key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi,
-  key_LOCK_connection_count, key_LOCK_crypt, key_LOCK_delayed_create,
-  key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log,
-  key_LOCK_gdl, key_LOCK_global_system_variables,
-  key_LOCK_manager,
-  key_LOCK_prepared_stmt_count,
-  key_LOCK_rpl_status, key_LOCK_server_started, key_LOCK_status,
-  key_LOCK_system_variables_hash, key_LOCK_table_share, key_LOCK_thd_data,
-  key_LOCK_user_conn, key_LOCK_uuid_generator, key_LOG_LOCK_log,
-  key_master_info_data_lock, key_master_info_run_lock,
-  key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock,
-  key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
-  key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
-  key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count,
-  key_PARTITION_LOCK_auto_inc;
-PSI_mutex_key key_RELAYLOG_LOCK_index;
-
-static PSI_mutex_info all_server_mutexes[]=
-{
-#ifdef HAVE_MMAP
-  { &key_PAGE_lock, "PAGE::lock", 0},
-  { &key_LOCK_sync, "TC_LOG_MMAP::LOCK_sync", 0},
-  { &key_LOCK_active, "TC_LOG_MMAP::LOCK_active", 0},
-  { &key_LOCK_pool, "TC_LOG_MMAP::LOCK_pool", 0},
-#endif /* HAVE_MMAP */
-
-#ifdef HAVE_OPENSSL
-  { &key_LOCK_des_key_file, "LOCK_des_key_file", PSI_FLAG_GLOBAL},
-#endif /* HAVE_OPENSSL */
-
-  { &key_BINLOG_LOCK_index, "MYSQL_BIN_LOG::LOCK_index", 0},
-  { &key_BINLOG_LOCK_prep_xids, "MYSQL_BIN_LOG::LOCK_prep_xids", 0},
-  { &key_RELAYLOG_LOCK_index, "MYSQL_RELAY_LOG::LOCK_index", 0},
-  { &key_delayed_insert_mutex, "Delayed_insert::mutex", 0},
-  { &key_hash_filo_lock, "hash_filo::lock", 0},
-  { &key_LOCK_active_mi, "LOCK_active_mi", PSI_FLAG_GLOBAL},
-  { &key_LOCK_connection_count, "LOCK_connection_count", PSI_FLAG_GLOBAL},
-  { &key_LOCK_crypt, "LOCK_crypt", PSI_FLAG_GLOBAL},
-  { &key_LOCK_delayed_create, "LOCK_delayed_create", PSI_FLAG_GLOBAL},
-  { &key_LOCK_delayed_insert, "LOCK_delayed_insert", PSI_FLAG_GLOBAL},
-  { &key_LOCK_delayed_status, "LOCK_delayed_status", PSI_FLAG_GLOBAL},
-  { &key_LOCK_error_log, "LOCK_error_log", PSI_FLAG_GLOBAL},
-  { &key_LOCK_gdl, "LOCK_gdl", PSI_FLAG_GLOBAL},
-  { &key_LOCK_global_system_variables, "LOCK_global_system_variables", PSI_FLAG_GLOBAL},
-  { &key_LOCK_manager, "LOCK_manager", PSI_FLAG_GLOBAL},
-  { &key_LOCK_prepared_stmt_count, "LOCK_prepared_stmt_count", PSI_FLAG_GLOBAL},
-  { &key_LOCK_rpl_status, "LOCK_rpl_status", PSI_FLAG_GLOBAL},
-  { &key_LOCK_server_started, "LOCK_server_started", PSI_FLAG_GLOBAL},
-  { &key_LOCK_status, "LOCK_status", PSI_FLAG_GLOBAL},
-  { &key_LOCK_system_variables_hash, "LOCK_system_variables_hash", PSI_FLAG_GLOBAL},
-  { &key_LOCK_table_share, "LOCK_table_share", PSI_FLAG_GLOBAL},
-  { &key_LOCK_thd_data, "THD::LOCK_thd_data", 0},
-  { &key_LOCK_user_conn, "LOCK_user_conn", PSI_FLAG_GLOBAL},
-  { &key_LOCK_uuid_generator, "LOCK_uuid_generator", PSI_FLAG_GLOBAL},
-  { &key_LOG_LOCK_log, "LOG::LOCK_log", 0},
-  { &key_master_info_data_lock, "Master_info::data_lock", 0},
-  { &key_master_info_run_lock, "Master_info::run_lock", 0},
-  { &key_mutex_slave_reporting_capability_err_lock, "Slave_reporting_capability::err_lock", 0},
-  { &key_relay_log_info_data_lock, "Relay_log_info::data_lock", 0},
-  { &key_relay_log_info_log_space_lock, "Relay_log_info::log_space_lock", 0},
-  { &key_relay_log_info_run_lock, "Relay_log_info::run_lock", 0},
-  { &key_structure_guard_mutex, "Query_cache::structure_guard_mutex", 0},
-  { &key_TABLE_SHARE_LOCK_ha_data, "TABLE_SHARE::LOCK_ha_data", 0},
-  { &key_LOCK_error_messages, "LOCK_error_messages", PSI_FLAG_GLOBAL},
-  { &key_LOG_INFO_lock, "LOG_INFO::lock", 0},
-  { &key_LOCK_thread_count, "LOCK_thread_count", PSI_FLAG_GLOBAL},
-  { &key_PARTITION_LOCK_auto_inc, "HA_DATA_PARTITION::LOCK_auto_inc", 0}
-};
-
-PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger,
-  key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave,
-  key_rwlock_LOCK_system_variables_hash, key_rwlock_query_cache_query_lock;
-
-static PSI_rwlock_info all_server_rwlocks[]=
-{
-#if defined (HAVE_OPENSSL) && !defined(HAVE_YASSL)
-  { &key_rwlock_openssl, "CRYPTO_dynlock_value::lock", 0},
-#endif
-  { &key_rwlock_LOCK_grant, "LOCK_grant", PSI_FLAG_GLOBAL},
-  { &key_rwlock_LOCK_logger, "LOGGER::LOCK_logger", 0},
-  { &key_rwlock_LOCK_sys_init_connect, "LOCK_sys_init_connect", PSI_FLAG_GLOBAL},
-  { &key_rwlock_LOCK_sys_init_slave, "LOCK_sys_init_slave", PSI_FLAG_GLOBAL},
-  { &key_rwlock_LOCK_system_variables_hash, "LOCK_system_variables_hash", PSI_FLAG_GLOBAL},
-  { &key_rwlock_query_cache_query_lock, "Query_cache_query::lock", 0}
-};
-
-#ifdef HAVE_MMAP
-PSI_cond_key key_PAGE_cond, key_COND_active, key_COND_pool;
-#endif /* HAVE_MMAP */
-
-PSI_cond_key key_BINLOG_COND_prep_xids, key_BINLOG_update_cond,
-  key_COND_cache_status_changed, key_COND_manager,
-  key_COND_rpl_status, key_COND_server_started,
-  key_delayed_insert_cond, key_delayed_insert_cond_client,
-  key_item_func_sleep_cond, key_master_info_data_cond,
-  key_master_info_start_cond, key_master_info_stop_cond,
-  key_relay_log_info_data_cond, key_relay_log_info_log_space_cond,
-  key_relay_log_info_start_cond, key_relay_log_info_stop_cond,
-  key_TABLE_SHARE_cond, key_user_level_lock_cond,
-  key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache;
-PSI_cond_key key_RELAYLOG_update_cond;
-
-static PSI_cond_info all_server_conds[]=
-{
-#if (defined(_WIN32) || defined(HAVE_SMEM)) && !defined(EMBEDDED_LIBRARY)
-  { &key_COND_handler_count, "COND_handler_count", PSI_FLAG_GLOBAL},
-#endif /* _WIN32 || HAVE_SMEM && !EMBEDDED_LIBRARY */
-#ifdef HAVE_MMAP
-  { &key_PAGE_cond, "PAGE::cond", 0},
-  { &key_COND_active, "TC_LOG_MMAP::COND_active", 0},
-  { &key_COND_pool, "TC_LOG_MMAP::COND_pool", 0},
-#endif /* HAVE_MMAP */
-  { &key_BINLOG_COND_prep_xids, "MYSQL_BIN_LOG::COND_prep_xids", 0},
-  { &key_BINLOG_update_cond, "MYSQL_BIN_LOG::update_cond", 0},
-  { &key_RELAYLOG_update_cond, "MYSQL_RELAY_LOG::update_cond", 0},
-  { &key_COND_cache_status_changed, "Query_cache::COND_cache_status_changed", 0},
-  { &key_COND_manager, "COND_manager", PSI_FLAG_GLOBAL},
-  { &key_COND_rpl_status, "COND_rpl_status", PSI_FLAG_GLOBAL},
-  { &key_COND_server_started, "COND_server_started", PSI_FLAG_GLOBAL},
-  { &key_delayed_insert_cond, "Delayed_insert::cond", 0},
-  { &key_delayed_insert_cond_client, "Delayed_insert::cond_client", 0},
-  { &key_item_func_sleep_cond, "Item_func_sleep::cond", 0},
-  { &key_master_info_data_cond, "Master_info::data_cond", 0},
-  { &key_master_info_start_cond, "Master_info::start_cond", 0},
-  { &key_master_info_stop_cond, "Master_info::stop_cond", 0},
-  { &key_relay_log_info_data_cond, "Relay_log_info::data_cond", 0},
-  { &key_relay_log_info_log_space_cond, "Relay_log_info::log_space_cond", 0},
-  { &key_relay_log_info_start_cond, "Relay_log_info::start_cond", 0},
-  { &key_relay_log_info_stop_cond, "Relay_log_info::stop_cond", 0},
-  { &key_TABLE_SHARE_cond, "TABLE_SHARE::cond", 0},
-  { &key_user_level_lock_cond, "User_level_lock::cond", 0},
-  { &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL},
-  { &key_COND_thread_cache, "COND_thread_cache", PSI_FLAG_GLOBAL},
-  { &key_COND_flush_thread_cache, "COND_flush_thread_cache", PSI_FLAG_GLOBAL}
-};
-
-PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert,
-  key_thread_handle_manager, key_thread_main,
-  key_thread_one_connection, key_thread_signal_hand;
-
-static PSI_thread_info all_server_threads[]=
-{
-#if (defined(_WIN32) || defined(HAVE_SMEM)) && !defined(EMBEDDED_LIBRARY)
-  { &key_thread_handle_con_namedpipes, "con_named_pipes", PSI_FLAG_GLOBAL},
-#endif /* _WIN32 || HAVE_SMEM && !EMBEDDED_LIBRARY */
-
-#if defined(HAVE_SMEM) && !defined(EMBEDDED_LIBRARY)
-  { &key_thread_handle_con_sharedmem, "con_shared_mem", PSI_FLAG_GLOBAL},
-#endif /* HAVE_SMEM && !EMBEDDED_LIBRARY */
-
-#if (defined(_WIN32) || defined(HAVE_SMEM)) && !defined(EMBEDDED_LIBRARY)
-  { &key_thread_handle_con_sockets, "con_sockets", PSI_FLAG_GLOBAL},
-#endif /* _WIN32 || HAVE_SMEM && !EMBEDDED_LIBRARY */
-
-#ifdef __WIN__
-  { &key_thread_handle_shutdown, "shutdown", PSI_FLAG_GLOBAL},
-#endif /* __WIN__ */
-
-  { &key_thread_bootstrap, "bootstrap", PSI_FLAG_GLOBAL},
-  { &key_thread_delayed_insert, "delayed_insert", 0},
-  { &key_thread_handle_manager, "manager", PSI_FLAG_GLOBAL},
-  { &key_thread_main, "main", PSI_FLAG_GLOBAL},
-  { &key_thread_one_connection, "one_connection", 0},
-  { &key_thread_signal_hand, "signal_handler", PSI_FLAG_GLOBAL}
-};
-
-#ifdef HAVE_MMAP
-PSI_file_key key_file_map;
-#endif /* HAVE_MMAP */
-
-PSI_file_key key_file_binlog, key_file_binlog_index, key_file_casetest,
-  key_file_dbopt, key_file_des_key_file, key_file_ERRMSG, key_select_to_file,
-  key_file_fileparser, key_file_frm, key_file_global_ddl_log, key_file_load,
-  key_file_loadfile, key_file_log_event_data, key_file_log_event_info,
-  key_file_master_info, key_file_misc, key_file_partition,
-  key_file_pid, key_file_relay_log_info, key_file_send_file, key_file_tclog,
-  key_file_trg, key_file_trn, key_file_init;
-PSI_file_key key_file_query_log, key_file_slow_log;
-PSI_file_key key_file_relaylog, key_file_relaylog_index;
-
-static PSI_file_info all_server_files[]=
-{
-#ifdef HAVE_MMAP
-  { &key_file_map, "map", 0},
-#endif /* HAVE_MMAP */
-  { &key_file_binlog, "binlog", 0},
-  { &key_file_binlog_index, "binlog_index", 0},
-  { &key_file_relaylog, "relaylog", 0},
-  { &key_file_relaylog_index, "relaylog_index", 0},
-  { &key_file_casetest, "casetest", 0},
-  { &key_file_dbopt, "dbopt", 0},
-  { &key_file_des_key_file, "des_key_file", 0},
-  { &key_file_ERRMSG, "ERRMSG", 0},
-  { &key_select_to_file, "select_to_file", 0},
-  { &key_file_fileparser, "file_parser", 0},
-  { &key_file_frm, "FRM", 0},
-  { &key_file_global_ddl_log, "global_ddl_log", 0},
-  { &key_file_load, "load", 0},
-  { &key_file_loadfile, "LOAD_FILE", 0},
-  { &key_file_log_event_data, "log_event_data", 0},
-  { &key_file_log_event_info, "log_event_info", 0},
-  { &key_file_master_info, "master_info", 0},
-  { &key_file_misc, "misc", 0},
-  { &key_file_partition, "partition", 0},
-  { &key_file_pid, "pid", 0},
-  { &key_file_query_log, "query_log", 0},
-  { &key_file_relay_log_info, "relay_log_info", 0},
-  { &key_file_send_file, "send_file", 0},
-  { &key_file_slow_log, "slow_log", 0},
-  { &key_file_tclog, "tclog", 0},
-  { &key_file_trg, "trigger_name", 0},
-  { &key_file_trn, "trigger", 0},
-  { &key_file_init, "init", 0}
-};
-
-/**
-  Initialise all the performance schema instrumentation points
-  used by the server.
-*/
-void init_server_psi_keys(void)
-{
-  const char* category= "sql";
-  int count;
-
-  if (PSI_server == NULL)
-    return;
-
-  count= array_elements(all_server_mutexes);
-  PSI_server->register_mutex(category, all_server_mutexes, count);
-
-  count= array_elements(all_server_rwlocks);
-  PSI_server->register_rwlock(category, all_server_rwlocks, count);
-
-  count= array_elements(all_server_conds);
-  PSI_server->register_cond(category, all_server_conds, count);
-
-  count= array_elements(all_server_threads);
-  PSI_server->register_thread(category, all_server_threads, count);
-
-  count= array_elements(all_server_files);
-  PSI_server->register_file(category, all_server_files, count);
-}
-
-#endif /* HAVE_PSI_INTERFACE */
-
diff --git a/sql/mysqld.h b/sql/mysqld.h
index fc9182ed02e..1ad46f94817 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -33,16 +33,6 @@ struct scheduler_functions;
 typedef struct st_mysql_const_lex_string LEX_CSTRING;
 typedef struct st_mysql_show_var SHOW_VAR;
 
-/*
-  This forward declaration is used from C files where the real
-  definition is included before.  Since C does not allow repeated
-  typedef declarations, even when identical, the definition may not be
-  repeated.
-*/
-#ifndef CHARSET_INFO_DEFINED
-typedef struct charset_info_st CHARSET_INFO;
-#endif  /* CHARSET_INFO_DEFINED */
-
 #if MAX_INDEXES <= 64
 typedef Bitmap<64>  key_map;          /* Used for finding keys */
 #else
@@ -196,7 +186,7 @@ extern const char *in_left_expr_name, *in_additional_cond, *in_having_cond;
 extern SHOW_VAR status_vars[];
 extern struct system_variables max_system_variables;
 extern struct system_status_var global_status_var;
-extern struct rand_struct sql_rand;
+extern struct my_rnd_struct sql_rand;
 extern const char *opt_date_time_formats[];
 extern handlerton *partition_hton;
 extern handlerton *myisam_hton;
@@ -214,7 +204,6 @@ extern int bootstrap_error;
 extern I_List<THD> threads;
 extern char err_shared_dir[];
 extern TYPELIB thread_handling_typelib;
-extern my_decimal decimal_zero;
 
 /*
   THR_MALLOC is a key which will be used to set/get MEM_ROOT** for a thread,
@@ -241,7 +230,7 @@ extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_prep_xids,
   key_LOCK_prepared_stmt_count,
   key_LOCK_rpl_status, key_LOCK_server_started, key_LOCK_status,
   key_LOCK_table_share, key_LOCK_thd_data,
-  key_LOCK_user_conn, key_LOCK_uuid_generator, key_LOG_LOCK_log,
+  key_LOCK_user_conn, key_LOG_LOCK_log,
   key_master_info_data_lock, key_master_info_run_lock,
   key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock,
   key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
@@ -249,12 +238,17 @@ extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_prep_xids,
   key_LOCK_error_messages, key_LOCK_thread_count, key_PARTITION_LOCK_auto_inc;
 extern PSI_mutex_key key_RELAYLOG_LOCK_index;
 
+extern PSI_mutex_key key_LOCK_stats,
+  key_LOCK_global_user_client_stats, key_LOCK_global_table_stats,
+  key_LOCK_global_index_stats, key_LOCK_wakeup_ready;
+
 extern PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger,
   key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave,
   key_rwlock_LOCK_system_variables_hash, key_rwlock_query_cache_query_lock;
 
 #ifdef HAVE_MMAP
-extern PSI_cond_key key_PAGE_cond, key_COND_active, key_COND_pool;
+extern PSI_cond_key key_PAGE_cond, key_COND_active, key_COND_pool,
+                    key_COND_queue_busy;
 #endif /* HAVE_MMAP */
 
 extern PSI_cond_key key_BINLOG_COND_prep_xids, key_BINLOG_update_cond,
@@ -267,16 +261,12 @@ extern PSI_cond_key key_BINLOG_COND_prep_xids, key_BINLOG_update_cond,
   key_relay_log_info_start_cond, key_relay_log_info_stop_cond,
   key_TABLE_SHARE_cond, key_user_level_lock_cond,
   key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache;
-extern PSI_cond_key key_RELAYLOG_update_cond;
+extern PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready;
 
 extern PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert,
   key_thread_handle_manager, key_thread_kill_server, key_thread_main,
   key_thread_one_connection, key_thread_signal_hand;
 
-#ifdef HAVE_MMAP
-extern PSI_file_key key_file_map;
-#endif /* HAVE_MMAP */
-
 extern PSI_file_key key_file_binlog, key_file_binlog_index, key_file_casetest,
   key_file_dbopt, key_file_des_key_file, key_file_ERRMSG, key_select_to_file,
   key_file_fileparser, key_file_frm, key_file_global_ddl_log, key_file_load,
@@ -331,7 +321,7 @@ extern MYSQL_PLUGIN_IMPORT key_map key_map_full;          /* Should be threaded
  */
 extern mysql_mutex_t
        LOCK_user_locks, LOCK_status,
-       LOCK_error_log, LOCK_delayed_insert, LOCK_uuid_generator,
+       LOCK_error_log, LOCK_delayed_insert, LOCK_short_uuid_generator,
        LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone,
        LOCK_slave_list, LOCK_active_mi, LOCK_manager,
        LOCK_global_system_variables, LOCK_user_conn,
@@ -354,6 +344,8 @@ extern char *opt_ssl_ca, *opt_ssl_capath, *opt_ssl_cert, *opt_ssl_cipher,
 
 extern MYSQL_PLUGIN_IMPORT pthread_key(THD*, THR_THD);
 
+#ifdef MYSQL_SERVER
+
 /**
   only options that need special treatment in get_one_option() deserve
   to be listed below
@@ -366,15 +358,18 @@ enum options_mysqld
   OPT_BINLOG_FORMAT,
   OPT_BINLOG_IGNORE_DB,
   OPT_BIN_LOG,
+  OPT_LOG_BASENAME,
   OPT_BOOTSTRAP,
   OPT_CONSOLE,
   OPT_DEBUG_SYNC_TIMEOUT,
   OPT_DELAY_KEY_WRITE_ALL,
+  OPT_DEPRECATED_OPTION,
   OPT_ISAM_LOG,
   OPT_KEY_BUFFER_SIZE,
   OPT_KEY_CACHE_AGE_THRESHOLD,
   OPT_KEY_CACHE_BLOCK_SIZE,
   OPT_KEY_CACHE_DIVISION_LIMIT,
+  OPT_KEY_CACHE_PARTITIONS,
   OPT_LOWER_CASE_TABLE_NAMES,
   OPT_ONE_THREAD,
   OPT_POOL_OF_THREADS,
@@ -389,7 +384,6 @@ enum options_mysqld
   OPT_SERVER_ID,
   OPT_SKIP_HOST_CACHE,
   OPT_SKIP_LOCK,
-  OPT_SKIP_NEW,
   OPT_SKIP_PRIOR,
   OPT_SKIP_RESOLVE,
   OPT_SKIP_STACK_TRACE,
@@ -406,7 +400,7 @@ enum options_mysqld
   OPT_LOG_ERROR,
   OPT_MAX_LONG_DATA_SIZE
 };
-
+#endif
 
 /**
    Query type constants (usable as bitmap flags).
@@ -418,7 +412,9 @@ enum enum_query_type
   /// In utf8.
   QT_TO_SYSTEM_CHARSET= (1 << 0),
   /// Without character set introducers.
-  QT_WITHOUT_INTRODUCERS= (1 << 1)
+  QT_WITHOUT_INTRODUCERS= (1 << 1),
+  /// view internal representation (like QT_ORDINARY except ORDER BY clause)
+  QT_VIEW_INTERNAL= (1 << 2)
 };
 
 /* query_id */
@@ -519,4 +515,27 @@ inline THD *_current_thd(void)
 #endif
 #define current_thd _current_thd()
 
+/*
+  @todo remove, make it static in ha_maria.cc
+  currently it's needed for sql_select.cc
+*/
+extern handlerton *maria_hton;
+
+extern uint extra_connection_count;
+extern my_bool opt_userstat_running, debug_assert_if_crashed_table;
+extern uint mysqld_extra_port;
+extern ulong opt_progress_report_time;
+extern ulong extra_max_connections;
+extern ulonglong denied_connections;
+extern ulong thread_created;
+extern scheduler_functions *thread_scheduler, *extra_thread_scheduler;
+extern char *opt_log_basename;
+extern my_bool opt_master_verify_checksum;
+extern my_bool opt_slave_sql_verify_checksum;
+extern ulong binlog_checksum_options;
+
+extern uint volatile global_disable_checkpoint;
+extern my_bool opt_help, opt_thread_alarm;
+extern my_bool opt_query_cache_strip_comments;
+
 #endif /* MYSQLD_INCLUDED */
diff --git a/sql/net_serv.cc b/sql/net_serv.cc
index e2cc32015b1..ccc67e64ea4 100644
--- a/sql/net_serv.cc
+++ b/sql/net_serv.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -51,6 +51,22 @@
 #define MYSQL_CLIENT
 #endif /*EMBEDDED_LIBRARY */
 
+/*
+  to reduce the number of ifdef's in the code
+*/
+#ifdef EXTRA_DEBUG
+#define EXTRA_DEBUG_fprintf fprintf
+#define EXTRA_DEBUG_fflush fflush
+#else
+static void inline EXTRA_DEBUG_fprintf(...) {}
+static int inline EXTRA_DEBUG_fflush(...) { return 0; }
+#endif
+#ifdef MYSQL_SERVER
+#define MYSQL_SERVER_my_error my_error
+#else
+static void inline MYSQL_SERVER_my_error(...) {}
+#endif
+
 
 /*
   The following handles the differences when this is linked between the
@@ -85,19 +101,14 @@ void sql_print_error(const char *format,...);
 */
 extern uint test_flags;
 extern ulong bytes_sent, bytes_received, net_big_packet_count;
-#ifndef MYSQL_INSTANCE_MANAGER
 #ifdef HAVE_QUERY_CACHE
 #define USE_QUERY_CACHE
 extern void query_cache_insert(const char *packet, ulong length,
                                unsigned pkt_nr);
 #endif // HAVE_QUERY_CACHE
 #define update_statistics(A) A
-#endif /* MYSQL_INSTANCE_MANGER */
-#endif /* defined(MYSQL_SERVER) && !defined(MYSQL_INSTANCE_MANAGER) */
-
-#if !defined(MYSQL_SERVER) || defined(MYSQL_INSTANCE_MANAGER)
+#else
 #define update_statistics(A)
-#define thd_increment_bytes_sent(N)
 #endif
 
 #define TEST_BLOCKING		8
@@ -114,7 +125,7 @@ my_bool my_net_init(NET *net, Vio* vio)
   net->vio = vio;
   my_net_local_init(net);			/* Set some limits */
   if (!(net->buff=(uchar*) my_malloc((size_t) net->max_packet+
-				     NET_HEADER_SIZE + COMP_HEADER_SIZE,
+				     NET_HEADER_SIZE + COMP_HEADER_SIZE +1,
 				     MYF(MY_WME))))
     DBUG_RETURN(1);
   net->buff_end=net->buff+net->max_packet;
@@ -124,11 +135,9 @@ my_bool my_net_init(NET *net, Vio* vio)
   net->last_error[0]=0;
   net->compress=0; net->reading_or_writing=0;
   net->where_b = net->remain_in_buf=0;
+  net->net_skip_rest_factor= 0;
   net->last_errno=0;
   net->unused= 0;
-#if defined(MYSQL_SERVER) && !defined(EMBEDDED_LIBRARY)
-  net->skip_big_packet= FALSE;
-#endif
 
   if (vio != 0)					/* If real connection */
   {
@@ -171,9 +180,7 @@ my_bool net_realloc(NET *net, size_t length)
     /* @todo: 1 and 2 codes are identical. */
     net->error= 1;
     net->last_errno= ER_NET_PACKET_TOO_LARGE;
-#ifdef MYSQL_SERVER
-    my_error(ER_NET_PACKET_TOO_LARGE, MYF(0));
-#endif
+    MYSQL_SERVER_my_error(ER_NET_PACKET_TOO_LARGE, MYF(0));
     DBUG_RETURN(1);
   }
   pkt_length = (length+IO_SIZE-1) & ~(IO_SIZE-1); 
@@ -212,7 +219,7 @@ my_bool net_realloc(NET *net, size_t length)
     -1   Don't know if data is ready or not
 */
 
-#if !defined(EMBEDDED_LIBRARY)
+#if !defined(EMBEDDED_LIBRARY) && defined(DBUG_OFF)
 
 static int net_data_is_ready(my_socket sd)
 {
@@ -254,34 +261,39 @@ static int net_data_is_ready(my_socket sd)
 #endif /* EMBEDDED_LIBRARY */
 
 /**
-  Remove unwanted characters from connection
-  and check if disconnected.
+   Initialize NET handler for new reads:
 
-    Read from socket until there is nothing more to read. Discard
-    what is read.
+   - Read from socket until there is nothing more to read. Discard
+     what is read.
+   - Initialize net for new net_read/net_write calls.
 
-    If there is anything when to read 'net_clear' is called this
-    normally indicates an error in the protocol.
+   If there is anything to read when 'net_clear' is called, this
+   normally indicates an error in the protocol. Normally one should not
+   need to clear the communication buffer. If one compiles without
+   -DUSE_NET_CLEAR then one saves one read call per query.
 
-    When connection is properly closed (for TCP it means with
-    a FIN packet), then select() considers a socket "ready to read",
-    in the sense that there's EOF to read, but read() returns 0.
+   When connection is properly closed (for TCP it means with
+   a FIN packet), then select() considers a socket "ready to read",
+   in the sense that there's EOF to read, but read() returns 0.
 
   @param net			NET handler
   @param clear_buffer           if <> 0, then clear all data from comm buff
 */
 
-void net_clear(NET *net, my_bool clear_buffer)
+void net_clear(NET *net, my_bool clear_buffer __attribute__((unused)))
 {
-#if !defined(EMBEDDED_LIBRARY)
-  size_t count;
-  int ready;
-#endif
   DBUG_ENTER("net_clear");
 
-#if !defined(EMBEDDED_LIBRARY)
+/*
+  Unless USE_NET_CLEAR is defined, debug builds (DBUG_OFF not defined)
+  skip the clear so that bugs in the protocol handling are caught.
+*/
+
+#if (!defined(EMBEDDED_LIBRARY) && defined(DBUG_OFF)) || defined(USE_NET_CLEAR)
   if (clear_buffer)
   {
+    size_t count;
+    int ready;
     while ((ready= net_data_is_ready(net->vio->sd)) > 0)
     {
       /* The socket is ready */
@@ -290,10 +302,8 @@ void net_clear(NET *net, my_bool clear_buffer)
       {
         DBUG_PRINT("info",("skipped %ld bytes from file: %s",
                            (long) count, vio_description(net->vio)));
-#if defined(EXTRA_DEBUG)
-        fprintf(stderr,"Note: net_clear() skipped %ld bytes from file: %s\n",
+        EXTRA_DEBUG_fprintf(stderr,"Note: net_clear() skipped %ld bytes from file: %s\n",
                 (long) count, vio_description(net->vio));
-#endif
       }
       else
       {
@@ -592,7 +602,7 @@ net_real_write(NET *net,const uchar *packet, size_t len)
     uchar *b;
     uint header_length=NET_HEADER_SIZE+COMP_HEADER_SIZE;
     if (!(b= (uchar*) my_malloc(len + NET_HEADER_SIZE +
-                                COMP_HEADER_SIZE, MYF(MY_WME))))
+                                COMP_HEADER_SIZE + 1, MYF(MY_WME))))
     {
       net->error= 2;
       net->last_errno= ER_OUT_OF_RESOURCES;
@@ -642,16 +652,12 @@ net_real_write(NET *net,const uchar *packet, size_t len)
 	  {
 	    if (vio_should_retry(net->vio) && retry_count++ < net->retry_count)
 	      continue;
-#ifdef EXTRA_DEBUG
-	    fprintf(stderr,
+	    EXTRA_DEBUG_fprintf(stderr,
 		    "%s: my_net_write: fcntl returned error %d, aborting thread\n",
 		    my_progname,vio_errno(net->vio));
-#endif /* EXTRA_DEBUG */
 	    net->error= 2;                     /* Close socket */
             net->last_errno= ER_NET_PACKET_TOO_LARGE;
-#ifdef MYSQL_SERVER
-            my_error(ER_NET_PACKET_TOO_LARGE, MYF(0));
-#endif
+            MYSQL_SERVER_my_error(ER_NET_PACKET_TOO_LARGE, MYF(0));
 	    goto end;
 	  }
 	  retry_count=0;
@@ -665,24 +671,20 @@ net_real_write(NET *net,const uchar *packet, size_t len)
       {
 	if (retry_count++ < net->retry_count)
 	    continue;
-#ifdef EXTRA_DEBUG
-	  fprintf(stderr, "%s: write looped, aborting thread\n",
+	  EXTRA_DEBUG_fprintf(stderr, "%s: write looped, aborting thread\n",
 		  my_progname);
-#endif /* EXTRA_DEBUG */
       }
-#if defined(THREAD_SAFE_CLIENT) && !defined(MYSQL_SERVER)
+#ifndef MYSQL_SERVER
       if (vio_errno(net->vio) == SOCKET_EINTR)
       {
 	DBUG_PRINT("warning",("Interrupted write. Retrying..."));
 	continue;
       }
-#endif /* defined(THREAD_SAFE_CLIENT) && !defined(MYSQL_SERVER) */
+#endif /* !defined(MYSQL_SERVER) */
       net->error= 2;				/* Close socket */
       net->last_errno= (interrupted ? ER_NET_WRITE_INTERRUPTED :
                                ER_NET_ERROR_ON_WRITE);
-#ifdef MYSQL_SERVER
-      my_error(net->last_errno, MYF(0));
-#endif /* MYSQL_SERVER */
+      MYSQL_SERVER_my_error(net->last_errno, MYF(0));
       break;
     }
     pos+=length;
@@ -699,7 +701,8 @@ net_real_write(NET *net,const uchar *packet, size_t len)
   {
     my_bool old_mode;
     thr_end_alarm(&alarmed);
-    vio_blocking(net->vio, net_blocking, &old_mode);
+    if (!net_blocking)
+      vio_blocking(net->vio, net_blocking, &old_mode);
   }
   net->reading_or_writing=0;
   DBUG_RETURN(((int) (pos != end)));
@@ -752,6 +755,7 @@ static my_bool net_safe_read(NET *net, uchar *buff, size_t length,
 static my_bool my_net_skip_rest(NET *net, uint32 remain, thr_alarm_t *alarmed,
 				ALARM *alarm_buff)
 {
+  longlong limit= net->max_packet_size*net->net_skip_rest_factor;
   uint32 old=remain;
   DBUG_ENTER("my_net_skip_rest");
   DBUG_PRINT("enter",("bytes_to_skip: %u", (uint) remain));
@@ -775,11 +779,15 @@ static my_bool my_net_skip_rest(NET *net, uint32 remain, thr_alarm_t *alarmed,
 	DBUG_RETURN(1);
       update_statistics(thd_increment_bytes_received(length));
       remain -= (uint32) length;
+      limit-= length;
+      if (limit < 0)
+        DBUG_RETURN(1);
     }
     if (old != MAX_PACKET_LENGTH)
       break;
     if (net_safe_read(net, net->buff, NET_HEADER_SIZE, alarmed))
       DBUG_RETURN(1);
+    limit-= NET_HEADER_SIZE;
     old=remain= uint3korr(net->buff);
     net->pkt_nr++;
   }
@@ -837,7 +845,7 @@ my_real_read(NET *net, size_t *complen)
 #if !defined(__WIN__) || defined(MYSQL_SERVER)
 	  /*
 	    We got an error that there was no data on the socket. We now set up
-	    an alarm to not 'read forever', change the socket to non blocking
+	    an alarm to not 'read forever', change the socket to the blocking
 	    mode and try again
 	  */
 	  if ((interrupted || length == 0) && !thr_alarm_in_use(&alarmed))
@@ -853,17 +861,13 @@ my_real_read(NET *net, size_t *complen)
 		DBUG_PRINT("error",
 			   ("fcntl returned error %d, aborting thread",
 			    vio_errno(net->vio)));
-#ifdef EXTRA_DEBUG
-		fprintf(stderr,
+		EXTRA_DEBUG_fprintf(stderr,
 			"%s: read: fcntl returned error %d, aborting thread\n",
 			my_progname,vio_errno(net->vio));
-#endif /* EXTRA_DEBUG */
 		len= packet_error;
 		net->error= 2;                 /* Close socket */
 	        net->last_errno= ER_NET_FCNTL_ERROR;
-#ifdef MYSQL_SERVER
-		my_error(ER_NET_FCNTL_ERROR, MYF(0));
-#endif
+		MYSQL_SERVER_my_error(ER_NET_FCNTL_ERROR, MYF(0));
 		goto end;
 	      }
 	      retry_count=0;
@@ -876,12 +880,10 @@ my_real_read(NET *net, size_t *complen)
 	  {					/* Probably in MIT threads */
 	    if (retry_count++ < net->retry_count)
 	      continue;
-#ifdef EXTRA_DEBUG
-	    fprintf(stderr, "%s: read looped with error %d, aborting thread\n",
+	    EXTRA_DEBUG_fprintf(stderr, "%s: read looped with error %d, aborting thread\n",
 		    my_progname,vio_errno(net->vio));
-#endif /* EXTRA_DEBUG */
 	  }
-#if defined(THREAD_SAFE_CLIENT) && !defined(MYSQL_SERVER)
+#ifndef MYSQL_SERVER
 	  if (vio_errno(net->vio) == SOCKET_EINTR)
 	  {
 	    DBUG_PRINT("warning",("Interrupted read. Retrying..."));
@@ -895,9 +897,7 @@ my_real_read(NET *net, size_t *complen)
           net->last_errno= (vio_was_interrupted(net->vio) ?
                                    ER_NET_READ_INTERRUPTED :
                                    ER_NET_READ_ERROR);
-#ifdef MYSQL_SERVER
-          my_error(net->last_errno, MYF(0));
-#endif
+          MYSQL_SERVER_my_error(net->last_errno, MYF(0));
 	  goto end;
 	}
 	remain -= (uint32) length;
@@ -923,20 +923,17 @@ my_real_read(NET *net, size_t *complen)
               the server expects the client to send a file, but the client
               may reply with a new command instead.
             */
-#if defined (EXTRA_DEBUG) && !defined (MYSQL_SERVER)
-            fflush(stdout);
-	    fprintf(stderr,"Error: Packets out of order (Found: %d, expected %d)\n",
+#ifndef MYSQL_SERVER
+            EXTRA_DEBUG_fflush(stdout);
+	    EXTRA_DEBUG_fprintf(stderr,"Error: Packets out of order (Found: %d, expected %d)\n",
 		    (int) net->buff[net->where_b + 3],
 		    (uint) (uchar) net->pkt_nr);
-            fflush(stderr);
-            DBUG_ASSERT(0);
+            EXTRA_DEBUG_fflush(stderr);
 #endif
 	  }
 	  len= packet_error;
           /* Not a NET error on the client. XXX: why? */
-#ifdef MYSQL_SERVER
-	  my_error(ER_NET_PACKETS_OUT_OF_ORDER, MYF(0));
-#endif
+	  MYSQL_SERVER_my_error(ER_NET_PACKETS_OUT_OF_ORDER, MYF(0));
 	  goto end;
 	}
 	net->compress_pkt_nr= ++net->pkt_nr;
@@ -969,7 +966,6 @@ my_real_read(NET *net, size_t *complen)
 	  {
 #if defined(MYSQL_SERVER) && !defined(NO_ALARM)
 	    if (!net->compress &&
-                net->skip_big_packet &&
 		!my_net_skip_rest(net, (uint32) len, &alarmed, &alarm_buff))
 	      net->error= 3;		/* Successfully skiped packet */
 #endif
@@ -987,7 +983,8 @@ end:
   {
     my_bool old_mode;
     thr_end_alarm(&alarmed);
-    vio_blocking(net->vio, net_blocking, &old_mode);
+    if (!net_blocking)
+      vio_blocking(net->vio, net_blocking, &old_mode);
   }
   net->reading_or_writing=0;
 #ifdef DEBUG_DATA_PACKETS
@@ -1137,9 +1134,7 @@ my_net_read(NET *net)
       {
 	net->error= 2;			/* caller will close socket */
         net->last_errno= ER_NET_UNCOMPRESS_ERROR;
-#ifdef MYSQL_SERVER
-	my_error(ER_NET_UNCOMPRESS_ERROR, MYF(0));
-#endif
+	MYSQL_SERVER_my_error(ER_NET_UNCOMPRESS_ERROR, MYF(0));
         MYSQL_NET_READ_DONE(1, 0);
 	return packet_error;
       }
diff --git a/sql/opt_index_cond_pushdown.cc b/sql/opt_index_cond_pushdown.cc
new file mode 100644
index 00000000000..cee96d88438
--- /dev/null
+++ b/sql/opt_index_cond_pushdown.cc
@@ -0,0 +1,395 @@
+#include "sql_select.h"
+#include "sql_test.h"
+
+/****************************************************************************
+ * Index Condition Pushdown code starts
+ ***************************************************************************/
+/*
+  Check if given expression uses only table fields covered by the given index
+
+  SYNOPSIS
+    uses_index_fields_only()
+      item           Expression to check
+      tbl            The table having the index
+      keyno          The index number
+      other_tbls_ok  Returned as the result when a non-constant, non-triggered
+                     expression does not refer to tbl at all
+  DESCRIPTION
+    Check if given expression only uses fields covered by index #keyno in the
+    table tbl. Constant items are accepted; triggered conditions and
+    BLOB/GEOMETRY fields of tbl are rejected. Top-level AND/OR is split up by
+    make_cond_for_index(); AND/OR nested inside another item, for example
+    func(x AND y), is checked here recursively.
+
+  RETURN
+    TRUE   Yes
+    FALSE  No
+*/
+
+bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno,
+                            bool other_tbls_ok)
+{
+  if (item->const_item())
+    return TRUE;
+
+  /*
+    Don't push down the triggered conditions. Nested outer joins execution
+    code may need to evaluate a condition several times (both triggered and
+    untriggered). NOTE(review): the original sentence was cut off here.
+    TODO: Consider cloning the triggered condition and using the copies for:
+      1. Push the first copy down, to have the most restrictive index
+         condition possible
+      2. Put the second copy into tab->select_cond.
+  */
+  if (item->type() == Item::FUNC_ITEM && 
+      ((Item_func*)item)->functype() == Item_func::TRIG_COND_FUNC)
+    return FALSE;
+
+  if (!(item->used_tables() & tbl->map))
+    return other_tbls_ok;       /* item does not refer to tbl at all */
+
+  Item::Type item_type= item->type();
+  switch (item_type) {
+  case Item::FUNC_ITEM:
+    {
+      /* This is a function, apply condition recursively to arguments */
+      Item_func *item_func= (Item_func*)item;
+      Item **child;
+      Item **item_end= (item_func->arguments()) + item_func->argument_count();
+      for (child= item_func->arguments(); child != item_end; child++)
+      {
+        if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok))
+          return FALSE;
+      }
+      return TRUE;
+    }
+  case Item::COND_ITEM:
+    {
+      /*
+        This is an AND/OR condition. Regular AND/OR clauses are handled by
+        make_cond_for_index() which will chop off the part that can be
+        checked with index. This code is for handling non-top-level AND/ORs,
+        e.g. func(x AND y).
+      */
+      List_iterator<Item> li(*((Item_cond*)item)->argument_list());
+      Item *item;                       /* NB: shadows the 'item' parameter */
+      while ((item=li++))
+      {
+        if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok))
+          return FALSE;
+      }
+      return TRUE;
+    }
+  case Item::FIELD_ITEM:
+    {
+      Item_field *item_field= (Item_field*)item;
+      if (item_field->field->table != tbl) 
+        return TRUE;                    /* fields of other tables are fine */
+      /*
+        The below is probably a repetition - the first part checks the
+        other two, but let's play it safe:
+      */
+      return item_field->field->part_of_key.is_set(keyno) &&
+             item_field->field->type() != MYSQL_TYPE_GEOMETRY &&
+             item_field->field->type() != MYSQL_TYPE_BLOB;
+    }
+  case Item::REF_ITEM:
+    return uses_index_fields_only(item->real_item(), tbl, keyno,
+                                  other_tbls_ok);
+  default:
+    return FALSE; /* Play it safe, don't push unknown non-const items */
+  }
+}
+
+#define ICP_COND_USES_INDEX_ONLY 10
+
+/*
+  Get a part of the condition that can be checked using only index fields
+
+  SYNOPSIS
+    make_cond_for_index()
+      cond           The source condition
+      table          The table that is partially available
+      keyno          The index in the above table. Only fields covered by the index
+                     are available
+      other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
+
+  DESCRIPTION
+    Get a part of the condition that can be checked when for the given table
+    we have values only of fields covered by some index. The condition may
+    refer to other tables, it is assumed that we have values of all of their
+    fields. Leaves that qualify are marked ICP_COND_USES_INDEX_ONLY; an AND/OR
+    node is marked when all of its arguments were marked.
+    Example:
+      make_cond_for_index(
+         "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)",
+          t2, keyno(t2.key1)) 
+      will return
+        "cond(t1.field) AND cond(t2.key2)"
+
+  RETURN
+    Index condition, or NULL if no condition could be inferred.
+*/
+
+Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno,
+                          bool other_tbls_ok)
+{
+  if (!cond)
+    return NULL;
+  if (cond->type() == Item::COND_ITEM)
+  {
+    uint n_marked= 0;                   /* arguments found marked index-only */
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      table_map used_tables= 0;
+      Item_cond_and *new_cond=new Item_cond_and;
+      if (!new_cond)
+	return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
+	if (fix)
+        {
+	  new_cond->argument_list()->push_back(fix);
+          used_tables|= fix->used_tables();
+        }
+        if (test(item->marker == ICP_COND_USES_INDEX_ONLY))
+        {
+          n_marked++;
+          item->marker= 0;              /* counted; only cond may keep a mark */
+        } 
+      }
+      if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
+        cond->marker= ICP_COND_USES_INDEX_ONLY;
+      switch (new_cond->argument_list()->elements) {
+      case 0:
+	return (COND*) 0;
+      case 1:
+        new_cond->used_tables_cache= used_tables;
+	return new_cond->argument_list()->head();  /* unwrap the only conjunct */
+      default:
+	new_cond->quick_fix_field();
+        new_cond->used_tables_cache= used_tables;
+	return new_cond;
+      }
+    }
+    else /* It's OR: usable only if every argument is usable */
+    {
+      Item_cond_or *new_cond=new Item_cond_or;
+      if (!new_cond)
+	return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
+	if (!fix)
+	  return (COND*) 0;
+	new_cond->argument_list()->push_back(fix);
+        if (test(item->marker == ICP_COND_USES_INDEX_ONLY))
+        {
+          n_marked++;
+          item->marker= 0;              /* counted; only cond may keep a mark */
+        } 
+      }
+      if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
+        cond->marker= ICP_COND_USES_INDEX_ONLY;
+      new_cond->quick_fix_field();
+      new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
+      new_cond->top_level_item();
+      return new_cond;
+    }
+  }
+
+  if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok))
+    return (COND*) 0;
+  cond->marker= ICP_COND_USES_INDEX_ONLY;   /* read by make_cond_remainder() */
+  return cond;
+}
+
+/* Rebuild cond, leaving out (if exclude_index) parts marked index-only. */
+Item *make_cond_remainder(Item *cond, bool exclude_index)
+{
+  if (exclude_index && cond->marker == ICP_COND_USES_INDEX_ONLY)
+    return 0; /* Already checked */
+
+  if (cond->type() == Item::COND_ITEM)
+  {
+    table_map tbl_map= 0;               /* tables used by the kept arguments */
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      /* Create new top level AND item */
+      Item_cond_and *new_cond=new Item_cond_and;
+      if (!new_cond)
+	return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix= make_cond_remainder(item, exclude_index);
+	if (fix)
+        {
+	  new_cond->argument_list()->push_back(fix);
+          tbl_map |= fix->used_tables();
+        }
+      }
+      switch (new_cond->argument_list()->elements) {
+      case 0:
+	return (COND*) 0;
+      case 1:
+	return new_cond->argument_list()->head();  /* unwrap the only conjunct */
+      default:
+	new_cond->quick_fix_field();
+        ((Item_cond*)new_cond)->used_tables_cache= tbl_map;
+	return new_cond;
+      }
+    }
+    else /* It's OR: arguments are rebuilt with exclude_index= FALSE */
+    {
+      Item_cond_or *new_cond=new Item_cond_or;
+      if (!new_cond)
+	return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix= make_cond_remainder(item, FALSE);
+	if (!fix)
+	  return (COND*) 0;
+	new_cond->argument_list()->push_back(fix);
+        tbl_map |= fix->used_tables();
+      }
+      new_cond->quick_fix_field();
+      ((Item_cond*)new_cond)->used_tables_cache= tbl_map;
+      new_cond->top_level_item();
+      return new_cond;
+    }
+  }
+  return cond;
+}
+
+
+/*
+  Try to extract and push the index condition
+
+  SYNOPSIS
+    push_index_cond()
+      tab            A join tab that has tab->table->file and its condition
+                     in tab->select_cond
+      keyno          Index for which extract and push the condition
+
+  DESCRIPTION
+    Try to extract and push the index condition down to table handler
+*/
+
+void push_index_cond(JOIN_TAB *tab, uint keyno)
+{
+  DBUG_ENTER("push_index_cond");
+  Item *idx_cond;
+  bool do_index_cond_pushdown=
+    ((tab->table->file->index_flags(keyno, 0, 1) &
+      HA_DO_INDEX_COND_PUSHDOWN) &&
+     optimizer_flag(tab->join->thd, OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN));
+
+  /*
+    Do not try index condition pushdown on indexes which have partially-covered
+    columns. Unpacking from a column prefix into index tuple is not a supported
+    operation in some engines, see e.g. MySQL BUG#42991.
+    TODO: a better solution would be not to consider partially-covered columns
+    as parts of the index and still produce/check index condition for
+    fully-covered index columns.
+  */
+  KEY *key_info= tab->table->key_info + keyno;
+  for (uint kp= 0; kp < key_info->key_parts; kp++)
+  {
+    if ((key_info->key_part[kp].key_part_flag & HA_PART_KEY_SEG))
+    {
+      do_index_cond_pushdown= FALSE;
+      break;
+    }
+  }
+
+  if (do_index_cond_pushdown)
+  {
+    DBUG_EXECUTE("where",
+                 print_where(tab->select_cond, "full cond", QT_ORDINARY););
+
+    idx_cond= make_cond_for_index(tab->select_cond, tab->table, keyno,
+                                  tab->icp_other_tables_ok);
+
+    DBUG_EXECUTE("where",
+                 print_where(idx_cond, "idx cond", QT_ORDINARY););
+
+    if (idx_cond)
+    {
+      Item *idx_remainder_cond= 0;
+      tab->pre_idx_push_select_cond= tab->select_cond; /* keep unsplit cond */
+      /*
+        For a BKA join cache the index condition is stored in
+        tab->cache_idx_cond instead of being pushed to the handler, because
+        extra operations are needed before it can be evaluated. It is used
+        by the JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions.
+      */
+      if (tab->use_join_cache &&
+          /*
+            If a cache is used then the value is TRUE only
+            for BKA[_UNIQUE] cache (see check_join_cache_usage func).
+          */
+          tab->icp_other_tables_ok &&
+          (idx_cond->used_tables() &
+           ~(tab->table->map | tab->join->const_table_map)))
+        tab->cache_idx_cond= idx_cond;
+      else
+        idx_remainder_cond= tab->table->file->idx_cond_push(keyno, idx_cond);
+
+      /*
+        Disable eq_ref's "lookup cache" if we've pushed down an index
+        condition.
+        TODO: This check happens to work on current ICP implementations, but
+        there may exist a compliant implementation that will not work
+        correctly with it. Sort this out when we stabilize the condition
+        pushdown APIs.
+      */
+      if (idx_remainder_cond != idx_cond)
+        tab->ref.disable_cache= TRUE;
+
+      Item *row_cond= tab->idx_cond_fact_out ? 
+                        make_cond_remainder(tab->select_cond, TRUE) :
+	                tab->pre_idx_push_select_cond;
+
+      DBUG_EXECUTE("where",
+                   print_where(row_cond, "remainder cond", QT_ORDINARY););
+      
+      if (row_cond)
+      {
+        if (!idx_remainder_cond)
+          tab->select_cond= row_cond;
+        else
+        {
+          COND *new_cond= new Item_cond_and(row_cond, idx_remainder_cond);
+          tab->select_cond= new_cond; /* NOTE(review): not checked for NULL */
+	  tab->select_cond->quick_fix_field();
+          ((Item_cond_and*)tab->select_cond)->used_tables_cache= 
+            row_cond->used_tables() | idx_remainder_cond->used_tables();
+        }
+      }
+      else
+        tab->select_cond= idx_remainder_cond;
+      if (tab->select)
+      {
+        DBUG_EXECUTE("where",
+                     print_where(tab->select->cond,
+                                 "select_cond",
+                                 QT_ORDINARY););
+
+        tab->select->cond= tab->select_cond;
+        tab->select->pre_idx_push_select_cond= tab->pre_idx_push_select_cond;
+      }
+    }
+  }
+  DBUG_VOID_RETURN;
+}
+
+
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 730024f4389..9a61317b1a7 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2010, 2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -34,7 +35,7 @@
     
     The lists are returned in form of complicated structure of interlinked
     SEL_TREE/SEL_IMERGE/SEL_ARG objects.
-    See check_quick_keys, find_used_partitions for examples of how to walk 
+    See quick_range_seq_next, find_used_partitions for examples of how to walk 
     this structure.
     All direct "users" of this module are located within this file, too.
 
@@ -59,6 +60,48 @@
 
   Record retrieval code for range/index_merge/groupby-min-max.
     Implementations of QUICK_*_SELECT classes.
+
+  KeyTupleFormat
+  ~~~~~~~~~~~~~~
+  The code in this file (and elsewhere) makes operations on key value tuples.
+  Those tuples are stored in the following format:
+  
+  The tuple is a sequence of key part values. The length of a key part value
+  depends only on its type (and does not depend on what value is stored).
+  
+    KeyTuple: keypart1-data, keypart2-data, ...
+  
+  The value of each keypart is stored in the following format:
+  
+    keypart_data: [isnull_byte] keypart-value-bytes
+
+  If a keypart may have a NULL value (key_part->field->real_maybe_null() can
+  be used to check this), then the first byte is a NULL indicator with the 
+  following valid values:
+    1  - keypart has NULL value.
+    0  - keypart has non-NULL value.
+
+  <questionable-statement> If isnull_byte==1 (NULL value), then the following
+  keypart->length bytes must be 0.
+  </questionable-statement>
+
+  keypart-value-bytes holds the value. Its format depends on the field type.
+  The length of keypart-value-bytes may or may not depend on the value being
+  stored. The default is that length is static and equal to 
+  KEY_PART_INFO::length.
+  
+  Key parts with (key_part_flag & HA_BLOB_PART) have length depending on the
+  value:
+  
+     keypart-value-bytes: value_length value_bytes
+
+  The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.
+
+  See key_copy() and key_restore() for code to move data between index tuple
+  and table record
+
+  CAUTION: the above description is only sergefp's understanding of the 
+           subject and may omit some details.
 */
 
 #ifdef USE_PRAGMA_IMPLEMENTATION
@@ -268,6 +311,11 @@ public:
   uint8 part;					// Which key part
   uint8 maybe_null;
   /* 
+    The ordinal number of the least significant component encountered in
+    the ranges of the SEL_ARG tree (the first component has number 1) 
+  */
+  uint16 max_part_no; 
+  /* 
     Number of children of this element in the RB-tree, plus 1 for this
     element itself.
   */
@@ -300,8 +348,9 @@ public:
   SEL_ARG(Field *field, uint8 part, uchar *min_value, uchar *max_value,
 	  uint8 min_flag, uint8 max_flag, uint8 maybe_flag);
   SEL_ARG(enum Type type_arg)
-    :min_flag(0),elements(1),use_count(1),left(0),right(0),next_key_part(0),
-    color(BLACK), type(type_arg)
+    :min_flag(0), max_part_no(0) /* first key part means 1. 0 mean 'no parts'*/, 
+     elements(1),use_count(1),left(0),right(0),
+     next_key_part(0), color(BLACK), type(type_arg)
   {}
   inline bool is_same(SEL_ARG *arg)
   {
@@ -409,6 +458,7 @@ public:
   /* returns a number of keypart values (0 or 1) appended to the key buffer */
   int store_min(uint length, uchar **min_key,uint min_key_flag)
   {
+    /* "(kp1 > c1) AND (kp2 OP c2) AND ..." -> (kp1 > c1) */
     if ((min_flag & GEOM_FLAG) ||
         (!(min_flag & NO_MIN_RANGE) &&
 	!(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
@@ -523,6 +573,11 @@ public:
 	  pos->increment_use_count(count);
     }
   }
+  void incr_refs()
+  {
+    increment_use_count(1);
+    use_count++;
+  }
   void free_tree()
   {
     for (SEL_ARG *pos=first(); pos ; pos=pos->next)
@@ -584,7 +639,100 @@ public:
 
 class SEL_IMERGE;
 
+#define CLONE_KEY1_MAYBE 1
+#define CLONE_KEY2_MAYBE 2      /* swap_clone_flag() exchanges bits 1 and 2 */
+#define swap_clone_flag(A) ((((A) & 1) << 1) | (((A) & 2) >> 1))
+
 
+/*
+  While objects of the class SEL_ARG represent ranges for indexes or
+  index infixes (including ranges for index prefixes and index suffixes),
+  objects of the class SEL_TREE represent AND/OR formulas of such ranges.
+  Currently an AND/OR formula represented by a SEL_TREE object can have
+  at most three levels: 
+
+    <SEL_TREE formula> ::= 
+      [ <SEL_RANGE_TREE formula> AND ]
+      [ <SEL_IMERGE formula> [ AND <SEL_IMERGE formula> ...] ]
+
+    <SEL_RANGE_TREE formula> ::=
+      <SEL_ARG formula> [ AND  <SEL_ARG_formula> ... ]
+
+    <SEL_IMERGE formula> ::=  
+      <SEL_RANGE_TREE formula> [ OR <SEL_RANGE_TREE formula> ]
+
+  As we can see from the above definitions:
+   - SEL_RANGE_TREE formula is a conjunction of SEL_ARG formulas
+   - SEL_IMERGE formula is a disjunction of SEL_RANGE_TREE formulas
+   - SEL_TREE formula is a conjunction of a SEL_RANGE_TREE formula
+     and SEL_IMERGE formulas. 
+  It's required above that a SEL_TREE formula has at least one conjunct.
+
+  Usually we will consider normalized SEL_RANGE_TREE formulas where we use
+  TRUE as conjunct members for those indexes whose SEL_ARG trees are empty.
+  
+  We will call an SEL_TREE object simply 'tree'. 
+  The part of a tree that represents SEL_RANGE_TREE formula is called
+  'range part' of the tree while the remaining part is called 'imerge part'. 
+  If a tree contains only a range part then we call such a tree 'range tree'.
+  Components of a range tree that represent SEL_ARG formulas are called ranges.
+  If a tree does not contain any range part we call such a tree 'imerge tree'.
+  Components of the imerge part of a tree that represent SEL_IMERGE formula
+  are called imerges.
+
+  Usually we'll designate:
+    SEL_TREE formulas         by T_1,...,T_k
+    SEL_ARG formulas          by R_1,...,R_k
+    SEL_RANGE_TREE formulas   by RT_1,...,RT_k
+    SEL_IMERGE formulas       by M_1,...,M_k
+  Accordingly we'll use:
+    t_1,...,t_k - to designate trees representing T_1,...,T_k
+    r_1,...,r_k - to designate ranges representing R_1,...,R_k 
+    rt_1,...,rt_k - to designate range trees representing RT_1,...,RT_k
+    m_1,...,m_k - to designate imerges representing M_1,...,M_k
+
+  SEL_TREE objects are usually built from WHERE conditions or
+  ON expressions.
+  A SEL_TREE object always represents an inference of the condition it is
+  built from. Therefore, if a row satisfies the condition, it also
+  satisfies the SEL_TREE formula built from that condition.
+
+  The following transformations of tree t representing SEL_TREE formula T 
+  yield a new tree t1 that represents an inference of T: T=>T1.
+    (1) remove any of SEL_ARG tree from the range part of t
+    (2) remove any imerge from the tree t 
+    (3) remove any of SEL_ARG tree from any range tree contained
+        in any imerge of tree   
+ 
+  Since the basic blocks of any SEL_TREE objects are ranges, SEL_TREE
+  objects in many cases can be effectively used to filter out a big part
+  of table rows that do not satisfy WHERE/ON conditions utilizing
+  only single or multiple range index scans.
+
+  A single range index scan is constructed for a range tree that contains
+  only one SEL_ARG object for an index or an index prefix.
+  An index intersection scan can be constructed for a range tree
+  that contains several SEL_ARG objects. Currently index intersection
+  scans are constructed only for single-point ranges.
+  An index merge scan is constructed for an imerge tree that contains only
+  one imerge. If range trees of this imerge contain only single-point ranges
+  then a union of index intersections can be built.
+
+  Usually the tree built by the range optimizer for a query table contains
+  more than one range in the range part, and additionally may contain some
+  imerges in the imerge part. The range optimizer evaluates all of them one
+  by one and chooses the range or the imerge that provides the cheapest
+  single or multiple range index scan of the table.  According to rules 
+  (1)-(3) this scan always filters out only those rows that do not satisfy
+  the query conditions. 
+
+  For any condition the SEL_TREE object for it is built in a bottom up
+  manner starting from the range trees for the predicates. The tree_and
+  function builds a tree for any conjunction of formulas from the trees
+  for its conjuncts. The tree_or function builds a tree for any disjunction
+  of formulas from the trees for its disjuncts.    
+*/ 
+  
 class SEL_TREE :public Sql_alloc
 {
 public:
@@ -600,7 +748,7 @@ public:
     keys_map.clear_all();
     bzero((char*) keys,sizeof(keys));
   }
-  SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param);
+  SEL_TREE(SEL_TREE *arg, bool without_merges, RANGE_OPT_PARAM *param);
   /*
     Note: there may exist SEL_TREE objects with sel_tree->type=KEY and
     keys[i]=0 for all i. (SergeyP: it is not clear whether there is any
@@ -620,9 +768,15 @@ public:
   key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
   uint    n_ror_scans;     /* number of set bits in ror_scans_map */
 
+  struct st_index_scan_info **index_scans;     /* list of index scans */
+  struct st_index_scan_info **index_scans_end; /* last index scan */
+
   struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
   struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
   /* Note that #records for each key scan is stored in table->quick_rows */
+
+  bool without_ranges() { return keys_map.is_clear_all(); }
+  bool without_imerges() { return merges.is_empty(); }
 };
 
 class RANGE_OPT_PARAM
@@ -653,6 +807,11 @@ public:
   */
   bool using_real_indexes;
   
+  /*
+    Aggressively remove "scans" that do not have conditions on first
+    keyparts. Such scans are usable when doing partition pruning but not
+    regular range optimization.
+  */
   bool remove_jump_scans;
   
   /*
@@ -670,12 +829,14 @@ public:
 
   /* Number of SEL_ARG objects allocated by SEL_ARG::clone_tree operations */
   uint alloced_sel_args; 
+  bool force_default_mrr;
+  KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
 };
 
 class PARAM : public RANGE_OPT_PARAM
 {
 public:
-  KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
+  ha_rows quick_rows[MAX_KEY];
   longlong baseflag;
   uint max_key_part, range_count;
 
@@ -697,13 +858,16 @@ public:
   uint8 first_null_comp; /* first null component if any, 0 - otherwise */
 };
 
+
 class TABLE_READ_PLAN;
   class TRP_RANGE;
   class TRP_ROR_INTERSECT;
   class TRP_ROR_UNION;
-  class TRP_ROR_INDEX_MERGE;
+  class TRP_INDEX_INTERSECT;
+  class TRP_INDEX_MERGE;
   class TRP_GROUP_MIN_MAX;
 
+struct st_index_scan_info;
 struct st_ror_scan_info;
 
 static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,COND *cond_func,Field *field,
@@ -715,20 +879,22 @@ static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,COND *cond_func,Field *field,
 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond);
 
 static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
-static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree,
-                                  bool update_tbl_stats);
-static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
-                                uchar *min_key, uint min_key_flag, int,
-                                uchar *max_key, uint max_key_flag, int);
+static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
+                                  SEL_ARG *tree, bool update_tbl_stats, 
+                                  uint *mrr_flags, uint *bufsize,
+                                  COST_VECT *cost);
 
 QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
-                                     SEL_ARG *key_tree,
-                                     MEM_ROOT *alloc = NULL);
+                                     SEL_ARG *key_tree, uint mrr_flags, 
+                                     uint mrr_buf_size, MEM_ROOT *alloc);
 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                        bool index_read_must_be_used,
                                        bool update_tbl_stats,
                                        double read_time);
 static
+TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
+                                              double read_time);
+static
 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
                                           double read_time,
                                           bool *are_all_covering);
@@ -740,10 +906,12 @@ static
 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
                                          double read_time);
 static
+TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
+                                        TRP_INDEX_MERGE *imerge_trp,
+                                        double read_time);
+static
 TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
                                           double read_time);
-static double get_index_only_read_time(const PARAM* param, ha_rows records,
-                                       int keynr);
 
 #ifndef DBUG_OFF
 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
@@ -754,11 +922,15 @@ static void print_ror_scans_arr(TABLE *table, const char *msg,
 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
 #endif
 
-static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
-static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
+static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,
+                          SEL_TREE *tree1, SEL_TREE *tree2);
+static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,
+                         SEL_TREE *tree1,SEL_TREE *tree2);
 static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
-static SEL_ARG *key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2);
-static SEL_ARG *key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
+static SEL_ARG *key_or(RANGE_OPT_PARAM *param,
+                       SEL_ARG *key1, SEL_ARG *key2);
+static SEL_ARG *key_and(RANGE_OPT_PARAM *param,
+                        SEL_ARG *key1, SEL_ARG *key2,
                         uint clone_flag);
 static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
 bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
@@ -769,7 +941,25 @@ static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
 static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
 static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
                              uint length);
-bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param);
+static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
+
+#include "opt_range_mrr.cc"
+
+static bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2, 
+                                       key_map *common_keys);
+static void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param,
+                                          SEL_TREE *tree);
+
+static bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param,
+                                  SEL_TREE *tree1, SEL_TREE *tree2, 
+                                  key_map *common_keys);
+static bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param,
+                                   SEL_TREE *tree1, SEL_TREE *tree2,
+                                   key_map common_keys);
+static int and_range_trees(RANGE_OPT_PARAM *param,
+                           SEL_TREE *tree1, SEL_TREE *tree2,
+                           SEL_TREE *result);
+static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree);
 
 
 /*
@@ -801,23 +991,39 @@ public:
     trees_next(trees),
     trees_end(trees + PREALLOCED_TREES)
   {}
-  SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param);
+  SEL_IMERGE (SEL_IMERGE *arg, uint cnt, RANGE_OPT_PARAM *param);
   int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
-  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree);
-  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge);
+  bool have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree);
+  int and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree, 
+                   SEL_IMERGE *new_imerge);
+  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
+                              uint n_init_trees, 
+                              SEL_TREE *new_tree,
+                              bool is_first_check_pass,
+                              bool *is_last_check_pass);
+  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param,
+                                uint n_init_trees,
+                                SEL_IMERGE* imerge,
+                                bool is_first_check_pass,
+                                bool *is_last_check_pass);
 };
 
 
 /*
-  Add SEL_TREE to this index_merge without any checks,
+  Add a range tree to the range trees of this imerge 
 
-  NOTES
-    This function implements the following:
-      (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs
+  SYNOPSIS
+    or_sel_tree()
+      param                  Context info for the operation         
+      tree                   SEL_TREE to add to this imerge 
+
+  DESCRIPTION 
+    The function just adds the range tree 'tree' to the range trees
+    of this imerge.
 
   RETURN
-     0 - OK
-    -1 - Out of memory.
+     0   if the operation is a success
+    -1   if the function runs out of memory
 */
 
 int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
@@ -842,96 +1048,303 @@ int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
 
 
 /*
-  Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree,
-  combining new_tree with one of the trees in this SEL_IMERGE if they both
-  have SEL_ARGs for the same key.
+  Check if any of the range trees of this imerge intersects with a given tree 
 
   SYNOPSIS
-    or_sel_tree_with_checks()
-      param    PARAM from SQL_SELECT::test_quick_select
-      new_tree SEL_TREE with type KEY or KEY_SMALLER.
+    have_common_keys()
+      param    Context info for the function
+      tree     SEL_TREE intersection with the imerge range trees is checked for 
 
-  NOTES
-    This does the following:
-    (t_1||...||t_k)||new_tree =
-     either
-       = (t_1||...||t_k||new_tree)
-     or
-       = (t_1||....||(t_j|| new_tree)||...||t_k),
-
-     where t_i, y are SEL_TREEs.
-    new_tree is combined with the first t_j it has a SEL_ARG on common
-    key with. As a consequence of this, choice of keys to do index_merge
-    read may depend on the order of conditions in WHERE part of the query.
+  DESCRIPTION
+    The function checks whether there is any range tree rt_i in this imerge
+    such that there are some indexes for which ranges are defined in both
+    rt_i and the range part of the SEL_TREE tree.  
+    To check this the function calls the function sel_trees_have_common_keys.
+
+  RETURN 
+    TRUE    if there are such range trees in this imerge
+    FALSE   otherwise
+*/
 
+bool SEL_IMERGE::have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree)
+{
+  for (SEL_TREE** or_tree= trees, **bound= trees_next;
+       or_tree != bound; or_tree++)
+  {
+    key_map common_keys;  /* out-argument only; its value is not read here */
+    if (sel_trees_have_common_keys(*or_tree, tree, &common_keys))
+      return TRUE;        /* one range tree with common keys is enough */
+  }
+  return FALSE;
+}
+
+
+/* 
+  Perform AND operation for this imerge and the range part of a tree
+
+  SYNOPSIS
+    and_sel_tree()
+      param           Context info for the operation
+      tree            SEL_TREE for the second operand of the operation
+      new_imerge  OUT imerge for the result of the operation
+
+  DESCRIPTION
+    This function performs AND operation for this imerge m and the
+    range part of the SEL_TREE tree rt. In other words the function
+    pushes rt into this imerge. The resulting imerge is returned in
+    the parameter new_imerge.
+    If this imerge m represent the formula
+      RT_1 OR ... OR RT_k
+    then the resulting imerge of the function represents the formula
+      (RT_1 AND RT) OR ... OR (RT_k AND RT)
+    The function calls the function and_range_trees to construct the
+    range tree representing (RT_i AND RT).
+    
+  NOTE
+    The function may return an empty imerge without any range trees.
+    This happens when each call of and_range_trees returns an 
+    impossible range tree (SEL_TREE::IMPOSSIBLE).
+    Example: (key1 < 2 AND key2 > 10) AND (key1 > 4 OR key2 < 6).
+         
   RETURN
-    0  OK
-    1  One of the trees was combined with new_tree to SEL_TREE::ALWAYS,
-       and (*this) should be discarded.
-   -1  An error occurred.
+     0  if the operation is a success
+    -1  otherwise: there is not enough memory to perform the operation
 */
 
-int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree)
+int SEL_IMERGE::and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree,
+                             SEL_IMERGE *new_imerge)
 {
-  for (SEL_TREE** tree = trees;
-       tree != trees_next;
-       tree++)
+  for (SEL_TREE** or_tree= trees; or_tree != trees_next; or_tree++) 
   {
-    if (sel_trees_can_be_ored(*tree, new_tree, param))
+    SEL_TREE *res_or_tree= 0;
+    if (!(res_or_tree= new SEL_TREE()))
+      return (-1);        /* could not allocate the result tree */
+    if (!and_range_trees(param, *or_tree, tree, res_or_tree)) /* 0: keep it */
     {
-      *tree = tree_or(param, *tree, new_tree);
-      if (!*tree)
-        return 1;
-      if (((*tree)->type == SEL_TREE::MAYBE) ||
-          ((*tree)->type == SEL_TREE::ALWAYS))
+      if (new_imerge->or_sel_tree(param, res_or_tree))
+        return (-1);      /* could not append the tree to new_imerge */
+    }        
+  }
+  return 0;
+}      
+
+
+/*
+  Perform OR operation on this imerge and the range part of a tree
+
+  SYNOPSIS
+    or_sel_tree_with_checks()
+      param                  Context info for the operation 
+      n_trees                Number of trees in this imerge to check for oring        
+      tree                   SEL_TREE whose range part is to be ored 
+      is_first_check_pass    <=> the first call of the function for this imerge  
+      is_last_check_pass OUT <=> no more calls of the function for this imerge
+
+  DESCRIPTION
+    The function performs OR operation on this imerge m and the range part
+    of the SEL_TREE tree rt. It always replaces this imerge with the result
+    of the operation.
+ 
+    The operation can be performed in two different modes: with
+    is_first_check_pass==TRUE and is_first_check_pass==FALSE, transforming
+    this imerge differently.
+
+    Given this imerge represents the formula
+      RT_1 OR ... OR RT_k:
+
+    1. In the first mode, when is_first_check_pass==TRUE :
+      1.1. If rt must be ored(see the function sel_trees_must_be_ored) with
+           some rt_j (there may be only one such range tree in the imerge)
+           then the function produces an imerge representing the formula
+             RT_1 OR ... OR (RT_j OR RT) OR ... OR RT_k,
+           where the tree for (RT_j OR RT) is built by oring the pairs
+           of SEL_ARG trees for the corresponding indexes
+      1.2. Otherwise the function produces the imerge representing the formula:
+           RT_1 OR ... OR RT_k OR RT.
+
+    2. In the second mode, when is_first_check_pass==FALSE :
+      2.1. For each rt_j in the imerge that can be ored (see the function
+           sel_trees_can_be_ored), but not must be ored, with rt the function
+           replaces rt_j for a range tree such that for each index for which
+           ranges are defined in both in rt_j and rt  the tree contains the
+           result of oring of these ranges.
+      2.2. In other cases the function does not produce any imerge.
+
+    When is_first_check_pass==TRUE the function returns FALSE in the parameter
+    is_last_check_pass if no rt_j must be ored with rt, but, at the same
+    time, there is some rt_j that can be ored with rt.
+    When is_first_check_pass==FALSE the function always returns TRUE in the
+    parameter is_last_check_pass.
+          
+  RETURN
+    1  The result of oring rt with some rt_j is a range tree with
+       type==SEL_TREE::ALWAYS or type==SEL_TREE::MAYBE
+       (in this case the imerge m should be discarded)
+   -1  The function runs out of memory
+    0  in all other cases 
+*/
+
+int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
+                                        uint n_trees,
+                                        SEL_TREE *tree,
+                                        bool is_first_check_pass,
+                                        bool *is_last_check_pass)
+{
+  bool was_ored= FALSE;
+  *is_last_check_pass= TRUE;
+  SEL_TREE** or_tree = trees;
+  for (uint i= 0; i < n_trees; i++, or_tree++) /* only the first n_trees trees */
+  {
+    SEL_TREE *result= 0;
+    key_map result_keys;
+    key_map ored_keys;
+    if (sel_trees_can_be_ored(param, *or_tree, tree, &ored_keys))
+    {
+      bool must_be_ored= sel_trees_must_be_ored(param, *or_tree, tree,
+                                                ored_keys);
+      if (must_be_ored || !is_first_check_pass) 
+      {
+        result_keys.clear_all();
+        result= *or_tree;              /* the existing tree is updated in place */
+        for (uint key_no= 0; key_no < param->keys; key_no++)
+        {
+          if (!ored_keys.is_set(key_no))
+	  {
+            result->keys[key_no]= 0;   /* index not in ored_keys: drop its ranges */
+	    continue;
+          }
+          SEL_ARG *key1= (*or_tree)->keys[key_no];
+          SEL_ARG *key2= tree->keys[key_no];
+          key2->incr_refs();  /* NOTE(review): presumably keeps tree's key alive across key_or() - confirm */
+          if ((result->keys[key_no]= key_or(param, key1, key2)))
+          {
+            
+            result_keys.set_bit(key_no);
+#ifdef EXTRA_DEBUG
+            if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
+	    {
+              key1= result->keys[key_no]; 
+              (key1)->test_use_count(key1);
+            }
+#endif
+          }       
+        }
+      }
+      else if(is_first_check_pass) 
+        *is_last_check_pass= FALSE;    /* can be ored, but need not be */
+    } 
+
+    if (result)
+    {
+      if (result_keys.is_clear_all())
+        result->type= SEL_TREE::ALWAYS;  /* no index has ranges left */
+      *is_last_check_pass= TRUE;
+      if ((result->type == SEL_TREE::MAYBE) ||
+          (result->type == SEL_TREE::ALWAYS))
         return 1;
       /* SEL_TREE::IMPOSSIBLE is impossible here */
-      return 0;
+      result->keys_map= result_keys; 
+      *or_tree= result;
+      if (is_first_check_pass)
+        return 0;                      /* first pass stops after the first oring */
+      was_ored= TRUE;
     }
   }
+  if (was_ored)
+    return 0;
 
-  /* New tree cannot be combined with any of existing trees. */
-  return or_sel_tree(param, new_tree);
+  if (!*is_last_check_pass && 
+      !(tree= new SEL_TREE(tree, FALSE, param)))
+    return (-1);                       /* could not copy 'tree' */
+  return or_sel_tree(param, tree);     /* nothing was ored: append tree or its copy */
 }
 
 
 /*
-  Perform OR operation on this index_merge and supplied index_merge list.
+  Perform OR operation on this imerge and another imerge
+
+  SYNOPSIS
+    or_sel_imerge_with_checks()
+      param                  Context info for the operation 
+      n_trees           Number of trees in this imerge to check for oring        
+      imerge                 The second operand of the operation 
+      is_first_check_pass    <=> the first call of the function for this imerge  
+      is_last_check_pass OUT <=> no more calls of the function for this imerge
 
+  DESCRIPTION
+    For each range tree rt from 'imerge' the function calls the method
+    SEL_IMERGE::or_sel_tree_with_checks that performs OR operation on this
+    SEL_IMERGE object m and the tree rt. The mode of the operation is
+    specified by the parameter is_first_check_pass. Each call of
+    SEL_IMERGE::or_sel_tree_with_checks transforms this SEL_IMERGE object m.
+    The function returns FALSE in the parameter is_last_check_pass if
+    at least one of the calls of SEL_IMERGE::or_sel_tree_with_checks
+    returns FALSE as the value of its last parameter. 
+    
   RETURN
-    0 - OK
-    1 - One of conditions in result is always TRUE and this SEL_IMERGE
-        should be discarded.
-   -1 - An error occurred
+    1  One of the calls of SEL_IMERGE::or_sel_tree_with_checks returns 1.
+       (in this case the imerge m should be discarded)
+   -1  The function runs out of memory
+    0  in all other cases 
 */
 
-int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge)
-{
-  for (SEL_TREE** tree= imerge->trees;
-       tree != imerge->trees_next;
-       tree++)
-  {
-    if (or_sel_tree_with_checks(param, *tree))
-      return 1;
+int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param,
+                                          uint n_trees,
+                                          SEL_IMERGE* imerge,
+                                          bool is_first_check_pass,
+                                          bool *is_last_check_pass)
+{
+  *is_last_check_pass= TRUE;    /* stays TRUE unless some call reports FALSE */
+  SEL_TREE** tree= imerge->trees;
+  SEL_TREE** tree_end= imerge->trees_next;
+  for ( ; tree < tree_end; tree++)
+  {
+    int rc;      /* signed: or_sel_tree_with_checks() may return -1 */
+    bool is_last= TRUE; 
+    rc= or_sel_tree_with_checks(param, n_trees, *tree, 
+                               is_first_check_pass, &is_last);
+    if (!is_last)
+      *is_last_check_pass= FALSE;
+    if (rc)
+      return rc; /* 1 or -1: stop oring the remaining trees of 'imerge' */
   }
   return 0;
 }
 
 
-SEL_TREE::SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param): Sql_alloc()
+/*
+  Copy constructor for SEL_TREE objects
+
+  SYNOPSIS
+    SEL_TREE
+      arg            The source tree for the constructor
+      without_merges <=> only the range part of the tree arg is copied
+      param          Context info for the operation
+
+  DESCRIPTION
+    The constructor creates a full copy of the SEL_TREE arg if
+    the parameter without_merges==FALSE. Otherwise a tree is created
+    that contains the copy only of the range part of the tree arg. 
+*/ 
+
+SEL_TREE::SEL_TREE(SEL_TREE *arg, bool without_merges,
+                   RANGE_OPT_PARAM *param): Sql_alloc()
 {
   keys_map= arg->keys_map;
   type= arg->type;
-  for (int idx= 0; idx < MAX_KEY; idx++)
+  for (uint idx= 0; idx < param->keys; idx++)
   {
     if ((keys[idx]= arg->keys[idx]))
-      keys[idx]->increment_use_count(1);
+      keys[idx]->incr_refs();
   }
 
+  if (without_merges)
+    return;
+
   List_iterator<SEL_IMERGE> it(arg->merges);
   for (SEL_IMERGE *el= it++; el; el= it++)
   {
-    SEL_IMERGE *merge= new SEL_IMERGE(el, param);
+    SEL_IMERGE *merge= new SEL_IMERGE(el, 0, param);
     if (!merge || merge->trees == merge->trees_next)
     {
       merges.empty();
@@ -942,7 +1355,23 @@ SEL_TREE::SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param): Sql_alloc()
 }
 
 
-SEL_IMERGE::SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param) : Sql_alloc()
+/*
+  Copy constructor for SEL_IMERGE objects
+
+  SYNOPSIS
+    SEL_IMERGE
+      arg         The source imerge for the constructor
+      cnt         How many trees from arg are to be copied
+      param       Context info for the operation
+
+  DESCRIPTION
+    If cnt==0 then the constructor creates a full copy of the
+    imerge arg. Otherwise only the first cnt trees of the imerge
+    are copied.
+*/ 
+
+SEL_IMERGE::SEL_IMERGE(SEL_IMERGE *arg, uint cnt,
+                       RANGE_OPT_PARAM *param) : Sql_alloc()
 {
   uint elements= (arg->trees_end - arg->trees);
   if (elements > PREALLOCED_TREES)
@@ -954,13 +1383,13 @@ SEL_IMERGE::SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param) : Sql_alloc()
   else
     trees= &trees_prealloced[0];
 
-  trees_next= trees;
+  trees_next= trees + (cnt ? cnt : arg->trees_next-arg->trees);
   trees_end= trees + elements;
 
-  for (SEL_TREE **tree = trees, **arg_tree= arg->trees; tree < trees_end; 
+  for (SEL_TREE **tree = trees, **arg_tree= arg->trees; tree < trees_next; 
        tree++, arg_tree++)
   {
-    if (!(*tree= new SEL_TREE(*arg_tree, param)))
+    if (!(*tree= new SEL_TREE(*arg_tree, FALSE, param)))
       goto mem_err;
   }
 
@@ -974,7 +1403,19 @@ mem_err:
 
 
 /*
-  Perform AND operation on two index_merge lists and store result in *im1.
+  Perform AND operation on two imerge lists
+
+  SYNOPSIS
+    imerge_list_and_list()
+      param             Context info for the operation         
+      im1               The first imerge list for the operation
+      im2               The second imerge list for the operation
+
+  DESCRIPTION
+    The function just appends the imerge list im2 to the imerge list im1  
+    
+  RETURN VALUE
+    none
 */
 
 inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
@@ -984,73 +1425,242 @@ inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
 
 
 /*
-  Perform OR operation on 2 index_merge lists, storing result in first list.
-
-  NOTES
-    The following conversion is implemented:
-     (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) =>
-      => (a_1||b_1).
-
-    i.e. all conjuncts except the first one are currently dropped.
-    This is done to avoid producing N*K ways to do index_merge.
-
-    If (a_1||b_1) produce a condition that is always TRUE, NULL is returned
-    and index_merge is discarded (while it is actually possible to try
-    harder).
-
-    As a consequence of this, choice of keys to do index_merge read may depend
-    on the order of conditions in WHERE part of the query.
+  Perform OR operation on two imerge lists
 
+  SYNOPSIS
+    imerge_list_or_list()
+      param             Context info for the operation         
+      im1               The first imerge list for the operation
+      im2               The second imerge list for the operation
+     
+  DESCRIPTION
+    Assuming that the first imerge list represents the formula
+      F1= M1_1 AND ... AND M1_k1 
+    while the second imerge list represents the formula 
+      F2= M2_1 AND ... AND M2_k2,
+    where M1_i= RT1_i_1 OR ... OR RT1_i_l1i (i in [1..k1])
+    and M2_i = RT2_i_1 OR ... OR RT2_i_l2i (i in [1..k2]),
+    the function builds a list of imerges for some formula that can be 
+    inferred from the formula (F1 OR F2).
+
+    More exactly the function builds imerges for the formula (M1_1 OR M2_1).
+    Note that
+      (F1 OR F2) = (M1_1 AND ... AND M1_k1) OR (M2_1 AND ... AND M2_k2) =
+      AND (M1_i OR M2_j) (i in [1..k1], j in [1..k2]) =>
+      M1_1 OR M2_1.
+    So (M1_1 OR M2_1) is indeed an inference formula for (F1 OR F2).
+
+    To build imerges for the formula (M1_1 OR M2_1) the function invokes,
+    possibly twice, the method SEL_IMERGE::or_sel_imerge_with_checks
+    for the imerge m1_1.
+    At its first invocation the method SEL_IMERGE::or_sel_imerge_with_checks
+    performs OR operation on the imerge m1_1 and the range tree rt2_1_1 by
+    calling SEL_IMERGE::or_sel_tree_with_checks with is_first_check_pass==TRUE.
+    The resulting imerge of the operation is ored with the next range tree of
+    the imerge m2_1. This oring continues until the last range tree from
+    m2_1 has been ored. 
+    At its second invocation the method SEL_IMERGE::or_sel_imerge_with_checks
+    performs the same sequence of OR operations, but now calling
+    SEL_IMERGE::or_sel_tree_with_checks with is_first_check_pass==FALSE.
+
+    The imerges that the operation produces replace those in the list im1   
+       
   RETURN
-    0     OK, result is stored in *im1
-    other Error, both passed lists are unusable
+    0     if the operation is a success 
+   -1     if the function has run out of memory 
 */
 
 int imerge_list_or_list(RANGE_OPT_PARAM *param,
                         List<SEL_IMERGE> *im1,
                         List<SEL_IMERGE> *im2)
 {
+
+  int rc;        /* signed: or_sel_imerge_with_checks() may return -1 */
+  bool is_last_check_pass= FALSE;
+
   SEL_IMERGE *imerge= im1->head();
+  uint elems= imerge->trees_next-imerge->trees; /* tree count before any oring */
   im1->empty();
   im1->push_back(imerge);
 
-  return imerge->or_sel_imerge_with_checks(param, im2->head());
+  rc= imerge->or_sel_imerge_with_checks(param, elems, im2->head(),
+                                        TRUE, &is_last_check_pass);
+  if (rc)
+  {
+    if (rc == 1)       /* the imerge should be discarded: leave im1 empty */
+    {
+      im1->empty();
+      rc= 0;
+    }
+    return rc;
+  }
+
+  if (!is_last_check_pass)  /* first pass asked for a second one */
+  {
+    SEL_IMERGE* new_imerge= new SEL_IMERGE(imerge, elems, param);
+    if (new_imerge)
+    {
+      is_last_check_pass= TRUE;
+      rc= new_imerge->or_sel_imerge_with_checks(param, elems, im2->head(),
+                                                 FALSE, &is_last_check_pass);
+      if (!rc)
+        im1->push_back(new_imerge); 
+    }
+  }
+  return rc;  
 }
 
 
 /*
-  Perform OR operation on index_merge list and key tree.
+  Perform OR operation for each imerge from a list and the range part of a tree
+
+  SYNOPSIS
+    imerge_list_or_tree()
+      param       Context info for the operation
+      merges      The list of imerges to be ored with the range part of tree          
+      tree        SEL_TREE whose range part is to be ored with the imerges
+
+  DESCRIPTION
+    For each imerge mi from the list 'merges' the function performs OR
+    operation with mi and the range part of 'tree' rt, producing one or
+    two imerges.
 
+    Given the merge mi represent the formula RTi_1 OR ... OR RTi_k, 
+    the function forms the merges by the following rules:
+ 
+    1. If rt cannot be ored with any of the trees rti the function just
+       produces an imerge that represents the formula
+         RTi_1 OR ... RTi_k OR RT.
+    2. If there exist a tree rtj that must be ored with rt the function
+       produces an imerge that represents the formula
+         RTi_1 OR ... OR (RTi_j OR RT) OR ... OR RTi_k,
+       where the range tree for (RTi_j OR RT) is constructed by oring the
+       SEL_ARG trees that must be ored.
+    3. For each rti_j that can be ored with rt the function produces
+       the new tree rti_j' and substitutes rti_j for this new range tree.
+
+    In any case the function removes mi from the list and then adds all
+    produced imerges.
+
+    To build imerges by rules 1-3 the function calls the method
+    SEL_IMERGE::or_sel_tree_with_checks, possibly twice. With the first
+    call it passes TRUE for the parameter is_first_check_pass.
+    At this first call imerges by rules 1-2 are built. If the call
+    returns FALSE in its output parameter is_last_check_pass then the
+    function is called for the second time. At this call the imerge
+    of rule 3 is produced.
+
+    If a call of SEL_IMERGE::or_sel_tree_with_checks returns 1 then
+    it means that the produced tree contains an always true
+    range tree and the whole imerge can be discarded.
+    
   RETURN
-    0     OK, result is stored in *im1.
-    other Error
+    1     if no imerges are produced
+    0     otherwise
 */
 
+static
 int imerge_list_or_tree(RANGE_OPT_PARAM *param,
-                        List<SEL_IMERGE> *im1,
+                        List<SEL_IMERGE> *merges,
                         SEL_TREE *tree)
 {
+
   SEL_IMERGE *imerge;
-  List_iterator<SEL_IMERGE> it(*im1);
-  bool tree_used= FALSE;
+  List<SEL_IMERGE> additional_merges;   /* imerges built by the second pass */
+  List_iterator<SEL_IMERGE> it(*merges);
+  
   while ((imerge= it++))
   {
-    SEL_TREE *or_tree;
-    if (tree_used)
+    bool is_last_check_pass;
+    int rc= 0;
+    int rc1= 0;
+    SEL_TREE *or_tree= new SEL_TREE (tree, FALSE, param); /* a copy per imerge */
+    if (or_tree)
     {
-      or_tree= new SEL_TREE (tree, param);
-      if (!or_tree ||
-          (or_tree->keys_map.is_clear_all() && or_tree->merges.is_empty()))
-        return FALSE;
+      uint elems= imerge->trees_next-imerge->trees; /* tree count before oring */
+      rc= imerge->or_sel_tree_with_checks(param, elems, or_tree,
+                                          TRUE, &is_last_check_pass);
+      if (!is_last_check_pass)
+      {
+        SEL_IMERGE *new_imerge= new SEL_IMERGE(imerge, elems, param);
+        if (new_imerge)
+	{ 
+          rc1= new_imerge->or_sel_tree_with_checks(param, elems, or_tree,
+                                                   FALSE, &is_last_check_pass);
+          if (!rc1)
+            additional_merges.push_back(new_imerge);
+        }
+      }
     }
-    else
-      or_tree= tree;
-
-    if (imerge->or_sel_tree_with_checks(param, or_tree))
+    if (rc || rc1 || !or_tree)  /* either pass returned non-zero, or the copy failed */
       it.remove();
-    tree_used= TRUE;
   }
-  return im1->is_empty();
+
+  merges->concat(&additional_merges);  
+  return merges->is_empty();
+}
+
+
+/*
+  Perform pushdown operation of the range part of a tree into given imerges 
+
+  SYNOPSIS
+    imerge_list_and_tree()
+      param           Context info for the operation
+      merges   IN/OUT List of imerges to push the range part of 'tree' into
+      tree            SEL_TREE whose range part is to be pushed into imerges
+
+  DESCRIPTION
+    For each imerge from the list merges the function pushes the range part
+    rt of 'tree' into the imerge. 
+    More exactly if the imerge mi from the list represents the formula
+      RTi_1 OR ... OR RTi_k 
+      the function builds a new imerge that represents the formula
+      (RTi_1 AND RT) OR ... OR (RTi_k AND RT)
+    and adds this imerge to the list merges.
+    To perform this pushdown operation the function calls the method
+    SEL_IMERGE::and_sel_tree. 
+    For any imerge mi the new imerge is not created if for each pair of
+    trees rti_j and rt the intersection of the indexes with defined ranges
+    is empty.
+    If the result of the pushdown operation for the imerge mi returns an
+    imerge with no trees then not only nothing is added to the list 
+    merges but mi itself is removed from the list. 
+     
+  RETURN
+    1    if no imerges are left in the list merges             
+    0    otherwise
+*/
+
+static
+int imerge_list_and_tree(RANGE_OPT_PARAM *param,
+                         List<SEL_IMERGE> *merges,
+                         SEL_TREE *tree)
+{
+  SEL_IMERGE *imerge;
+  SEL_IMERGE *new_imerge= NULL;
+  List<SEL_IMERGE> new_merges;
+  List_iterator<SEL_IMERGE> it(*merges);
+  
+  while ((imerge= it++))
+  {
+    if (!new_imerge)   /* otherwise reuse the one not pushed to new_merges */
+       new_imerge= new SEL_IMERGE();
+    if (imerge->have_common_keys(param, tree) && 
+        new_imerge && !imerge->and_sel_tree(param, tree, new_imerge))
+    {
+      if (new_imerge->trees == new_imerge->trees_next)
+        it.remove();          /* the pushdown produced no trees: drop imerge */
+      else
+      {         
+        new_merges.push_back(new_imerge);
+        new_imerge= NULL;     /* now in new_merges; allocate a fresh one next time */
+      }
+    }
+  }
+  imerge_list_and_list(&new_merges, merges); /* new imerges first, then the kept ones */
+  *merges= new_merges;
+  return merges->is_empty();
 }
 
 
@@ -1084,7 +1694,7 @@ SQL_SELECT *make_select(TABLE *head, table_map const_tables,
   select->read_tables=read_tables;
   select->const_tables=const_tables;
   select->head=head;
-  select->cond=conds;
+  select->cond= conds;
 
   if (head->sort.io_cache)
   {
@@ -1098,7 +1708,7 @@ SQL_SELECT *make_select(TABLE *head, table_map const_tables,
 }
 
 
-SQL_SELECT::SQL_SELECT() :quick(0),cond(0),free_cond(0)
+SQL_SELECT::SQL_SELECT() :quick(0),cond(0),pre_idx_push_select_cond(NULL),free_cond(0)
 {
   quick_keys.clear_all(); needed_reg.clear_all();
   my_b_clear(&file);
@@ -1132,25 +1742,22 @@ QUICK_SELECT_I::QUICK_SELECT_I()
 {}
 
 QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
-                                       bool no_alloc, MEM_ROOT *parent_alloc)
-  :dont_free(0),error(0),free_file(0),in_range(0),cur_range(NULL),last_range(0)
+                                       bool no_alloc, MEM_ROOT *parent_alloc,
+                                       bool *create_error)
+  :doing_key_read(0),free_file(0),cur_range(NULL),last_range(0),dont_free(0)
 {
   my_bitmap_map *bitmap;
   DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");
 
   in_ror_merged_scan= 0;
-  sorted= 0;
   index= key_nr;
   head=  table;
   key_part_info= head->key_info[index].key_part;
   my_init_dynamic_array(&ranges, sizeof(QUICK_RANGE*), 16, 16);
 
   /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
-  multi_range_bufsiz= thd->variables.read_rnd_buff_size;
-  multi_range_count= thd->variables.multi_range_count;
-  multi_range_length= 0;
-  multi_range= NULL;
-  multi_range_buff= NULL;
+  mrr_buf_size= thd->variables.mrr_buff_size;
+  mrr_buf_desc= NULL;
 
   if (!no_alloc && !parent_alloc)
   {
@@ -1165,12 +1772,12 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
   save_read_set= head->read_set;
   save_write_set= head->write_set;
 
-  /* Allocate a bitmap for used columns */
+  /* Allocate a bitmap for used columns (Q: why not on MEM_ROOT?) */
   if (!(bitmap= (my_bitmap_map*) my_malloc(head->s->column_bitmap_size,
                                            MYF(MY_WME))))
   {
     column_bitmap.bitmap= 0;
-    error= 1;
+    *create_error= 1;
   }
   else
     bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE);
@@ -1178,6 +1785,20 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
 }
 
 
+void QUICK_RANGE_SELECT::need_sorted_output()
+{
+  if (!(mrr_flags & HA_MRR_SORTED))
+  {
+    /*
+      Native implementation can't produce sorted output. We'll have to
+      switch to the default MRR implementation.
+    */
+    mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; 
+  }
+  mrr_flags |= HA_MRR_SORTED;
+}
+
+
 int QUICK_RANGE_SELECT::init()
 {
   DBUG_ENTER("QUICK_RANGE_SELECT::init");
@@ -1204,13 +1825,14 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
     if (file) 
     {
       range_end();
-      head->set_keyread(FALSE);
+      if (doing_key_read)
+        file->extra(HA_EXTRA_NO_KEYREAD);
       if (free_file)
       {
         DBUG_PRINT("info", ("Freeing separate handler 0x%lx (free: %d)", (long) file,
                             free_file));
         file->ha_external_lock(current_thd, F_UNLCK);
-        file->close();
+        file->ha_close();
         delete file;
       }
     }
@@ -1219,17 +1841,22 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
     my_free(column_bitmap.bitmap);
   }
   head->column_bitmaps_set(save_read_set, save_write_set);
-  my_free(multi_range);
-  my_free(multi_range_buff);
+  my_free(mrr_buf_desc);
   DBUG_VOID_RETURN;
 }
 
+/*
+  QUICK_INDEX_SORT_SELECT works as follows:
+  - Do index scans, accumulate rowids in the Unique object 
+    (Unique will also sort and de-duplicate rowids)
+  - Use rowids from unique to run a disk-ordered sweep
+*/
 
-QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
-                                                   TABLE *table)
+QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param,
+                                                 TABLE *table)
   :unique(NULL), pk_quick_select(NULL), thd(thd_param)
 {
-  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT");
+  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT");
   index= MAX_KEY;
   head= table;
   bzero(&read_record, sizeof(read_record));
@@ -1237,38 +1864,48 @@ QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
   DBUG_VOID_RETURN;
 }
 
-int QUICK_INDEX_MERGE_SELECT::init()
+int QUICK_INDEX_SORT_SELECT::init()
 {
-  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::init");
+  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::init");
   DBUG_RETURN(0);
 }
 
-int QUICK_INDEX_MERGE_SELECT::reset()
+int QUICK_INDEX_SORT_SELECT::reset()
 {
-  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset");
+  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::reset");
   DBUG_RETURN(read_keys_and_merge());
 }
 
 bool
-QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
+QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
 {
-  /*
-    Save quick_select that does scan on clustered primary key as it will be
-    processed separately.
-  */
+  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::push_quick_back");
   if (head->file->primary_key_is_clustered() &&
       quick_sel_range->index == head->s->primary_key)
+  {
+   /*
+     A quick_select over a clustered primary key is handled specially.
+     Here we assume:
+     1. PK columns are included in any other merged index.
+     2. Scan on the PK is disk-ordered.
+        (not meeting #2 will only cause performance degradation)
+
+       We could treat clustered PK as any other index, but that would
+       be inefficient. There is no point in doing scan on
+       CPK, remembering the rowid, then making rnd_pos() call with
+       that rowid.
+    */
     pk_quick_select= quick_sel_range;
-  else
-    return quick_selects.push_back(quick_sel_range);
-  return 0;
+    DBUG_RETURN(0);
+  }
+  DBUG_RETURN(quick_selects.push_back(quick_sel_range));
 }
 
-QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
+QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT()
 {
   List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
   QUICK_RANGE_SELECT* quick;
-  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT");
+  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT");
   delete unique;
   quick_it.rewind();
   while ((quick= quick_it++))
@@ -1282,7 +1919,6 @@ QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
   DBUG_VOID_RETURN;
 }
 
-
 QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
                                                        TABLE *table,
                                                        bool retrieve_full_rows,
@@ -1345,6 +1981,7 @@ int QUICK_ROR_INTERSECT_SELECT::init()
 int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
 {
   handler *save_file= file, *org_file;
+  my_bool org_key_read;
   THD *thd;
   DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");
 
@@ -1390,7 +2027,7 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
   if (init() || reset())
   {
     file->ha_external_lock(thd, F_UNLCK);
-    file->close();
+    file->ha_close();
     goto failure;
   }
   free_file= TRUE;
@@ -1404,12 +2041,17 @@ end:
     The now bitmap is stored in 'column_bitmap' which is used in ::get_next()
   */
   org_file= head->file;
+  org_key_read= head->key_read;
   head->file= file;
-  /* We don't have to set 'head->keyread' here as the 'file' is unique */
+  head->key_read= 0;
   if (!head->no_keyread)
+  {
+    doing_key_read= 1;
     head->mark_columns_used_by_index(index);
+  }
   head->prepare_for_position();
   head->file= org_file;
+  head->key_read= org_key_read;
   bitmap_copy(&column_bitmap, head->read_set);
   head->column_bitmaps_set(&column_bitmap, &column_bitmap);
 
@@ -1435,15 +2077,17 @@ failure:
 */
 int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
 {
-  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
-  QUICK_RANGE_SELECT* quick;
+  List_iterator_fast<QUICK_SELECT_WITH_RECORD> quick_it(quick_selects);
+  QUICK_SELECT_WITH_RECORD *cur;
+  QUICK_RANGE_SELECT *quick;
   DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");
 
   /* Initialize all merged "children" quick selects */
   DBUG_ASSERT(!need_to_fetch_row || reuse_handler);
   if (!need_to_fetch_row && reuse_handler)
   {
-    quick= quick_it++;
+    cur= quick_it++;
+    quick= cur->quick;
     /*
       There is no use of this->file. Use it for the first of merged range
       selects.
@@ -1452,8 +2096,9 @@ int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
       DBUG_RETURN(1);
     quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
   }
-  while ((quick= quick_it++))
+  while ((cur= quick_it++))
   {
+    quick= cur->quick;
     if (quick->init_ror_merged_scan(FALSE))
       DBUG_RETURN(1);
     quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
@@ -1461,7 +2106,7 @@ int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
     quick->record= head->record[0];
   }
 
-  if (need_to_fetch_row && head->file->ha_rnd_init(1))
+  if (need_to_fetch_row && head->file->ha_rnd_init_with_error(1))
   {
     DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
     DBUG_RETURN(1);
@@ -1485,10 +2130,10 @@ int QUICK_ROR_INTERSECT_SELECT::reset()
   if (!scans_inited && init_ror_merged_scan(TRUE))
     DBUG_RETURN(1);
   scans_inited= TRUE;
-  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
-  QUICK_RANGE_SELECT *quick;
-  while ((quick= it++))
-    quick->reset();
+  List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
+  QUICK_SELECT_WITH_RECORD *qr;
+  while ((qr= it++))
+    qr->quick->reset();
   DBUG_RETURN(0);
 }
 
@@ -1498,6 +2143,7 @@ int QUICK_ROR_INTERSECT_SELECT::reset()
 
   SYNOPSIS
     QUICK_ROR_INTERSECT_SELECT::push_quick_back()
+      alloc Mem root to create auxiliary structures on
       quick Quick select to be added. The quick select must return
             rows in rowid order.
   NOTES
@@ -1509,11 +2155,17 @@ int QUICK_ROR_INTERSECT_SELECT::reset()
 */
 
 bool
-QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick)
+QUICK_ROR_INTERSECT_SELECT::push_quick_back(MEM_ROOT *alloc, QUICK_RANGE_SELECT *quick)
 {
-  return quick_selects.push_back(quick);
+  QUICK_SELECT_WITH_RECORD *qr;
+  if (!(qr= new QUICK_SELECT_WITH_RECORD) || 
+      !(qr->key_tuple= (uchar*)alloc_root(alloc, quick->max_used_key_length)))
+    return TRUE;
+  qr->quick= quick;
+  return quick_selects.push_back(qr);
 }
 
+
 QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
 {
   DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
@@ -1577,7 +2229,7 @@ int QUICK_ROR_UNION_SELECT::init()
   DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
   if (init_queue(&queue, quick_selects.elements, 0,
                  FALSE , QUICK_ROR_UNION_SELECT_queue_cmp,
-                 (void*) this))
+                 (void*) this, 0, 0))
   {
     bzero(&queue, sizeof(QUEUE));
     DBUG_RETURN(1);
@@ -1636,7 +2288,7 @@ int QUICK_ROR_UNION_SELECT::reset()
     queue_insert(&queue, (uchar*)quick);
   }
 
-  if (head->file->ha_rnd_init(1))
+  if (head->file->ha_rnd_init_with_error(1))
   {
     DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
     DBUG_RETURN(1);
@@ -1682,6 +2334,7 @@ SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
   min_value=arg.min_value;
   max_value=arg.max_value;
   next_key_part=arg.next_key_part;
+  max_part_no= arg.max_part_no;
   use_count=1; elements=1;
 }
 
@@ -1699,9 +2352,10 @@ SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
   :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()),
    elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg),
    max_value((uchar*) max_value_arg), next(0),prev(0),
-   next_key_part(0),color(BLACK),type(KEY_RANGE)
+   next_key_part(0), color(BLACK), type(KEY_RANGE)
 {
   left=right= &null_element;
+  max_part_no= 1;
 }
 
 SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
@@ -1712,6 +2366,7 @@ SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
    field(field_), min_value(min_value_), max_value(max_value_),
    next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE)
 {
+  max_part_no= part+1;
   left=right= &null_element;
 }
 
@@ -1755,6 +2410,7 @@ SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent,
   increment_use_count(1);
   tmp->color= color;
   tmp->elements= this->elements;
+  tmp->max_part_no= max_part_no;
   return tmp;
 }
 
@@ -1908,9 +2564,11 @@ class TRP_RANGE : public TABLE_READ_PLAN
 public:
   SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */
   uint     key_idx; /* key number in PARAM::key */
+  uint     mrr_flags; 
+  uint     mrr_buf_size;
 
-  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg)
-   : key(key_arg), key_idx(idx_arg)
+  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg)
+   : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg)
   {}
   virtual ~TRP_RANGE() {}                     /* Remove gcc warning */
 
@@ -1919,7 +2577,8 @@ public:
   {
     DBUG_ENTER("TRP_RANGE::make_quick");
     QUICK_RANGE_SELECT *quick;
-    if ((quick= get_quick_select(param, key_idx, key, parent_alloc)))
+    if ((quick= get_quick_select(param, key_idx, key,  mrr_flags, 
+                                 mrr_buf_size, parent_alloc)))
     {
       quick->records= records;
       quick->read_time= read_cost;
@@ -1967,6 +2626,26 @@ public:
 
 
 /*
+  Plan for QUICK_INDEX_INTERSECT_SELECT scan.
+  QUICK_INDEX_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows
+  is ignored by make_quick.
+*/
+
+class TRP_INDEX_INTERSECT : public TABLE_READ_PLAN
+{
+public:
+  TRP_INDEX_INTERSECT() {}                        /* Remove gcc warning */
+  virtual ~TRP_INDEX_INTERSECT() {}               /* Remove gcc warning */
+  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
+                             MEM_ROOT *parent_alloc);
+  TRP_RANGE **range_scans; /* array of ptrs to plans of intersected scans */
+  TRP_RANGE **range_scans_end; /* end of the array */
+  /* keys whose scans are to be filtered by cpk conditions */
+  key_map filtered_scans;  
+};
+
+
+/*
   Plan for QUICK_INDEX_MERGE_SELECT scan.
   QUICK_ROR_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows
   is ignored by make_quick.
@@ -2037,6 +2716,38 @@ public:
 };
 
 
+typedef struct st_index_scan_info
+{
+  uint      idx;      /* # of used key in param->keys */
+  uint      keynr;    /* # of used key in table */
+  uint      range_count;
+  ha_rows   records;  /* estimate of # records this scan will return */
+
+  /* Set of intervals over key fields that will be used for row retrieval. */
+  SEL_ARG   *sel_arg;
+
+  KEY *key_info;
+  uint used_key_parts;
+
+  /* Estimate of # records filtered out by intersection with cpk */
+  ha_rows   filtered_out;
+  /* Bitmap of fields used in index intersection */ 
+  MY_BITMAP used_fields;
+
+  /* Fields used in the query and covered by ROR scan. */
+  MY_BITMAP covered_fields;
+  uint      used_fields_covered; /* # of set bits in covered_fields */
+  int       key_rec_length; /* length of key record (including rowid) */
+
+  /*
+    Cost of reading all index records with values in sel_arg intervals set
+    (assuming there is no need to access full table records)
+  */
+  double    index_read_cost;
+  uint      first_uncovered_field; /* first unused bit in covered_fields */
+  uint      key_components; /* # of parts in the key */
+} INDEX_SCAN_INFO;
+
 /*
   Fill param->needed_fields with bitmap of fields used in the query.
   SYNOPSIS
@@ -2148,7 +2859,8 @@ static int fill_used_fields_bitmap(PARAM *param)
 
 int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
 				  table_map prev_tables,
-				  ha_rows limit, bool force_quick_range)
+				  ha_rows limit, bool force_quick_range, 
+                                  bool ordered_output)
 {
   uint idx;
   double scan_time;
@@ -2161,7 +2873,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   quick=0;
   needed_reg.clear_all();
   quick_keys.clear_all();
-  if (keys_to_use.is_clear_all())
+  DBUG_ASSERT(!head->is_filled_at_execution());
+  if (keys_to_use.is_clear_all() || head->is_filled_at_execution())
     DBUG_RETURN(0);
   records= head->file->stats.records;
   if (!records)
@@ -2204,6 +2917,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     param.imerge_cost_buff_size= 0;
     param.using_real_indexes= TRUE;
     param.remove_jump_scans= TRUE;
+    param.force_default_mrr= ordered_output;
 
     thd->no_errors=1;				// Don't warn about NULL
     init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
@@ -2257,9 +2971,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     if (!head->covering_keys.is_clear_all())
     {
       int key_for_use= find_shortest_key(head, &head->covering_keys);
-      double key_read_time= (get_index_only_read_time(&param, records,
-                                                     key_for_use) +
-                             (double) records / TIME_FOR_COMPARE);
+      double key_read_time= head->file->keyread_time(key_for_use, 1, records) +
+                            (double) records / TIME_FOR_COMPARE;
       DBUG_PRINT("info",  ("'all'+'using index' scan will be using key %d, "
                            "read time %g", key_for_use, key_read_time));
       if (key_read_time < read_time)
@@ -2311,72 +3024,92 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
         It is possible to use a range-based quick select (but it might be
         slower than 'all' table scan).
       */
-      if (tree->merges.is_empty())
-      {
-        TRP_RANGE         *range_trp;
-        TRP_ROR_INTERSECT *rori_trp;
-        bool can_build_covering= FALSE;
+      TRP_RANGE         *range_trp;
+      TRP_ROR_INTERSECT *rori_trp;
+      TRP_INDEX_INTERSECT *intersect_trp;
+      bool can_build_covering= FALSE;
+      
+      remove_nonrange_trees(&param, tree);
 
-        /* Get best 'range' plan and prepare data for making other plans */
-        if ((range_trp= get_key_scans_params(&param, tree, FALSE, TRUE,
-                                             best_read_time)))
-        {
-          best_trp= range_trp;
-          best_read_time= best_trp->read_cost;
-        }
+      /* Get best 'range' plan and prepare data for making other plans */
+      if ((range_trp= get_key_scans_params(&param, tree, FALSE, TRUE,
+                                           best_read_time)))
+      {
+        best_trp= range_trp;
+        best_read_time= best_trp->read_cost;
+      }
 
+      /*
+        Simultaneous key scans and row deletes on several handler
+        objects are not allowed so don't use ROR-intersection for
+        table deletes.
+      */
+      if ((thd->lex->sql_command != SQLCOM_DELETE) && 
+           optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE))
+      {
         /*
-          Simultaneous key scans and row deletes on several handler
-          objects are not allowed so don't use ROR-intersection for
-          table deletes.
+          Get best non-covering ROR-intersection plan and prepare data for
+          building covering ROR-intersection.
         */
-        if ((thd->lex->sql_command != SQLCOM_DELETE) && 
-             optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE))
+        if ((rori_trp= get_best_ror_intersect(&param, tree, best_read_time,
+                                              &can_build_covering)))
         {
+          best_trp= rori_trp;
+          best_read_time= best_trp->read_cost;
           /*
-            Get best non-covering ROR-intersection plan and prepare data for
-            building covering ROR-intersection.
+            Try constructing covering ROR-intersect only if it looks possible
+            and worth doing.
           */
-          if ((rori_trp= get_best_ror_intersect(&param, tree, best_read_time,
-                                                &can_build_covering)))
-          {
+          if (!rori_trp->is_covering && can_build_covering &&
+              (rori_trp= get_best_covering_ror_intersect(&param, tree,
+                                                         best_read_time)))
             best_trp= rori_trp;
-            best_read_time= best_trp->read_cost;
-            /*
-              Try constructing covering ROR-intersect only if it looks possible
-              and worth doing.
-            */
-            if (!rori_trp->is_covering && can_build_covering &&
-                (rori_trp= get_best_covering_ror_intersect(&param, tree,
-                                                           best_read_time)))
-              best_trp= rori_trp;
-          }
         }
       }
-      else
+      /*
+        Do not look for an index intersection plan if there is a covering
+        index. The scan by this covering index will always be cheaper than
+        any index intersection.
+      */
+      if (param.table->covering_keys.is_clear_all() &&
+          optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) &&
+          optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT))
       {
-        if (optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE))
+        if ((intersect_trp= get_best_index_intersect(&param, tree,
+                                                    best_read_time)))
         {
-          /* Try creating index_merge/ROR-union scan. */
-          SEL_IMERGE *imerge;
-          TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp;
-          LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */
-          DBUG_PRINT("info",("No range reads possible,"
-                             " trying to construct index_merge"));
-          List_iterator_fast<SEL_IMERGE> it(tree->merges);
-          while ((imerge= it++))
+          best_trp= intersect_trp;
+          best_read_time= best_trp->read_cost; 
+          set_if_smaller(param.table->quick_condition_rows, 
+                         intersect_trp->records);
+        }
+      }
+
+      if (optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE))
+      {
+        /* Try creating index_merge/ROR-union scan. */
+        SEL_IMERGE *imerge;
+        TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp;
+        LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */
+        DBUG_PRINT("info",("No range reads possible,"
+                           " trying to construct index_merge"));
+        List_iterator_fast<SEL_IMERGE> it(tree->merges);
+        while ((imerge= it++))
+        {
+          new_conj_trp= get_best_disjunct_quick(&param, imerge, best_read_time);
+          if (new_conj_trp)
+            set_if_smaller(param.table->quick_condition_rows, 
+                           new_conj_trp->records);
+          if (new_conj_trp &&
+              (!best_conj_trp || 
+               new_conj_trp->read_cost < best_conj_trp->read_cost))
           {
-            new_conj_trp= get_best_disjunct_quick(&param, imerge, best_read_time);
-            if (new_conj_trp)
-              set_if_smaller(param.table->quick_condition_rows, 
-                             new_conj_trp->records);
-            if (!best_conj_trp || (new_conj_trp && new_conj_trp->read_cost <
-                                   best_conj_trp->read_cost))
-              best_conj_trp= new_conj_trp;
+            best_conj_trp= new_conj_trp;
+            best_read_time= best_conj_trp->read_cost;
           }
-          if (best_conj_trp)
-            best_trp= best_conj_trp;
         }
+        if (best_conj_trp)
+          best_trp= best_conj_trp;
       }
     }
 
@@ -3673,11 +4406,19 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records)
   DBUG_ENTER("get_sweep_read_cost");
   if (param->table->file->primary_key_is_clustered())
   {
+    /*
+      We are using the primary key to find the rows.
+      Calculate the cost for this.
+    */
     result= param->table->file->read_time(param->table->s->primary_key,
                                           (uint)records, records);
   }
   else
   {
+    /*
+      Rows will be retrieved with rnd_pos(). Calculate the expected
+      cost for this.
+    */
     double n_blocks=
       ceil(ulonglong2double(param->table->file->stats.data_file_length) /
            IO_SIZE);
@@ -3694,7 +4435,7 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records)
       return 1;
     */
     JOIN *join= param->thd->lex->select_lex.join;
-    if (!join || join->tables == 1)
+    if (!join || join->table_count == 1)
     {
       /* No join, assume reading is done in one 'sweep' */
       result= busy_blocks*(DISK_SEEK_BASE_COST +
@@ -3785,7 +4526,6 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
 {
   SEL_TREE **ptree;
   TRP_INDEX_MERGE *imerge_trp= NULL;
-  uint n_child_scans= imerge->trees_next - imerge->trees;
   TRP_RANGE **range_scans;
   TRP_RANGE **cur_child;
   TRP_RANGE **cpk_scan= NULL;
@@ -3805,6 +4545,24 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
   DBUG_ENTER("get_best_disjunct_quick");
   DBUG_PRINT("info", ("Full table scan cost: %g", read_time));
 
+  /*
+    In every tree of imerge remove SEL_ARG trees that do not make ranges.
+    If after this removal some SEL_ARG tree becomes empty discard imerge.  
+  */
+  for (ptree= imerge->trees; ptree != imerge->trees_next; ptree++)
+  {
+    if (remove_nonrange_trees(param, *ptree))
+    {
+      imerge->trees_next= imerge->trees;
+      break;
+    }
+  }
+
+  uint n_child_scans= imerge->trees_next - imerge->trees;
+  
+  if (!n_child_scans)
+    DBUG_RETURN(NULL);
+
   if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
                                              sizeof(TRP_RANGE*)*
                                              n_child_scans)))
@@ -3849,8 +4607,9 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
 
   DBUG_PRINT("info", ("index_merge scans cost %g", imerge_cost));
   if (imerge_too_expensive || (imerge_cost > read_time) ||
-      ((non_cpk_scan_records+cpk_scan_records >= param->table->file->stats.records) &&
-      read_time != DBL_MAX))
+      ((non_cpk_scan_records+cpk_scan_records >=
+        param->table->file->stats.records) &&
+       read_time != DBL_MAX))
   {
     /*
       Bail out if it is obvious that both index_merge and ROR-union will be
@@ -3908,7 +4667,9 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
   imerge_cost +=
     Unique::get_use_cost(param->imerge_cost_buff, (uint)non_cpk_scan_records,
                          param->table->file->ref_length,
-                         param->thd->variables.sortbuff_size);
+                         param->thd->variables.sortbuff_size,
+                         TIME_FOR_COMPARE_ROWID,
+                         FALSE, NULL);
   DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)",
                      imerge_cost, read_time));
   if (imerge_cost < read_time)
@@ -3923,6 +4684,13 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
       imerge_trp->range_scans_end= range_scans + n_child_scans;
       read_time= imerge_cost;
     }
+    if (imerge_trp)
+    {
+      TABLE_READ_PLAN *trp= merge_same_index_scans(param, imerge, imerge_trp,
+                                                   read_time);
+      if (trp != imerge_trp)
+        DBUG_RETURN(trp);
+    }
   }
 
 build_ror_index_merge:
@@ -3938,6 +4706,7 @@ build_ror_index_merge:
                                         sizeof(TABLE_READ_PLAN*)*
                                         n_child_scans)))
     DBUG_RETURN(imerge_trp);
+
 skip_to_ror_scan:
   roru_index_costs= 0.0;
   roru_total_records= 0;
@@ -4021,67 +4790,990 @@ skip_to_ror_scan:
       DBUG_RETURN(roru);
     }
   }
-  DBUG_RETURN(imerge_trp);
+    DBUG_RETURN(imerge_trp);
+}
+
+
+/*
+  Merge index scans for the same indexes in an index merge plan
+
+  SYNOPSIS
+    merge_same_index_scans()
+      param           Context info for the operation
+      imerge   IN/OUT SEL_IMERGE from which imerge_trp has been extracted          
+      imerge_trp      The index merge plan where index scans for the same
+                      indexes are to be merged
+      read_time       The upper bound for the cost of the plan to be evaluated
+
+  DESCRIPTION
+    For the given index merge plan imerge_trp extracted from the SEL_IMERGE
+    imerge the function looks for range scans with the same indexes and merges
+    them into SEL_ARG trees. Then for each such SEL_ARG tree r_i the function
+    creates a range tree rt_i that contains only r_i. All rt_i are joined
+    into one index merge that replaces the original index merge imerge.
+    The function calls get_best_disjunct_quick for the new index merge to
+    get a new index merge plan that contains index scans only for different
+    indexes.
+    If there are no index scans for the same index in the original index
+    merge plan the function does not change the original imerge and returns
+    imerge_trp as its result.
+
+  RETURN
+    The original or the improved index merge plan
+*/
+
+static
+TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
+                                        TRP_INDEX_MERGE *imerge_trp,
+                                        double read_time)
+{
+  uint16 first_scan_tree_idx[MAX_KEY];
+  SEL_TREE **tree;
+  TRP_RANGE **cur_child;
+  uint removed_cnt= 0;
+
+  DBUG_ENTER("merge_same_index_scans");
+
+  bzero(first_scan_tree_idx, sizeof(first_scan_tree_idx[0])*param->keys);
+
+  for (tree= imerge->trees, cur_child= imerge_trp->range_scans;
+       tree != imerge->trees_next;
+       tree++, cur_child++)
+  {
+    DBUG_ASSERT(tree);
+    uint key_idx= (*cur_child)->key_idx;
+    uint16 *tree_idx_ptr= &first_scan_tree_idx[key_idx];
+    if (!*tree_idx_ptr)
+      *tree_idx_ptr= (uint16) (tree-imerge->trees+1);
+    else
+    {
+      SEL_TREE **changed_tree= imerge->trees+(*tree_idx_ptr-1);
+      SEL_ARG *key= (*changed_tree)->keys[key_idx];
+      bzero((*changed_tree)->keys,
+            sizeof((*changed_tree)->keys[0])*param->keys);
+      (*changed_tree)->keys_map.clear_all();
+      if (((*changed_tree)->keys[key_idx]=
+             key_or(param, key, (*tree)->keys[key_idx])))
+        (*changed_tree)->keys_map.set_bit(key_idx);
+      *tree= NULL;
+      removed_cnt++;
+    }
+  }
+  if (!removed_cnt)
+    DBUG_RETURN(imerge_trp);
+
+  TABLE_READ_PLAN *trp= NULL;
+  SEL_TREE **new_trees_next= imerge->trees;
+  for (tree= new_trees_next; tree != imerge->trees_next; tree++)
+  {
+    if (!*tree)
+      continue;
+    if (tree > new_trees_next)
+      *new_trees_next= *tree;
+    new_trees_next++;
+  }
+  imerge->trees_next= new_trees_next;
+
+  DBUG_ASSERT(imerge->trees_next>imerge->trees);
+
+  if (imerge->trees_next-imerge->trees > 1)
+    trp= get_best_disjunct_quick(param, imerge, read_time);
+  else
+  {
+    /*
+      This alternative theoretically can be reached when the cost
+      of the index merge for such a formula as
+        (key1 BETWEEN c1_1 AND c1_2) AND key2 > c2 OR
+        (key1 BETWEEN c1_3 AND c1_4) AND key3 > c3
+      is estimated as being cheaper than the cost of index scan for
+      the formula
+        (key1 BETWEEN c1_1 AND c1_2) OR (key1 BETWEEN c1_3 AND c1_4)
+      
+      In the current code this may happen for two reasons:
+      1. for a single index range scan data records are accessed in
+         a random order
+      2. the functions that estimate the cost of a range scan and an
+         index merge retrievals are not well calibrated
+    */
+    trp= get_key_scans_params(param, *imerge->trees, FALSE, TRUE,
+                              read_time);
+  }
+
+  DBUG_RETURN(trp); 
+}
+
+
+/*
+  This structure contains the info common for all steps of a partial
+  index intersection plan. Moreover, it also contains the info common
+  for index intersect plans. This info is filled in by the function
+  prepare_search_best_index_intersect just before searching for the best index
+  intersection plan.
+*/  
+
+typedef struct st_common_index_intersect_info
+{
+  PARAM *param;           /* context info for range optimizations            */
+  uint key_size;          /* size of a ROWID element stored in Unique object */
+  uint compare_factor;         /* 1/compare - cost to compare two ROWIDs     */
+  ulonglong max_memory_size;   /* maximum space allowed for Unique objects   */   
+  ha_rows table_cardinality;   /* estimate of the number of records in table */
+  double cutoff_cost;        /* discard index intersects with greater costs  */ 
+  INDEX_SCAN_INFO *cpk_scan;  /* clustered primary key used in intersection  */
+
+  bool in_memory;  /* unique object for intersection is completely in memory */
+
+  INDEX_SCAN_INFO **search_scans;    /* scans possibly included in intersect */ 
+  uint n_search_scans;               /* number of elements in search_scans   */
+
+  bool best_uses_cpk;   /* current best intersect uses clustered primary key */
+  double best_cost;       /* cost of the current best index intersection     */
+  /* estimate of the number of records in the current best intersection      */
+  ha_rows best_records;
+  uint best_length;    /* number of indexes in the current best intersection */
+  INDEX_SCAN_INFO **best_intersect;  /* the current best index intersection  */
+  /* scans from the best intersect to be filtered by cpk conditions          */
+  key_map filtered_scans; 
+
+  uint *buff_elems;        /* buffer to calculate cost of index intersection */
+  
+} COMMON_INDEX_INTERSECT_INFO;
+
+
+/*
+  This structure contains the info specific for one step of an index
+  intersection plan. The structure is filled in by the function 
+   check_index_intersect_extension.
+*/
+
+typedef struct st_partial_index_intersect_info
+{
+  COMMON_INDEX_INTERSECT_INFO *common_info;    /* shared by index intersects */
+  uint length;         /* number of index scans in the partial intersection  */
+  ha_rows records;     /* estimate of the number of records in intersection  */
+  double cost;         /* cost of the partial index intersection             */
+
+  /* estimate of total number of records of all scans of the partial index
+     intersect sent to the Unique object used for the intersection  */
+  ha_rows records_sent_to_unique;
+
+  /* total cost of the scans of indexes from the partial index intersection  */
+  double index_read_cost; 
+
+  bool use_cpk_filter;      /* cpk filter is to be used for this       scan  */  
+  bool in_memory;            /* uses unique object in memory                 */
+  double in_memory_cost;     /* cost of using unique object in memory        */
+
+  key_map filtered_scans;    /* scans to be filtered by cpk conditions       */
+         
+  MY_BITMAP *intersect_fields;     /* bitmap of fields used in intersection  */
+} PARTIAL_INDEX_INTERSECT_INFO;
+
+
+/* Check whether two indexes have the same first n components */
+
+static
+bool same_index_prefix(KEY *key1, KEY *key2, uint used_parts)
+{
+  KEY_PART_INFO *part1= key1->key_part;
+  KEY_PART_INFO *part2= key2->key_part;
+  for(uint i= 0; i < used_parts; i++, part1++, part2++)
+  {
+    if (part1->fieldnr != part2->fieldnr)
+      return FALSE;
+  }
+  return TRUE;
+}
+
+
+/* Create a bitmap for all fields of a table */
+
+static
+bool create_fields_bitmap(PARAM *param, MY_BITMAP *fields_bitmap)
+{
+  my_bitmap_map *bitmap_buf;
+
+  if (!(bitmap_buf= (my_bitmap_map *) alloc_root(param->mem_root,
+                                                 param->fields_bitmap_size)))
+    return TRUE;
+  if (bitmap_init(fields_bitmap, bitmap_buf, param->table->s->fields, FALSE))
+    return TRUE;
+  
+  return FALSE;
+}
+
+/* Compare two index scans; used to sort them before the intersection search */
+
+static
+int cmp_intersect_index_scan(INDEX_SCAN_INFO **a, INDEX_SCAN_INFO **b)
+{
+  return (*a)->records < (*b)->records ?
+          -1 : (*a)->records == (*b)->records ? 0 : 1;
+}
+
+
+static inline
+void set_field_bitmap_for_index_prefix(MY_BITMAP *field_bitmap,
+                                       KEY_PART_INFO *key_part,
+                                       uint used_key_parts)
+{
+  bitmap_clear_all(field_bitmap);
+  for (KEY_PART_INFO *key_part_end= key_part+used_key_parts;
+       key_part < key_part_end; key_part++)
+  {
+    bitmap_set_bit(field_bitmap, key_part->fieldnr-1);
+  }
+}
+
+
+/*
+  Round the table cardinality read from the statistics provided by the engine
+  to one significant digit. This function should go away when the mysql test
+  framework allows handling deviations of InnoDB statistical data in the
+  test suites more or less easily.
+*/
+ 
+static inline
+ha_rows get_table_cardinality_for_index_intersect(TABLE *table)
+{
+  if (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)
+    return table->file->stats.records;
+  else
+  {
+    ha_rows d;
+    double q;
+    for (q= (double)table->file->stats.records, d= 1 ; q >= 10; q/= 10, d*= 10 ) ;
+    return (ha_rows) (floor(q+0.5) * d);
+  } 
+}
+
+  
+static
+ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
+                                             INDEX_SCAN_INFO *ext_index_scan);
+
+/*
+  Prepare to search for the best index intersection
+
+  SYNOPSIS
+    prepare_search_best_index_intersect()
+      param         common info about index ranges
+      tree          tree of ranges for indexes that can be intersected
+      common    OUT info needed for search to be filled by the function 
+      init      OUT info for an initial pseudo step of the intersection plans
+      cutoff_cost   cut off cost of the interesting index intersection 
+
+  DESCRIPTION
+    The function initializes all fields of the structure 'common' to be used
+    when searching for the best intersection plan. It also allocates
+    memory to store the cheapest index intersection.
+
+  NOTES
+    When selecting candidates for index intersection we always take only
+    one representative out of any set of indexes that share the same range
+    conditions. These indexes always have the same prefixes and the
+    components of this prefixes are exactly those used in these range
+    conditions.
+    A range condition over the clustered primary key (cpk) is always used
+    only as a condition that filters out some rowids retrieved by the scans
+    for secondary indexes. The cpk index is handled in a special way by
+    the function that searches for the best index intersection.
+
+  RETURN
+    FALSE  in the case of success
+    TRUE   otherwise
+*/
+
+static
+bool prepare_search_best_index_intersect(PARAM *param, 
+                                         SEL_TREE *tree,
+                                         COMMON_INDEX_INTERSECT_INFO *common,
+                                         PARTIAL_INDEX_INTERSECT_INFO *init,
+                                         double cutoff_cost)
+{
+  uint i;
+  uint n_search_scans;
+  double cost;
+  INDEX_SCAN_INFO **index_scan;
+  INDEX_SCAN_INFO **scan_ptr;
+  INDEX_SCAN_INFO *cpk_scan= NULL;
+  TABLE *table= param->table;
+  uint n_index_scans= tree->index_scans_end - tree->index_scans;
+
+  if (!n_index_scans)
+    return 1;
+
+  bzero(init, sizeof(*init));
+  init->common_info= common;
+  init->cost= cutoff_cost;
+
+  common->param= param;
+  common->key_size= table->file->ref_length;
+  common->compare_factor= TIME_FOR_COMPARE_ROWID;
+  common->max_memory_size= param->thd->variables.sortbuff_size;
+  common->cutoff_cost= cutoff_cost;
+  common->cpk_scan= NULL;
+  common->table_cardinality= 
+    get_table_cardinality_for_index_intersect(table);
+
+  if (n_index_scans <= 1)
+    return TRUE;
+
+  if (table->file->primary_key_is_clustered())
+  {
+    INDEX_SCAN_INFO **index_scan_end;
+    index_scan= tree->index_scans;
+    index_scan_end= index_scan+n_index_scans;
+    for ( ; index_scan < index_scan_end; index_scan++)
+    {  
+      if ((*index_scan)->keynr == table->s->primary_key)
+      {
+        common->cpk_scan= cpk_scan= *index_scan;
+        break;
+      }
+    }
+  }
+
+  i= n_index_scans - test(cpk_scan != NULL) + 1;
+
+  if (!(common->search_scans =
+	(INDEX_SCAN_INFO **) alloc_root (param->mem_root,
+                                         sizeof(INDEX_SCAN_INFO *) * i)))
+    return TRUE;
+  bzero(common->search_scans, sizeof(INDEX_SCAN_INFO *) * i);
+
+  INDEX_SCAN_INFO **selected_index_scans= common->search_scans;
+    
+  for (i=0, index_scan= tree->index_scans; i < n_index_scans; i++, index_scan++)
+  {
+    uint used_key_parts= (*index_scan)->used_key_parts;
+    KEY *key_info= (*index_scan)->key_info;
+
+    if (*index_scan == cpk_scan)
+      continue;
+    if (cpk_scan && cpk_scan->used_key_parts >= used_key_parts &&
+        same_index_prefix(cpk_scan->key_info, key_info, used_key_parts))
+      continue;
+
+    cost= table->file->keyread_time((*index_scan)->keynr,
+                                    (*index_scan)->range_count,
+                                    (*index_scan)->records);
+    if (cost >= cutoff_cost)
+      continue;
+   
+    for (scan_ptr= selected_index_scans; *scan_ptr ; scan_ptr++)
+    {
+      /*
+        When we have range conditions for two different indexes with the same
+        beginning it does not make sense to consider both of them for index 
+        intersection if the range conditions are covered by common initial
+        components of the indexes. Actually in this case the indexes are
+        guaranteed to have the same range conditions.
+      */
+      if ((*scan_ptr)->used_key_parts == used_key_parts &&
+          same_index_prefix((*scan_ptr)->key_info, key_info, used_key_parts))
+        break;
+    }
+    if (!*scan_ptr || cost < (*scan_ptr)->index_read_cost)
+    {
+      *scan_ptr= *index_scan;
+      (*scan_ptr)->index_read_cost= cost;
+    }
+  } 
+
+  ha_rows records_in_scans= 0;
+
+  for (scan_ptr=selected_index_scans, i= 0; *scan_ptr; scan_ptr++, i++)
+  {
+    if (create_fields_bitmap(param, &(*scan_ptr)->used_fields))
+      return TRUE;
+    records_in_scans+= (*scan_ptr)->records;
+  }
+  n_search_scans= i;
+
+  if (cpk_scan && create_fields_bitmap(param, &cpk_scan->used_fields))
+    return TRUE;
+  
+  if (!(common->n_search_scans= n_search_scans))
+    return TRUE;
+    
+  common->best_uses_cpk= FALSE;
+  common->best_cost= cutoff_cost + COST_EPS;
+  common->best_length= 0;
+
+  if (!(common->best_intersect=
+	(INDEX_SCAN_INFO **) alloc_root (param->mem_root,
+                                         sizeof(INDEX_SCAN_INFO *) *
+                                         (i + test(cpk_scan != NULL)))))
+    return TRUE;
+
+  size_t calc_cost_buff_size=
+         Unique::get_cost_calc_buff_size((size_t)records_in_scans,
+                                         common->key_size,
+				         common->max_memory_size);
+  if (!(common->buff_elems= (uint *) alloc_root(param->mem_root,
+                                                calc_cost_buff_size)))
+    return TRUE;
+
+  my_qsort(selected_index_scans, n_search_scans, sizeof(INDEX_SCAN_INFO *),
+           (qsort_cmp) cmp_intersect_index_scan);
+
+  if (cpk_scan)
+  {
+    PARTIAL_INDEX_INTERSECT_INFO curr;
+    set_field_bitmap_for_index_prefix(&cpk_scan->used_fields,
+                                      cpk_scan->key_info->key_part,
+                                      cpk_scan->used_key_parts);
+    curr.common_info= common;
+    curr.intersect_fields= &cpk_scan->used_fields;
+    curr.records= cpk_scan->records;
+    curr.length= 1;
+    for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++)
+    {
+      ha_rows scan_records= (*scan_ptr)->records;
+      ha_rows records= records_in_index_intersect_extension(&curr, *scan_ptr);
+      (*scan_ptr)->filtered_out= records >= scan_records ?
+                                   0 : scan_records-records; 
+    }
+  } 
+  else
+  {
+    for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++)
+      (*scan_ptr)->filtered_out= 0;
+  }
+
+  return FALSE;
 }
 
 
 /*
-  Calculate cost of 'index only' scan for given index and number of records.
+  On Estimation of the Number of Records in an Index Intersection 
+  ===============================================================
+
+  Consider query Q over table t. Let C be the WHERE condition of  this query,
+  and, idx1(a1_1,...,a1_k1) and idx2(a2_1,...,a2_k2) be some indexes defined
+  on table t.
+  Let rt1 and rt2 be the range trees extracted by the range optimizer from C
+  for idx1 and idx2 respectively.
+  Let #t be the estimate of the number of records in table t provided for the
+  optimizer. 
+  Let #r1 and #r2 be the estimates of the number of records in the range trees
+  rt1 and rt2, respectively, obtained by the range optimizer.
+
+  We need to get an estimate for the number of records in the index 
+  intersection of rt1 and rt2. In other words, we need to estimate the
+  cardinality of the set of records that are in both trees. Let's designate
+  this number by #r.
+
+  If we do not make any assumptions then we can only state that
+     #r<=min(#r1,#r2).
+  With this estimate we can't say that the index intersection scan will be 
+  cheaper than the cheapest index scan.
+
+  Let Rt1 and Rt2 be AND/OR conditions representing rt1 and rt2 respectively.
+  The probability that a record belongs to rt1 is sel(Rt1)=#r1/#t.
+  The probability that a record belongs to rt2 is sel(Rt2)=#r2/#t.
+
+  If we assume that the values in columns of idx1 and idx2 are independent
+  then #r/#t=sel(Rt1&Rt2)=sel(Rt1)*sel(Rt2)=(#r1/#t)*(#r2/#t).
+  So in this case we have: #r=#r1*#r2/#t.
+
+  The above assumption of independence of the columns in idx1 and idx2 means
+  that:
+  - all columns are different
+  - values from one column do not correlate with values from any other column.
+
+  We can't help with the case when columns correlate with each other.
+  Yet, if they are assumed to be uncorrelated the value of #r theoretically can
+  be evaluated. Unfortunately this evaluation, in general, is rather complex.
+
+  Let's consider two indexes idx1:(dept, manager),  idx2:(dept, building)
+  over table 'employee' and two range conditions over these indexes:
+    Rt1: dept=10 AND manager LIKE 'S%'
+    Rt2: dept=10 AND building LIKE 'L%'.
+  We can state that:
+    sel(Rt1&Rt2)=sel(dept=10)*sel(manager LIKE 'S%')*sel(building LIKE 'L%')
+    =sel(Rt1)*sel(Rt2)/sel(dept=10).
+  sel(Rt1/2_0:dept=10) can be estimated if we know the cardinality #r1_0 of
+  the range for sub-index idx1_0 (dept) of the index idx1 or the cardinality
+  #r2_0 of the same range for sub-index idx2_0 (dept) of the index idx2.
+  The current code does not make an estimate either for #r1_0, or for #r2_0,
+  but it can be adjusted to provide those numbers.
+  Alternatively, min(rec_per_key) for (dept) could be used to get an upper 
+  bound for the value of sel(Rt1&Rt2). Yet this statistics is not provided
+  now.  
+ 
+  Let's consider two other indexes idx1:(dept, last_name), 
+  idx2:(first_name, last_name) and two range conditions over these indexes:
+    Rt1: dept=5 AND last_name='Sm%'
+    Rt2: first_name='Robert' AND last_name='Sm%'.
+
+  sel(Rt1&Rt2)=sel(dept=5)*sel(last_name='Sm%')*sel(first_name='Robert')
+  =sel(Rt2)*sel(dept=5)
+  Here max(rec_per_key) for (dept) could be used to get an upper bound for
+  the value of sel(Rt1&Rt2).
+  
+  When the intersected indexes have different major columns, but some
+  minor columns are common the picture may be more complicated.
+
+  Let's consider the following range conditions for the same indexes as in
+  the previous example:
+    Rt1: (Rt11: dept=5 AND last_name='So%') 
+         OR 
+         (Rt12: dept=7 AND last_name='Saw%')
+    Rt2: (Rt21: first_name='Robert' AND last_name='Saw%')
+         OR
+         (Rt22: first_name='Bob' AND last_name='So%')
+  Here we have:
+  sel(Rt1&Rt2)= sel(Rt11)*sel(Rt21)+sel(Rt22)*sel(dept=5) +
+                sel(Rt21)*sel(dept=7)+sel(Rt12)*sel(Rt22)
+  Now consider the range condition:
+    Rt1_0: (dept=5 OR dept=7)
+  For this condition we can state that:
+  sel(Rt1_0&Rt2)=(sel(dept=5)+sel(dept=7))*(sel(Rt21)+sel(Rt22))=
+  sel(dept=5)*sel(Rt21)+sel(dept=7)*sel(Rt21)+
+  sel(dept=5)*sel(Rt22)+sel(dept=7)*sel(Rt22)=
+  sel(dept=5)*sel(Rt21)+sel(Rt21)*sel(dept=7)+
+  sel(Rt22)*sel(dept=5)+sel(dept=7)*sel(Rt22) >
+  sel(Rt11)*sel(Rt21)+sel(Rt22)*sel(dept=5)+
+  sel(Rt21)*sel(dept=7)+sel(Rt12)*sel(Rt22) =
+  sel(Rt1 & Rt2) 
+
+  We've just demonstrated with an example what is intuitively almost obvious
+  in general: we can remove the ending parts from range trees, getting less
+  selective range conditions for sub-indexes.
+  So if the k-th component of an index idx, where k is not the most major
+  one, is encountered in the index with which we intersect, we can use the
+  sub-index idx_k-1 that includes the components of idx up to the (k-1)-th
+  component and the range tree for idx_k-1 to make an upper bound estimate
+  for the number of records in the index intersection.
+  The range tree for idx_k-1 we use here is the subtree of the original range
+  tree for idx that contains only parts from the first k-1 components.
+
+  As it was mentioned above the range optimizer currently does not provide
+  an estimate for the number of records in the ranges for sub-indexes.
+  However, some reasonable upper bound estimate can be obtained.
+
+  Let's consider the following range tree:
+    Rt: (first_name='Robert' AND last_name='Saw%')
+        OR
+        (first_name='Bob' AND last_name='So%')
+  Let #r be the number of records in Rt. Let f_1 be the fan-out of column
+  last_name:
+    f_1 = rec_per_key[first_name]/rec_per_key[last_name].
+  Then the number of records in the range tree:
+    Rt_0:  (first_name='Robert' OR first_name='Bob')
+  for the sub-index (first_name) is not greater than min(#r*f_1, #t).
+  Strictly speaking, we can state only that it's not greater than
+  min(#r*max_f_1, #t), where
+    max_f_1= max_rec_per_key[first_name]/min_rec_per_key[last_name].
+  Yet, if #r/#t is big enough (and this is the case of an index intersection,
+  because using this index range with a single index scan is cheaper than
+  the cost of the intersection when #r/#t is small) then almost safely we
+  can use here f_1 instead of max_f_1.
+
+  The above considerations can be used in future development. Now, they are
+  used partly in the function that provides a rough upper bound estimate for
+  the number of records in an index intersection that follows below.
+*/
+
+/*
+  Estimate the number of records in an extension of a partial intersection
 
   SYNOPSIS
-    get_index_only_read_time()
-      param    parameters structure
-      records  #of records to read
-      keynr    key to read
+    records_in_index_intersect_extension()
+     curr            partial intersection plan to be extended
+     ext_index_scan  the evaluated extension of this partial plan
+
+  DESCRIPTION
+    The function provides an estimate for the number of records in the
+    intersection of the partial index intersection curr with the index
+    ext_index_scan. If the intersected indexes do not have common columns
+    then the function returns an exact estimate (assuming there are no
+    correlations between values in the columns). If the intersected indexes
+    have common columns the function returns an upper bound for the number
+    of records in the intersection provided that the intersection of curr
+    with ext_index_scan is expected to have fewer records than the expected
+    number of records in the partial intersection curr. In this case the
+    function also assigns the bitmap of the columns in the extended
+    intersection to ext_index_scan->used_fields.
+    If the function cannot expect that the number of records in the extended
+    intersection is less than the expected number of records #r in curr then
+    the function returns a number bigger than #r.
 
   NOTES
-    It is assumed that we will read trough the whole key range and that all
-    key blocks are half full (normally things are much better). It is also
-    assumed that each time we read the next key from the index, the handler
-    performs a random seek, thus the cost is proportional to the number of
-    blocks read.
+   See the comment before the description of the function that explains the
+   reasoning used by this function.
+    
+  RETURN
+    The expected number of rows in the extended index intersection
+*/
 
-  TODO:
-    Move this to handler->read_time() by adding a flag 'index-only-read' to
-    this call. The reason for doing this is that the current function doesn't
-    handle the case when the row is stored in the b-tree (like in innodb
-    clustered index)
+static
+ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
+                                             INDEX_SCAN_INFO *ext_index_scan)
+{
+  KEY *key_info= ext_index_scan->key_info;
+  KEY_PART_INFO* key_part= key_info->key_part;
+  uint used_key_parts= ext_index_scan->used_key_parts;
+  MY_BITMAP *used_fields= &ext_index_scan->used_fields;
+  
+  if (!curr->length)
+  {
+    /* 
+      If this is the first index in the intersection just mark the
+      fields in the used_fields bitmap and return the expected
+      number of records in the range scan for the index provided
+      by the range optimizer.
+    */ 
+    set_field_bitmap_for_index_prefix(used_fields, key_part, used_key_parts);
+    return ext_index_scan->records;
+  }
+
+  uint i;
+  bool better_selectivity= FALSE;
+  ha_rows records= curr->records;
+  MY_BITMAP *curr_intersect_fields= curr->intersect_fields; 
+  for (i= 0; i < used_key_parts; i++, key_part++)
+  {
+    if (bitmap_is_set(curr_intersect_fields, key_part->fieldnr-1))
+      break;
+  }
+  if (i)
+  {
+    ha_rows table_cardinality= curr->common_info->table_cardinality;
+    ha_rows ext_records= ext_index_scan->records;
+    if (i < used_key_parts)
+    {
+      ulong *rec_per_key= key_info->rec_per_key+i-1;
+      ulong f1= rec_per_key[0] ? rec_per_key[0] : 1;
+      ulong f2= rec_per_key[1] ? rec_per_key[1] : 1;
+      ext_records= (ha_rows) ((double) ext_records / f2 * f1);
+    }
+    if (ext_records < table_cardinality)
+    {
+      better_selectivity= TRUE;
+      records= (ha_rows) ((double) records / table_cardinality *
+			  ext_records);
+      bitmap_copy(used_fields, curr_intersect_fields);
+      key_part= key_info->key_part;
+      for (uint j= 0; j < used_key_parts; j++, key_part++)
+        bitmap_set_bit(used_fields, key_part->fieldnr-1);
+    }
+  }
+  return !better_selectivity ? records+1 :
+                               !records ? 1 : records;
+}
+
+
+/* 
+  Estimate the cost of a binary search within disjoint cpk range intervals
+
+  Number of comparisons to check whether a cpk value satisfies
+  the cpk range condition = log2(cpk_scan->range_count).
+*/ 
+
+static inline
+double get_cpk_filter_cost(ha_rows filtered_records, 
+                           INDEX_SCAN_INFO *cpk_scan,
+                           double compare_factor)
+{
+  return log((double) (cpk_scan->range_count+1)) / (compare_factor * M_LN2) *
+           filtered_records;
+}
+
+
+/*
+  Check whether a partial index intersection plan can be extended
+
+  SYNOPSIS
+    check_index_intersect_extension()
+     curr            partial intersection plan to be extended
+     ext_index_scan  a possible extension of this plan to be checked
+     next       OUT  the structure to be filled for the extended plan 
+
+  DESCRIPTION
+    The function checks whether it makes sense to extend the index
+    intersection plan adding the index ext_index_scan, and, if this is
+    the case, the function fills in the structure for the extended plan.
+
+  RETURN
+    TRUE      if it makes sense to extend the given plan 
+    FALSE     otherwise
 */
 
-static double get_index_only_read_time(const PARAM* param, ha_rows records,
-                                       int keynr)
+static
+bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
+                                     INDEX_SCAN_INFO *ext_index_scan,
+                                     PARTIAL_INDEX_INTERSECT_INFO *next)
 {
-  double read_time;
-  uint keys_per_block= (param->table->file->stats.block_size/2/
-			(param->table->key_info[keynr].key_length+
-			 param->table->file->ref_length) + 1);
-  read_time=((double) (records+keys_per_block-1)/
-             (double) keys_per_block);
-  return read_time;
+  ha_rows records;
+  ha_rows records_sent_to_unique;
+  double cost;
+  ha_rows ext_index_scan_records= ext_index_scan->records;
+  ha_rows records_filtered_out_by_cpk= ext_index_scan->filtered_out;
+  COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info;
+  double cutoff_cost= common_info->cutoff_cost;
+  uint idx= curr->length;
+  next->index_read_cost= curr->index_read_cost+ext_index_scan->index_read_cost;
+  if (next->index_read_cost > cutoff_cost)
+    return FALSE; 
+
+  if ((next->in_memory= curr->in_memory))
+    next->in_memory_cost= curr->in_memory_cost;
+
+  next->intersect_fields= &ext_index_scan->used_fields;
+  next->filtered_scans= curr->filtered_scans;
+
+  records_sent_to_unique= curr->records_sent_to_unique;
+
+  next->use_cpk_filter= FALSE;
+
+  /* Calculate the cost of using a Unique object for index intersection */
+  if (idx && next->in_memory)
+  { 
+    /* 
+      All rowids received from the first scan are expected in one unique tree
+    */
+    ha_rows elems_in_tree= common_info->search_scans[0]->records-
+                           common_info->search_scans[0]->filtered_out ;
+    next->in_memory_cost+= Unique::get_search_cost(elems_in_tree,
+                                                   common_info->compare_factor)* 
+                             ext_index_scan_records;
+    cost= next->in_memory_cost;
+  }
+  else
+  {
+    uint *buff_elems= common_info->buff_elems;
+    uint key_size= common_info->key_size;
+    uint compare_factor= common_info->compare_factor;         
+    ulonglong max_memory_size= common_info->max_memory_size; 
+    
+    records_sent_to_unique+= ext_index_scan_records;
+    cost= Unique::get_use_cost(buff_elems, (size_t) records_sent_to_unique, key_size,
+                               max_memory_size, compare_factor, TRUE,
+                               &next->in_memory);
+    if (records_filtered_out_by_cpk)
+    {
+      /* Check whether using cpk filter for this scan is beneficial */
+
+      double cost2;
+      bool in_memory2;
+      ha_rows records2= records_sent_to_unique-records_filtered_out_by_cpk;
+      cost2=  Unique::get_use_cost(buff_elems, (size_t) records2, key_size,
+                                   max_memory_size, compare_factor, TRUE,
+                                   &in_memory2);
+      cost2+= get_cpk_filter_cost(ext_index_scan_records, common_info->cpk_scan,
+                                  compare_factor);
+      if (cost > cost2 + COST_EPS)
+      {
+        cost= cost2;
+        next->in_memory= in_memory2;
+        next->use_cpk_filter= TRUE;
+        records_sent_to_unique= records2;
+      }
+
+    }   
+    if (next->in_memory)
+      next->in_memory_cost= cost;
+  }
+
+  if (next->use_cpk_filter)
+  {
+    next->filtered_scans.set_bit(ext_index_scan->keynr);
+    bitmap_union(&ext_index_scan->used_fields,
+                 &common_info->cpk_scan->used_fields);
+  }
+  next->records_sent_to_unique= records_sent_to_unique;
+       
+  records= records_in_index_intersect_extension(curr, ext_index_scan);
+  if (idx && records > curr->records)
+    return FALSE;
+  if (next->use_cpk_filter && curr->filtered_scans.is_clear_all())
+    records-= records_filtered_out_by_cpk;
+  next->records= records;
+
+  cost+= next->index_read_cost;
+  if (cost >= cutoff_cost)
+    return FALSE;
+
+  cost+= get_sweep_read_cost(common_info->param, records);
+
+  next->cost= cost;
+  next->length= curr->length+1;
+
+  return TRUE;
+}
+
+
+/*
+  Search for the cheapest extensions of range scans used to access a table
+
+  SYNOPSIS
+    find_index_intersect_best_extension()
+      curr        partial intersection to evaluate all possible extensions for
+
+  DESCRIPTION
+    The function tries to extend the partial plan curr in all possible ways
+    to look for the cheapest index intersection whose cost is less than the
+    cut off value set in curr->common_info->cutoff_cost.
+*/
+
+static 
+void find_index_intersect_best_extension(PARTIAL_INDEX_INTERSECT_INFO *curr)
+{
+  PARTIAL_INDEX_INTERSECT_INFO next;
+  COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info;
+  INDEX_SCAN_INFO **index_scans= common_info->search_scans;
+  uint idx= curr->length;           /* search_scans[0..idx-1] make up curr */
+  INDEX_SCAN_INFO **rem_first_index_scan_ptr= &index_scans[idx];
+  double cost= curr->cost;
+
+  if (cost + COST_EPS < common_info->best_cost)
+  {
+    common_info->best_cost= cost;
+    common_info->best_length= curr->length;
+    common_info->best_records= curr->records;
+    common_info->filtered_scans= curr->filtered_scans;
+    /* common_info->best_uses_cpk <=> at least one scan uses a cpk filter */
+    common_info->best_uses_cpk= !curr->filtered_scans.is_clear_all();
+    uint sz= sizeof(INDEX_SCAN_INFO *) * curr->length;
+    memcpy(common_info->best_intersect, common_info->search_scans, sz);
+    common_info->cutoff_cost= cost;   /* the new best cost is the new cut off */
+  }   
+
+  if (!(*rem_first_index_scan_ptr))   /* NULL entry: no scan left to add */
+    return;  
+
+  next.common_info= common_info;
+  /* Put each remaining scan in turn at position idx, recurse, swap it back */
+  INDEX_SCAN_INFO *rem_first_index_scan= *rem_first_index_scan_ptr;
+  for (INDEX_SCAN_INFO **index_scan_ptr= rem_first_index_scan_ptr;
+       *index_scan_ptr; index_scan_ptr++)
+  {
+    *rem_first_index_scan_ptr= *index_scan_ptr;
+    *index_scan_ptr= rem_first_index_scan;
+    if (check_index_intersect_extension(curr, *rem_first_index_scan_ptr, &next))
+      find_index_intersect_best_extension(&next);
+    *index_scan_ptr= *rem_first_index_scan_ptr;
+    *rem_first_index_scan_ptr= rem_first_index_scan;
+  }
 }
 
 
-typedef struct st_ror_scan_info
+/*
+  Get the plan of the best intersection of range scans used to access a table
+
+  SYNOPSIS
+    get_best_index_intersect()
+      param         common info about index ranges
+      tree          tree of ranges for indexes that can be intersected
+      read_time     cut off value for the evaluated plans
+
+  DESCRIPTION
+    The function looks for the cheapest index intersection of the range
+    scans to access a table. The info about the ranges for all indexes
+    is provided by the range optimizer and is passed through the
+    parameters param and tree. Any plan whose cost is greater than read_time
+    is rejected.
+    After the best index intersection is found the function constructs
+    the structure that manages the execution by the chosen plan.
+
+  RETURN
+    Pointer to the generated execution structure if a success,
+    0 - otherwise.
+*/
+
+static
+TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
+                                              double read_time)
 {
-  uint      idx;      /* # of used key in param->keys */
-  uint      keynr;    /* # of used key in table */
-  ha_rows   records;  /* estimate of # records this scan will return */
+  uint i;
+  uint count;
+  TRP_RANGE **cur_range;
+  TRP_RANGE **range_scans;
+  INDEX_SCAN_INFO *index_scan;
+  COMMON_INDEX_INTERSECT_INFO common;  /* passed to the prepare call below */
+  PARTIAL_INDEX_INTERSECT_INFO init;   /* partial plan the search starts from */
+  TRP_INDEX_INTERSECT *intersect_trp= NULL;
+  TABLE *table= param->table;
+  
+  
+  DBUG_ENTER("get_best_index_intersect");
 
-  /* Set of intervals over key fields that will be used for row retrieval. */
-  SEL_ARG   *sel_arg;
+  if (prepare_search_best_index_intersect(param, tree, &common, &init,
+                                          read_time))
+    DBUG_RETURN(NULL);
 
-  /* Fields used in the query and covered by this ROR scan. */
-  MY_BITMAP covered_fields;
-  uint      used_fields_covered; /* # of set bits in covered_fields */
-  int       key_rec_length; /* length of key record (including rowid) */
+  find_index_intersect_best_extension(&init);
 
-  /*
-    Cost of reading all index records with values in sel_arg intervals set
-    (assuming there is no need to access full table records)
-  */
-  double    index_read_cost;
-  uint      first_uncovered_field; /* first unused bit in covered_fields */
-  uint      key_components; /* # of parts in the key */
+  if (common.best_length <= 1 && !common.best_uses_cpk)
+    DBUG_RETURN(NULL);                 /* at most one scan and no cpk filter */
+
+  if (common.best_uses_cpk)
+  {
+    memmove((char *) (common.best_intersect+1), (char *) common.best_intersect,
+            sizeof(INDEX_SCAN_INFO *) * common.best_length);
+    common.best_intersect[0]= common.cpk_scan;   /* the cpk scan goes first */
+    common.best_length++;
+  }
+
+  count= common.best_length;
+
+  if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
+                                            sizeof(TRP_RANGE *)*
+                                            count)))
+    DBUG_RETURN(NULL);
+
+  for (i= 0, cur_range= range_scans; i < count; i++)
+  {
+    index_scan= common.best_intersect[i];
+    if ((*cur_range= new (param->mem_root) TRP_RANGE(index_scan->sel_arg,
+                                                     index_scan->idx, 0)))
+    {  
+      TRP_RANGE *trp= *cur_range;  
+      trp->read_cost= index_scan->index_read_cost;  
+      trp->records= index_scan->records;        
+      trp->is_ror= FALSE;
+      trp->mrr_buf_size= 0;
+      table->intersect_keys.set_bit(index_scan->keynr);
+      cur_range++;                     /* advanced only if the plan was created */
+    }
+  }
+  /* Mark also keys for which same_index_prefix() holds w.r.t. a chosen scan */
+  count= tree->index_scans_end - tree->index_scans;
+  for (i= 0; i < count; i++)
+  {
+    index_scan= tree->index_scans[i]; 
+    if (!table->intersect_keys.is_set(index_scan->keynr))
+    {
+      for (uint j= 0; j < common.best_length; j++)
+      {
+	INDEX_SCAN_INFO *scan= common.best_intersect[j];
+        if (same_index_prefix(index_scan->key_info, scan->key_info,
+                              scan->used_key_parts))
+	{
+          table->intersect_keys.set_bit(index_scan->keynr);
+          break;
+        } 
+      }
+    }
+  }
+      
+  if ((intersect_trp= new (param->mem_root)TRP_INDEX_INTERSECT))
+  {
+    intersect_trp->read_cost= common.best_cost;
+    intersect_trp->records= common.best_records;
+    intersect_trp->range_scans= range_scans;
+    intersect_trp->range_scans_end= cur_range;
+    intersect_trp->filtered_scans= common.filtered_scans;
+  }
+  DBUG_RETURN(intersect_trp);          /* NULL if the allocation above failed */
+}
+
+
+typedef struct st_ror_scan_info : INDEX_SCAN_INFO
+{                             /* adds no members to INDEX_SCAN_INFO */
 } ROR_SCAN_INFO;
 
 
@@ -4117,7 +5809,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
   ror_scan->key_rec_length= (param->table->key_info[keynr].key_length +
                              param->table->file->ref_length);
   ror_scan->sel_arg= sel_arg;
-  ror_scan->records= param->table->quick_rows[keynr];
+  ror_scan->records= param->quick_rows[keynr];
 
   if (!(bitmap_buf= (my_bitmap_map*) alloc_root(param->mem_root,
                                                 param->fields_bitmap_size)))
@@ -4137,8 +5829,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
       bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1);
   }
   ror_scan->index_read_cost=
-    get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr],
-                             ror_scan->keynr);
+    param->table->file->keyread_time(ror_scan->keynr, 1, ror_scan->records);
   DBUG_RETURN(ror_scan);
 }
 
@@ -4423,7 +6114,7 @@ static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,
   }
   if (!prev_covered)
   {
-    double tmp= rows2double(info->param->table->quick_rows[scan->keynr]) /
+    double tmp= rows2double(info->param->quick_rows[scan->keynr]) /
                 rows2double(prev_records);
     DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
     selectivity_mult *= tmp;
@@ -4502,7 +6193,7 @@ static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
   }
   else
   {
-    info->index_records += info->param->table->quick_rows[ror_scan->keynr];
+    info->index_records += info->param->quick_rows[ror_scan->keynr];
     info->index_scan_costs += ror_scan->index_read_cost;
     bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
     if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
@@ -4612,7 +6303,6 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
   ROR_SCAN_INFO **cur_ror_scan;
   ROR_SCAN_INFO *cpk_scan= NULL;
   uint cpk_no;
-  bool cpk_scan_used= FALSE;
 
   if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                      sizeof(ROR_SCAN_INFO*)*
@@ -4624,11 +6314,20 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
   for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
   {
     ROR_SCAN_INFO *scan;
+    uint key_no;
     if (!tree->ror_scans_map.is_set(idx))
       continue;
+    key_no= param->real_keynr[idx];
+    if (key_no != cpk_no &&
+        param->table->file->index_flags(key_no,0,0) & HA_CLUSTERED_INDEX)
+    {
+      /* Ignore clustering keys */
+      tree->n_ror_scans--;
+      continue;
+    }
     if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
       return NULL;
-    if (param->real_keynr[idx] == cpk_no)
+    if (key_no == cpk_no)
     {
       cpk_scan= scan;
       tree->n_ror_scans--;
@@ -4714,15 +6413,14 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
   {
     if (ror_intersect_add(intersect, cpk_scan, TRUE) && 
         (intersect->total_cost < min_cost))
-    {
-      cpk_scan_used= TRUE;
       intersect_best= intersect; //just set pointer here
-    }
   }
+  else
+    cpk_scan= 0;                                // Don't use cpk_scan
 
   /* Ok, return ROR-intersect plan if we have found one */
   TRP_ROR_INTERSECT *trp= NULL;
-  if (min_cost < read_time && (cpk_scan_used || best_num > 1))
+  if (min_cost < read_time && (cpk_scan || best_num > 1))
   {
     if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
       DBUG_RETURN(trp);
@@ -4741,7 +6439,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
     set_if_smaller(param->table->quick_condition_rows, best_rows);
     trp->records= best_rows;
     trp->index_scan_costs= intersect_best->index_scan_costs;
-    trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;
+    trp->cpk_scan= cpk_scan;
     DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
                         "cost %g, records %lu",
                         trp->read_cost, (ulong) trp->records));
@@ -4753,7 +6451,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
 /*
   Get best covering ROR-intersection.
   SYNOPSIS
-    get_best_covering_ror_intersect()
+    get_best_covering_ror_intersect()
       param     Parameter from test_quick_select function.
       tree      SEL_TREE with sets of intervals for different keys.
       read_time Don't return table read plans with cost > read_time.
@@ -4925,11 +6623,12 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                        bool update_tbl_stats,
                                        double read_time)
 {
-  int idx;
+  uint idx;
   SEL_ARG **key,**end, **key_to_read= NULL;
   ha_rows UNINIT_VAR(best_records);              /* protected by key_to_read */
+  uint    UNINIT_VAR(best_mrr_flags),            /* protected by key_to_read */
+          UNINIT_VAR(best_buf_size);             /* protected by key_to_read */
   TRP_RANGE* read_plan= NULL;
-  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
   DBUG_ENTER("get_key_scans_params");
   /*
     Note that there may be trees that have type SEL_TREE::KEY but contain no
@@ -4940,14 +6639,23 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                       "tree scans"););
   tree->ror_scans_map.clear_all();
   tree->n_ror_scans= 0;
-  for (idx= 0,key=tree->keys, end=key+param->keys;
-       key != end ;
-       key++,idx++)
+  tree->index_scans= 0;
+  if (!tree->keys_map.is_clear_all())
+  {
+    tree->index_scans=
+      (INDEX_SCAN_INFO **) alloc_root(param->mem_root,
+                                      sizeof(INDEX_SCAN_INFO *) * param->keys);
+  }
+  tree->index_scans_end= tree->index_scans;                                                  
+  for (idx= 0,key=tree->keys, end=key+param->keys; key != end; key++,idx++)
   {
-    ha_rows found_records;
-    double found_read_time;
     if (*key)
     {
+      ha_rows found_records;
+      COST_VECT cost;
+      double found_read_time;
+      uint mrr_flags, buf_size;
+      INDEX_SCAN_INFO *index_scan;
       uint keynr= param->real_keynr[idx];
       if ((*key)->type == SEL_ARG::MAYBE_KEY ||
           (*key)->maybe_flag)
@@ -4956,48 +6664,37 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
       bool read_index_only= index_read_must_be_used ? TRUE :
                             (bool) param->table->covering_keys.is_set(keynr);
 
-      found_records= check_quick_select(param, idx, *key, update_tbl_stats);
-      if (param->is_ror_scan)
+      found_records= check_quick_select(param, idx, read_index_only, *key,
+                                        update_tbl_stats, &mrr_flags,
+                                        &buf_size, &cost);
+
+      if (found_records != HA_POS_ERROR && tree->index_scans &&
+          (index_scan= (INDEX_SCAN_INFO *)alloc_root(param->mem_root,
+						     sizeof(INDEX_SCAN_INFO))))
+      {
+        index_scan->idx= idx;
+        index_scan->keynr= keynr;
+        index_scan->key_info= &param->table->key_info[keynr];
+        index_scan->used_key_parts= param->max_key_part+1;
+        index_scan->range_count= param->range_count;
+        index_scan->records= found_records;
+        index_scan->sel_arg= *key;
+        *tree->index_scans_end++= index_scan;
+      }        
+      if ((found_records != HA_POS_ERROR) && param->is_ror_scan)
       {
         tree->n_ror_scans++;
         tree->ror_scans_map.set_bit(idx);
       }
-      double cpu_cost= (double) found_records / TIME_FOR_COMPARE;
-      if (found_records != HA_POS_ERROR && found_records > 2 &&
-          read_index_only &&
-          (param->table->file->index_flags(keynr, param->max_key_part,1) &
-           HA_KEYREAD_ONLY) &&
-          !(pk_is_clustered && keynr == param->table->s->primary_key))
-      {
-        /*
-          We can resolve this by only reading through this key. 
-          0.01 is added to avoid races between range and 'index' scan.
-        */
-        found_read_time= get_index_only_read_time(param,found_records,keynr) +
-                         cpu_cost + 0.01;
-      }
-      else
-      {
-        /*
-          cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks)
-          The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function.
-        */
-	found_read_time= param->table->file->read_time(keynr,
-                                                       param->range_count,
-                                                       found_records) +
-			 cpu_cost + 0.01;
-      }
-      DBUG_PRINT("info",("key %s: found_read_time: %g (cur. read_time: %g)",
-                         param->table->key_info[keynr].name, found_read_time,
-                         read_time));
-
-      if (read_time > found_read_time && found_records != HA_POS_ERROR)
+      if (found_records != HA_POS_ERROR &&
+          read_time > (found_read_time= cost.total_cost()))
       {
         read_time=    found_read_time;
         best_records= found_records;
         key_to_read=  key;
+        best_mrr_flags= mrr_flags;
+        best_buf_size=  buf_size;
       }
-
     }
   }
 
@@ -5006,11 +6703,13 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
   if (key_to_read)
   {
     idx= key_to_read - tree->keys;
-    if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx)))
+    if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx,
+                                                    best_mrr_flags)))
     {
       read_plan->records= best_records;
       read_plan->is_ror= tree->ror_scans_map.is_set(idx);
       read_plan->read_cost= read_time;
+      read_plan->mrr_buf_size= best_buf_size;
       DBUG_PRINT("info",
                  ("Returning range plan for key %s, cost %g, records %lu",
                   param->table->key_info[param->real_keynr[idx]].name,
@@ -5051,6 +6750,36 @@ QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param,
   return quick_imerge;
 }
 
+/* Create a QUICK_INDEX_INTERSECT_SELECT with a quick select per range scan */
+QUICK_SELECT_I *TRP_INDEX_INTERSECT::make_quick(PARAM *param,
+                                                bool retrieve_full_rows,
+                                                MEM_ROOT *parent_alloc)
+{
+  QUICK_INDEX_INTERSECT_SELECT *quick_intersect;
+  QUICK_RANGE_SELECT *quick;
+  /* index_merge always retrieves full rows, ignore retrieve_full_rows */
+  if (!(quick_intersect= new QUICK_INDEX_INTERSECT_SELECT(param->thd, param->table)))
+    return NULL;
+
+  quick_intersect->records= records;
+  quick_intersect->read_time= read_cost;
+  quick_intersect->filtered_scans= filtered_scans;
+  for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
+       range_scan++)
+  {
+    if (!(quick= (QUICK_RANGE_SELECT*)
+          ((*range_scan)->make_quick(param, FALSE, &quick_intersect->alloc)))||
+        quick_intersect->push_quick_back(quick))  /* non-zero is a failure */
+    {
+      delete quick;                     /* NULL if make_quick() itself failed */
+      delete quick_intersect;
+      return NULL;
+    }
+  }
+  return quick_intersect;
+}
+
+
 QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
                                               bool retrieve_full_rows,
                                               MEM_ROOT *parent_alloc)
@@ -5073,8 +6802,10 @@ QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
     for (; first_scan != last_scan;++first_scan)
     {
       if (!(quick= get_quick_select(param, (*first_scan)->idx,
-                                    (*first_scan)->sel_arg, alloc)) ||
-          quick_intrsect->push_quick_back(quick))
+                                    (*first_scan)->sel_arg,
+                                    HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
+                                    0, alloc)) ||
+          quick_intrsect->push_quick_back(alloc, quick))
       {
         delete quick_intrsect;
         DBUG_RETURN(NULL);
@@ -5083,7 +6814,9 @@ QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
     if (cpk_scan)
     {
       if (!(quick= get_quick_select(param, cpk_scan->idx,
-                                    cpk_scan->sel_arg, alloc)))
+                                    cpk_scan->sel_arg,
+                                    HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
+                                    0, alloc)))
       {
         delete quick_intrsect;
         DBUG_RETURN(NULL);
@@ -5496,11 +7229,10 @@ static SEL_TREE *get_full_func_mm_tree(RANGE_OPT_PARAM *param,
   Item_equal *item_equal= field_item->item_equal;
   if (item_equal)
   {
-    Item_equal_iterator it(*item_equal);
-    Item_field *item;
-    while ((item= it++))
+    Item_equal_fields_iterator it(*item_equal);
+    while (it++)
     {
-      Field *f= item->field;
+      Field *f= it.get_curr_field();
       if (field->eq(f))
         continue;
       if (!((ref_tables | f->table->map) & param_comp))
@@ -5563,7 +7295,7 @@ static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond)
     DBUG_RETURN(tree);
   }
   /* Here when simple cond */
-  if (cond->const_item())
+  if (cond->const_item() && !cond->is_expensive())
   {
     /*
       During the cond->val_int() evaluation we can come across a subselect 
@@ -5649,13 +7381,13 @@ static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond)
   case Item_func::MULT_EQUAL_FUNC:
   {
     Item_equal *item_equal= (Item_equal *) cond;    
-    if (!(value= item_equal->get_const()))
+    if (!(value= item_equal->get_const()) || value->is_expensive())
       DBUG_RETURN(0);
-    Item_equal_iterator it(*item_equal);
+    Item_equal_fields_iterator it(*item_equal);
     ref_tables= value->used_tables();
-    while ((field_item= it++))
+    while (it++)
     {
-      Field *field= field_item->field;
+      Field *field= it.get_curr_field();
       Item_result cmp_type= field->cmp_type();
       if (!((ref_tables | field->table->map) & param_comp))
       {
@@ -5682,6 +7414,9 @@ static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond)
     }
     else
       DBUG_RETURN(0);
+    if (value && value->is_expensive())
+      DBUG_RETURN(0);
+
     ftree= get_full_func_mm_tree(param, cond_func, field_item, value, inv);
   }
 
@@ -5730,6 +7465,7 @@ get_mm_parts(RANGE_OPT_PARAM *param, COND *cond_func, Field *field,
 	  DBUG_RETURN(0);			// OOM
       }
       sel_arg->part=(uchar) key_part->part;
+      sel_arg->max_part_no= sel_arg->part+1;
       tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg);
       tree->keys_map.set_bit(key_part->key);
     }
@@ -5750,7 +7486,6 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field,
   SEL_ARG *tree= 0;
   MEM_ROOT *alloc= param->mem_root;
   uchar *str;
-  ulonglong orig_sql_mode;
   int err;
   DBUG_ENTER("get_mm_leaf");
 
@@ -5920,16 +7655,8 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field,
     We can't always use indexes when comparing a string index to a number
     cmp_type() is checked to allow compare of dates to numbers
   */
-  if (field->result_type() == STRING_RESULT &&
-      value->result_type() != STRING_RESULT &&
-      field->cmp_type() != value->result_type())
+  if (field->cmp_type() == STRING_RESULT && value->cmp_type() != STRING_RESULT)
     goto end;
-  /* For comparison purposes allow invalid dates like 2000-01-32 */
-  orig_sql_mode= field->table->in_use->variables.sql_mode;
-  if (value->real_item()->type() == Item::STRING_ITEM &&
-      (field->type() == MYSQL_TYPE_DATE ||
-       field->type() == MYSQL_TYPE_DATETIME))
-    field->table->in_use->variables.sql_mode|= MODE_INVALID_DATES;
   err= value->save_in_field_no_warnings(field, 1);
   if (err > 0)
   {
@@ -5941,7 +7668,6 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field,
       {
         tree= new (alloc) SEL_ARG(field, 0, 0);
         tree->type= SEL_ARG::IMPOSSIBLE;
-        field->table->in_use->variables.sql_mode= orig_sql_mode;
         goto end;
       }
       else
@@ -5975,10 +7701,7 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field,
           */
         }
         else
-        {
-          field->table->in_use->variables.sql_mode= orig_sql_mode;
           goto end;
-        }
       }
     }
 
@@ -6001,12 +7724,10 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field,
   }
   else if (err < 0)
   {
-    field->table->in_use->variables.sql_mode= orig_sql_mode;
     /* This happens when we try to insert a NULL field in a not null column */
     tree= &null_element;                        // cmp with NULL is never TRUE
     goto end;
   }
-  field->table->in_use->variables.sql_mode= orig_sql_mode;
 
   /*
     Any sargable predicate except "<=>" involving NULL as a constant is always
@@ -6181,13 +7902,138 @@ sel_add(SEL_ARG *key1,SEL_ARG *key2)
   return root;
 }
 
-#define CLONE_KEY1_MAYBE 1
-#define CLONE_KEY2_MAYBE 2
-#define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1)
 
+/* 
+  Build a range tree for the conjunction of the range parts of two trees
 
-static SEL_TREE *
-tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
+  SYNOPSIS
+    and_range_trees()
+      param           Context info for the operation
+      tree1           SEL_TREE for the first conjunct          
+      tree2           SEL_TREE for the second conjunct
+      result          SEL_TREE for the result
+
+  DESCRIPTION
+    This function takes range parts of two trees tree1 and tree2 and builds
+    a range tree for the conjunction of the formulas that these two range parts
+    represent.
+    More exactly: 
+    if the range part of tree1 represents the normalized formula 
+      R1_1 AND ... AND R1_k,
+    and the range part of tree2 represents the normalized formula
+      R2_1 AND ... AND R2_k,
+    then the range part of the result represents the formula:
+     RT = R_1 AND ... AND R_k, where R_i=(R1_i AND R2_i) for each i from [1..k]
+
+    The function assumes that tree1 is never equal to tree2. At the same
+    time the tree result can be the same as tree1 (but never as tree2).
+    If result==tree1 then RT replaces the range part of tree1 leaving
+    imerges as they are.
+    If result!=tree1 then it is assumed that the SEL_ARG trees in tree1 and
+    tree2 should be preserved. Otherwise they can be destroyed.
+
+  RETURN
+    1    if the type of the result tree is SEL_TREE::IMPOSSIBLE
+    0    otherwise
+*/
+
+static
+int and_range_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2,
+                    SEL_TREE *result)
+{ /* ANDs tree1 and tree2 key by key into result; returns 1 if IMPOSSIBLE */
+  DBUG_ENTER("and_range_trees");
+  key_map  result_keys;                 /* keys with a non-NULL anded tree */
+  result_keys.clear_all();
+  key_map anded_keys= tree1->keys_map;  /* tree1 map merged with tree2 map */
+  anded_keys.merge(tree2->keys_map);
+  int key_no;
+  key_map::Iterator it(anded_keys);
+  while ((key_no= it++) != key_map::Iterator::BITMAP_END)
+  {
+    uint flag= 0;
+    SEL_ARG *key1= tree1->keys[key_no];
+    SEL_ARG *key2= tree2->keys[key_no];
+    if (key1 && !key1->simple_key())
+      flag|= CLONE_KEY1_MAYBE;
+    if (key2 && !key2->simple_key())
+      flag|= CLONE_KEY2_MAYBE;
+    if (result != tree1)
+    {                                   /* keep SEL_ARG trees of tree1/tree2 */
+      if (key1)
+        key1->incr_refs();
+      if (key2)
+        key2->incr_refs();
+    }
+    SEL_ARG *key;
+    if ((result->keys[key_no]= key= key_and(param, key1, key2, flag)))
+    {
+      if (key->type == SEL_ARG::IMPOSSIBLE)   /* key is non-NULL here */
+      {
+        result->type= SEL_TREE::IMPOSSIBLE;
+        DBUG_RETURN(1);
+      }
+      result_keys.set_bit(key_no);
+#ifdef EXTRA_DEBUG
+      if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
+        key->test_use_count(key);
+#endif
+    }
+  }
+  result->keys_map= result_keys;
+  DBUG_RETURN(0);
+}
+  
+
+/*
+  Build a SEL_TREE for a conjunction out of such trees for the conjuncts
+
+  SYNOPSIS
+    tree_and()
+      param           Context info for the operation
+      tree1           SEL_TREE for the first conjunct          
+      tree2           SEL_TREE for the second conjunct
+
+  DESCRIPTION
+    This function builds a tree for the formula (A AND B) out of the trees
+    tree1 and tree2 that has been built for the formulas A and B respectively.
+
+    In a general case
+      tree1 represents the formula RT1 AND MT1,
+        where RT1 = R1_1 AND ... AND R1_k1, MT1=M1_1 AND ... AND M1_l1;
+      tree2 represents the formula RT2 AND MT2 
+        where RT2 = R2_1 AND ... AND R2_k2, MT2=M2_1 and ... and M2_l2.
+
+    The result tree will represent the formula of the following structure:
+      RT AND MT1 AND MT2 AND RT1MT2 AND RT2MT1, such that
+        RT is a tree obtained by range intersection of trees tree1 and tree2,
+        RT1MT2 = RT1M2_1 AND ... AND RT1M2_l2,
+        RT2MT1 = RT2M1_1 AND ... AND RT2M1_l1,
+        where RT1M2_i (i=1,...,l2) is the result of the pushdown operation
+        of range tree RT1 into imerge M2_i, while RT2M1_j (j=1,...,l1) is the
+        result of the pushdown operation of range tree RT2 into imerge M1_j.
+
+    RT1MT2/RT2MT1 is empty if MT2/MT1 is empty.
+ 
+    The range intersection of two range trees is produced by the function
+    and_range_trees. The pushdown of a range tree to an imerge is performed
+    by the function imerge_list_and_tree. This function may produce imerges
+    containing only one range tree. Such trees are intersected with RT and
+    the result of intersection is returned as the range part of the result
+    tree, while the corresponding imerges are removed altogether from its
+    imerge part.
+    
+  NOTE.
+    The pushdown operation of range trees into imerges is needed to be able
+    to construct valid imerges for the condition like this:
+      key1_p1=c1 AND (key1_p2 BETWEEN c21 AND c22 OR key2 < c2)
+
+  RETURN
+    The result tree, if a success
+    0 - otherwise.        
+*/
+
+static 
+SEL_TREE *tree_and(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2)
 {
   DBUG_ENTER("tree_and");
   if (!tree1)
@@ -6209,87 +8055,216 @@ tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
     tree1->type=SEL_TREE::KEY_SMALLER;
     DBUG_RETURN(tree1);
   }
-  key_map  result_keys;
-  result_keys.clear_all();
-  
-  /* Join the trees key per key */
-  SEL_ARG **key1,**key2,**end;
-  for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ;
-       key1 != end ; key1++,key2++)
+
+  if (!tree1->merges.is_empty())
+    imerge_list_and_tree(param, &tree1->merges, tree2);
+  if (!tree2->merges.is_empty())
+    imerge_list_and_tree(param, &tree2->merges, tree1);
+  if (and_range_trees(param, tree1, tree2, tree1))
+    DBUG_RETURN(tree1);
+  imerge_list_and_list(&tree1->merges, &tree2->merges);
+  eliminate_single_tree_imerges(param, tree1);
+  DBUG_RETURN(tree1);
+}
+
+
+/*
+  Eliminate single tree imerges in a SEL_TREE object
+
+  SYNOPSIS
+    eliminate_single_tree_imerges()
+      param      Context info for the function
+      tree       SEL_TREE where single tree imerges are to be eliminated 
+
+  DESCRIPTION
+    For each imerge in 'tree' that contains only one disjunct tree, i.e.
+    for any imerge of the form m=rt, the function ANDs rt with the range
+    part of 'tree', replaces the range part with the result of the anding,
+    and removes imerge m from the merge part of 'tree'.
+
+  RETURN VALUE
+    none          
+*/
+
+static
+void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param, SEL_TREE *tree)
+{
+  SEL_IMERGE *imerge;
+  List<SEL_IMERGE> merges= tree->merges;
+  List_iterator<SEL_IMERGE> it(merges);
+  tree->merges.empty();
+  while ((imerge= it++))
   {
-    uint flag=0;
-    if (*key1 || *key2)
-    {
-      if (*key1 && !(*key1)->simple_key())
-	flag|=CLONE_KEY1_MAYBE;
-      if (*key2 && !(*key2)->simple_key())
-	flag|=CLONE_KEY2_MAYBE;
-      *key1=key_and(param, *key1, *key2, flag);
-      if (*key1 && (*key1)->type == SEL_ARG::IMPOSSIBLE)
-      {
-	tree1->type= SEL_TREE::IMPOSSIBLE;
-        DBUG_RETURN(tree1);
-      }
-      result_keys.set_bit(key1 - tree1->keys);
-#ifdef EXTRA_DEBUG
-        if (*key1 && param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS) 
-          (*key1)->test_use_count(*key1);
-#endif
+    if (imerge->trees+1 == imerge->trees_next)
+    {
+      tree= tree_and(param, tree, *imerge->trees);
+      it.remove();
     }
   }
-  tree1->keys_map= result_keys;
-  /* dispose index_merge if there is a "range" option */
-  if (!result_keys.is_clear_all())
-  {
-    tree1->merges.empty();
-    DBUG_RETURN(tree1);
-  }
+  tree->merges= merges;
+} 
 
-  /* ok, both trees are index_merge trees */
-  imerge_list_and_list(&tree1->merges, &tree2->merges);
-  DBUG_RETURN(tree1);
+
+/*
+  For two trees check that there are indexes with ranges in both of them  
+ 
+  SYNOPSIS
+    sel_trees_have_common_keys()
+      tree1           SEL_TREE for the first tree
+      tree2           SEL_TREE for the second tree
+      common_keys OUT bitmap of all indexes with ranges in both trees
+
+  DESCRIPTION
+    For two trees tree1 and tree2 the function checks if there are indexes
+    in their range parts such that SEL_ARG trees are defined for them in the
+    range parts of both trees. The function returns the bitmap of such 
+    indexes in the parameter common_keys.
+
+  RETURN 
+    TRUE    if there are such indexes (common_keys is not empty)
+    FALSE   otherwise
+*/
+
+static
+bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2, 
+                                key_map *common_keys)
+{
+  *common_keys= tree1->keys_map;
+  common_keys->intersect(tree2->keys_map);
+  return !common_keys->is_clear_all();
 }
 
 
 /*
-  Check if two SEL_TREES can be combined into one (i.e. a single key range
-  read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without
-  using index_merge.
+  Check whether range parts of two trees can be ored for some indexes
+
+  SYNOPSIS
+    sel_trees_can_be_ored()
+      param              Context info for the function
+      tree1              SEL_TREE for the first tree
+      tree2              SEL_TREE for the second tree
+      common_keys IN/OUT IN: bitmap of all indexes with SEL_ARG in both trees
+                        OUT: bitmap of all indexes that can be ored
+
+  DESCRIPTION
+    For two trees tree1 and tree2 and the bitmap common_keys containing
+    bits for indexes that have SEL_ARG trees in range parts of both trees
+    the function checks if there are indexes for which SEL_ARG trees can
+    be ored. Two SEL_ARG trees for the same index can be ored if the most
+    major components of the index used in these trees coincide. If the 
+    SEL_ARG trees for an index cannot be ored the function clears the bit
+    for this index in the bitmap common_keys.
+
+    The function does not verify that indexes marked in common_keys really
+    have SEL_ARG trees in both tree1 and tree2. It assumes that this is true.
+
+  NOTE
+    The function sel_trees_can_be_ored is usually used in pair with the
+    function sel_trees_have_common_keys.
+
+  RETURN
+    TRUE    if there are indexes for which SEL_ARG trees can be ored 
+    FALSE   otherwise
 */
 
-bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, 
-                           RANGE_OPT_PARAM* param)
+static
+bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param,
+                           SEL_TREE *tree1, SEL_TREE *tree2, 
+                           key_map *common_keys)
 {
-  key_map common_keys= tree1->keys_map;
   DBUG_ENTER("sel_trees_can_be_ored");
-  common_keys.intersect(tree2->keys_map);
+  if (!sel_trees_have_common_keys(tree1, tree2, common_keys))
+    DBUG_RETURN(FALSE);
+  int key_no;
+  key_map::Iterator it(*common_keys);
+  while ((key_no= it++) != key_map::Iterator::BITMAP_END)
+  {
+    DBUG_ASSERT(tree1->keys[key_no] && tree2->keys[key_no]);
+    /* Trees have a common key, check if they refer to the same key part */
+    if (tree1->keys[key_no]->part != tree2->keys[key_no]->part)
+      common_keys->clear_bit(key_no);
+  }
+  DBUG_RETURN(!common_keys->is_clear_all());
+}
+
+/*
+  Check whether range parts of two trees must be ored for some indexes
+
+  SYNOPSIS
+    sel_trees_must_be_ored()
+      param              Context info for the function
+      tree1              SEL_TREE for the first tree
+      tree2              SEL_TREE for the second tree
+      oredable_keys      bitmap of SEL_ARG trees that can be ored
+
+  DESCRIPTION
+    For two trees tree1 and tree2 the function checks whether they must be
+    ored. The function assumes that the bitmap oredable_keys contains bits for
+    those corresponding pairs of SEL_ARG trees from tree1 and tree2 that can
+    be ored.
+    We believe that tree1 and tree2 must be ored if any pair of SEL_ARG trees
+    r1 and r2, such that r1 is from tree1 and r2 is from tree2 and both
+    of them are marked in oredable_keys, can be merged.
+    
+  NOTE
+    The function sel_trees_must_be_ored as a rule is used in pair with the
+    function sel_trees_can_be_ored.
+
+  RETURN
+    TRUE    if there are indexes for which SEL_ARG trees must be ored 
+    FALSE   otherwise
+*/
+
+static
+bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param,
+                            SEL_TREE *tree1, SEL_TREE *tree2,
+                            key_map oredable_keys)
+{
+  key_map tmp;
+  DBUG_ENTER("sel_trees_must_be_ored");
 
-  if (common_keys.is_clear_all())
+  tmp= tree1->keys_map;
+  tmp.merge(tree2->keys_map);
+  tmp.subtract(oredable_keys);
+  if (!tmp.is_clear_all())
     DBUG_RETURN(FALSE);
 
-  /* trees have a common key, check if they refer to same key part */
-  SEL_ARG **key1,**key2;
-  for (uint key_no=0; key_no < param->keys; key_no++)
+  int idx1, idx2;
+  key_map::Iterator it1(oredable_keys);
+  while ((idx1= it1++) != key_map::Iterator::BITMAP_END)
   {
-    if (common_keys.is_set(key_no))
+    KEY_PART *key1_init= param->key[idx1]+tree1->keys[idx1]->part;
+    KEY_PART *key1_end= param->key[idx1]+tree1->keys[idx1]->max_part_no;
+    key_map::Iterator it2(oredable_keys);
+    while ((idx2= it2++) != key_map::Iterator::BITMAP_END)
     {
-      key1= tree1->keys + key_no;
-      key2= tree2->keys + key_no;
-      if ((*key1)->part == (*key2)->part)
-      {
-        DBUG_RETURN(TRUE);
+      if (idx2 <= idx1)
+        continue;
+      
+      KEY_PART *key2_init= param->key[idx2]+tree2->keys[idx2]->part;
+      KEY_PART *key2_end= param->key[idx2]+tree2->keys[idx2]->max_part_no;
+      KEY_PART *part1, *part2;
+      for (part1= key1_init, part2= key2_init;
+           part1 < key1_end && part2 < key2_end;
+           part1++, part2++)
+      { 
+        if (!part1->field->eq(part2->field))
+          DBUG_RETURN(FALSE);
       }
     }
   }
-  DBUG_RETURN(FALSE);
-}
+      
+  DBUG_RETURN(TRUE);
+}  
 
 
 /*
-  Remove the trees that are not suitable for record retrieval.
+  Remove the trees that are not suitable for record retrieval
+
   SYNOPSIS
-    param  Range analysis parameter
-    tree   Tree to be processed, tree->type is KEY or KEY_SMALLER
+    remove_nonrange_trees()
+      param  Context info for the function
+      tree   Tree to be processed, tree->type is KEY or KEY_SMALLER
  
   DESCRIPTION
     This function walks through tree->keys[] and removes the SEL_ARG* trees
@@ -6300,41 +8275,36 @@ bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2,
 
     A SEL_ARG* tree cannot be used to construct quick select if it has
     tree->part != 0. (e.g. it could represent "keypart2 < const").
-
-    WHY THIS FUNCTION IS NEEDED
     
     Normally we allow construction of SEL_TREE objects that have SEL_ARG
-    trees that do not allow quick range select construction. For example for
-    " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
+    trees that do not allow quick range select construction.
+    For example:
+    for " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
     tree1= SEL_TREE { SEL_ARG{keypart1=1} }
     tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
                                                from this
     call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
                                    tree.
-    
-    There is an exception though: when we construct index_merge SEL_TREE,
-    any SEL_ARG* tree that cannot be used to construct quick range select can
-    be removed, because current range analysis code doesn't provide any way
-    that tree could be later combined with another tree.
-    Consider an example: we should not construct
-    st1 = SEL_TREE { 
-      merges = SEL_IMERGE { 
-                            SEL_TREE(t.key1part1 = 1), 
-                            SEL_TREE(t.key2part2 = 2)   -- (*)
-                          } 
-                   };
-    because 
-     - (*) cannot be used to construct quick range select, 
-     - There is no execution path that would cause (*) to be converted to 
-       a tree that could be used.
-
-    The latter is easy to verify: first, notice that the only way to convert
-    (*) into a usable tree is to call tree_and(something, (*)).
-
-    Second look at what tree_and/tree_or function would do when passed a
-    SEL_TREE that has the structure like st1 tree has, and conlcude that 
-    tree_and(something, (*)) will not be called.
 
+    Another example:
+    tree3= SEL_TREE { SEL_ARG{key1part1 = 1} }
+    tree4= SEL_TREE { SEL_ARG{key2part2 = 2} }  -- can't make quick range select
+                                               from this
+    call tree_or(tree3, tree4) -- creates a SEL_IMERGE out of which no index
+    merge can be constructed, but it is potentially useful, as anding it with
+    tree5= SEL_TREE { SEL_ARG{key2part1 = 3} } creates an index merge that
+    represents the formula
+      key1part1=1 AND key2part1=3 OR key2part1=3 AND key2part2=2 
+    for which an index merge can be built. 
+
+    Any final SEL_TREE may contain SEL_ARG trees for which no quick select
+    can be built. Such SEL_ARG trees should be removed from the range part
+    before different range scans are evaluated. Such SEL_ARG trees also should
+    be removed from all range trees of each index merge before different
+    possible index merge plans are evaluated. If after this removal one
+    of the range trees in the index merge becomes empty the whole index merge
+    must be discarded.
+       
   RETURN
     0  Ok, some suitable trees left
     1  No tree->keys[] left.
@@ -6360,6 +8330,74 @@ static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree)
 }
 
 
+/*
+  Build a SEL_TREE for a disjunction out of such trees for the disjuncts
+
+  SYNOPSIS
+    tree_or()
+      param           Context info for the operation
+      tree1           SEL_TREE for the first disjunct          
+      tree2           SEL_TREE for the second disjunct
+
+  DESCRIPTION
+    This function builds a tree for the formula (A OR B) out of the trees
+    tree1 and tree2 that has been built for the formulas A and B respectively.
+
+    In a general case
+      tree1 represents the formula RT1 AND MT1,
+        where RT1=R1_1 AND ... AND R1_k1, MT1=M1_1 AND ... AND M1_l1;
+      tree2 represents the formula RT2 AND MT2 
+        where RT2=R2_1 AND ... AND R2_k2, MT2=M2_1 and ... and M2_l2.
+
+    The function constructs the result tree according to the formula
+      (RT1 OR RT2) AND (MT1 OR RT2) AND (MT2 OR RT1) AND (MT1 OR MT2)
+    that is equivalent to the formula (RT1 AND MT1) OR (RT2 AND MT2).
+
+    To limit the number of produced imerges the function considers
+    a weaker formula than the original one:
+      (RT1 AND M1_1) OR (RT2 AND M2_1) 
+    that is equivalent to:
+      (RT1 OR RT2)                  (1)
+        AND 
+      (M1_1 OR M2_1)                (2)
+        AND
+      (M1_1 OR RT2)                 (3)
+        AND
+      (M2_1 OR RT1)                 (4)
+
+    For the first conjunct (1) the function builds a tree with a range part
+    and, possibly, one imerge. For the other conjuncts (2-4) the function
+    produces sets of imerges. All constructed imerges are included into the
+    result tree.
+    
+    For the formula (1) the function produces the tree representing a formula  
+    of the structure RT [AND M], such that:
+     - the range tree rt contains the result of oring SEL_ARG trees from rt1
+       and rt2
+     - the imerge m consists of two range trees rt1 and rt2.
+    The imerge m is added if it's not true that rt1 and rt2 must be ored
+    If rt1 and rt2 can't be ored rt is empty and only m is produced for (1).
+
+    To produce imerges for the formula (2) the function calls the function
+    imerge_list_or_list passing it the merge parts of tree1 and tree2 as
+    parameters.
+
+    To produce imerges for the formula (3) the function calls the function
+    imerge_list_or_tree passing it the imerge m1_1 and the range tree rt2 as
+    parameters. Similarly, to produce imerges for the formula (4) the function
+    calls the function imerge_list_or_tree passing it the imerge m2_1 and the
+    range tree rt1.
+
+    If rt1 is empty then the trees for (1) and (4) are empty.
+    If rt2 is empty then the trees for (1) and (3) are empty.
+    If mt1 is empty then the trees for (2) and (3) are empty.
+    If mt2 is empty then the trees for (2) and (4) are empty.
+
+  RETURN
+    The result tree for the operation if a success
+    0 - otherwise
+*/
+
 static SEL_TREE *
 tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
 {
@@ -6375,74 +8413,100 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
   if (tree2->type == SEL_TREE::MAYBE)
     DBUG_RETURN(tree2);
 
-  SEL_TREE *result= 0;
-  key_map  result_keys;
-  result_keys.clear_all();
-  if (sel_trees_can_be_ored(tree1, tree2, param))
+  SEL_TREE *result= NULL;
+  key_map result_keys;
+  key_map ored_keys;
+  SEL_TREE *rtree[2]= {NULL,NULL};
+  SEL_IMERGE *imerge[2]= {NULL, NULL};
+  bool no_ranges1= tree1->without_ranges();
+  bool no_ranges2= tree2->without_ranges();
+  bool no_merges1= tree1->without_imerges();
+  bool no_merges2= tree2->without_imerges();
+  if (!no_ranges1 && !no_merges2)
   {
-    /* Join the trees key per key */
-    SEL_ARG **key1,**key2,**end;
-    for (key1= tree1->keys,key2= tree2->keys,end= key1+param->keys ;
-         key1 != end ; key1++,key2++)
-    {
-      *key1=key_or(param, *key1, *key2);
-      if (*key1)
-      {
-        result=tree1;				// Added to tree1
-        result_keys.set_bit(key1 - tree1->keys);
-#ifdef EXTRA_DEBUG
-        if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS) 
-          (*key1)->test_use_count(*key1);
-#endif
-      }
-    }
-    if (result)
-      result->keys_map= result_keys;
+    rtree[0]= new SEL_TREE(tree1, TRUE, param);
+    imerge[1]= new SEL_IMERGE(tree2->merges.head(), 0, param);
   }
-  else
+  if (!no_ranges2 && !no_merges1)
+  {
+    rtree[1]= new SEL_TREE(tree2, TRUE, param);
+    imerge[0]= new SEL_IMERGE(tree1->merges.head(), 0, param);
+  }
+  bool no_imerge_from_ranges= FALSE;
+  if (!(result= new SEL_TREE()))
+    DBUG_RETURN(result);
+
+  /* Build the range part of the tree for the formula (1) */ 
+  if (sel_trees_can_be_ored(param, tree1, tree2, &ored_keys))
   {
-    /* ok, two trees have KEY type but cannot be used without index merge */
-    if (tree1->merges.is_empty() && tree2->merges.is_empty())
+    bool must_be_ored= sel_trees_must_be_ored(param, tree1, tree2, ored_keys);
+    no_imerge_from_ranges= must_be_ored;
+    key_map::Iterator it(ored_keys);
+    int key_no;
+    while ((key_no= it++) != key_map::Iterator::BITMAP_END)
     {
-      if (param->remove_jump_scans)
+      SEL_ARG *key1= tree1->keys[key_no];
+      SEL_ARG *key2= tree2->keys[key_no];
+      if (!must_be_ored)
       {
-        bool no_trees= remove_nonrange_trees(param, tree1);
-        no_trees= no_trees || remove_nonrange_trees(param, tree2);
-        if (no_trees)
-          DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
+        key1->incr_refs();
+        key2->incr_refs();
       }
-      SEL_IMERGE *merge;
-      /* both trees are "range" trees, produce new index merge structure */
-      if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) ||
-          (result->merges.push_back(merge)) ||
-          (merge->or_sel_tree(param, tree1)) ||
-          (merge->or_sel_tree(param, tree2)))
-        result= NULL;
-      else
-        result->type= tree1->type;
-    }
-    else if (!tree1->merges.is_empty() && !tree2->merges.is_empty())
-    {
-      if (imerge_list_or_list(param, &tree1->merges, &tree2->merges))
-        result= new SEL_TREE(SEL_TREE::ALWAYS);
-      else
-        result= tree1;
+      if ((result->keys[key_no]= key_or(param, key1, key2)))
+        result->keys_map.set_bit(key_no);
     }
-    else
-    {
-      /* one tree is index merge tree and another is range tree */
-      if (tree1->merges.is_empty())
-        swap_variables(SEL_TREE*, tree1, tree2);
+    result->type= tree1->type;
+  }
       
-      if (param->remove_jump_scans && remove_nonrange_trees(param, tree2))
-         DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
-      /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
-      if (imerge_list_or_tree(param, &tree1->merges, tree2))
-        result= new SEL_TREE(SEL_TREE::ALWAYS);
-      else
-        result= tree1;
-    }
+  if (no_imerge_from_ranges && no_merges1 && no_merges2)
+  {
+    if (result->keys_map.is_clear_all())
+      result->type= SEL_TREE::ALWAYS;
+    DBUG_RETURN(result);
   }
+
+  SEL_IMERGE *imerge_from_ranges;
+  if (!(imerge_from_ranges= new SEL_IMERGE()))
+    result= NULL;
+  else if (!no_ranges1 && !no_ranges2 && !no_imerge_from_ranges)
+  {
+    /* Build the imerge part of the tree for the formula (1) */
+    SEL_TREE *rt1= tree1;
+    SEL_TREE *rt2= tree2;
+    if (!no_merges1)
+      rt1= new SEL_TREE(tree1, TRUE, param);
+    if (!no_merges2)
+      rt2= new SEL_TREE(tree2, TRUE, param);
+    if (!rt1 || !rt2 ||
+        result->merges.push_back(imerge_from_ranges) ||
+        imerge_from_ranges->or_sel_tree(param, rt1) ||
+        imerge_from_ranges->or_sel_tree(param, rt2))
+      result= NULL;
+  }
+  if (!result)
+    DBUG_RETURN(result);
+
+  result->type= tree1->type;
+
+  if (!no_merges1 && !no_merges2 && 
+      !imerge_list_or_list(param, &tree1->merges, &tree2->merges))
+  {
+    /* Build the imerges for the formula (2) */
+    imerge_list_and_list(&result->merges, &tree1->merges);
+  }
+
+  /* Build the imerges for the formulas (3) and (4) */
+  for (uint i=0; i < 2; i++)
+  {
+    List<SEL_IMERGE> merges;
+    SEL_TREE *rt= rtree[i];
+    SEL_IMERGE *im= imerge[1-i];
+    
+    if (rt && im && !merges.push_back(im) && 
+        !imerge_list_or_tree(param, &merges, rt))
+      imerge_list_and_list(&result->merges, &merges);
+  }
+ 
   DBUG_RETURN(result);
 }
 
@@ -6488,6 +8552,7 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
   if (!key1)
     return &null_element;			// Impossible ranges
   key1->use_count++;
+  key1->max_part_no= max(key2->max_part_no, key2->part+1);
   return key1;
 }
 
@@ -6580,6 +8645,7 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
   key1->use_count--;
   key2->use_count--;
   SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;
+  uint max_part_no= max(key1->max_part_no, key2->max_part_no);
 
   while (e1 && e2)
   {
@@ -6617,6 +8683,7 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
   key2->free_tree();
   if (!new_tree)
     return &null_element;			// Impossible range
+  new_tree->max_part_no= max_part_no;
   return new_tree;
 }
 
@@ -6722,7 +8789,7 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
   {
     key1->free_tree();
     key2->free_tree();
-    return 0;					// Can't optimize this
+    return 0;                                   // Can't optimize this
   }
 
   // If one of the key is MAYBE_KEY then the found region may be bigger
@@ -6745,247 +8812,548 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
     {
       swap_variables(SEL_ARG *,key1,key2);
     }
-    if (key1->use_count > 0 || !(key1=key1->clone_tree(param)))
-      return 0;					// OOM
+    if (key1->use_count > 0 && !(key1=key1->clone_tree(param)))
+      return 0;                                 // OOM
   }
 
   // Add tree at key2 to tree at key1
   bool key2_shared=key2->use_count != 0;
   key1->maybe_flag|=key2->maybe_flag;
 
+  /*
+    Notation for illustrations used in the rest of this function: 
+
+      Range: [--------]
+             ^        ^
+             start    stop
+
+      Two overlapping ranges:
+        [-----]               [----]            [--]
+            [---]     or    [---]       or   [-------]
+
+      Ambiguity: *** 
+        The range starts or stops somewhere in the "***" range.
+        Example: a starts before b and may end before/the same place/after b
+        a: [----***]
+        b:   [---]
+
+      Adjacent ranges:
+        Ranges that meet but do not overlap. Example: a = "x < 3", b = "x >= 3"
+        a: ----]
+        b:      [----
+   */
+
+  uint max_part_no= max(key1->max_part_no, key2->max_part_no);
+
   for (key2=key2->first(); key2; )
   {
-    SEL_ARG *tmp=key1->find_range(key2);	// Find key1.min <= key2.min
-    int cmp;
+    /*
+      key1 consists of one or more ranges. tmp is the range currently
+      being handled.
+
+      initialize tmp to the latest range in key1 that starts the same
+      place or before the range in key2 starts
+
+      key2:           [------]
+      key1: [---] [-----] [----]
+                  ^
+                  tmp
+    */
+    SEL_ARG *tmp=key1->find_range(key2);
+
+    /*
+      Used to describe how two key values are positioned compared to
+      each other. Consider key_value_a.<cmp_func>(key_value_b):
+
+        -2: key_value_a is smaller than key_value_b, and they are adjacent
+        -1: key_value_a is smaller than key_value_b (not adjacent)
+         0: the key values are equal
+         1: key_value_a is bigger than key_value_b (not adjacent)
+         2: key_value_a is bigger than key_value_b, and they are adjacent
+
+      Example: "cmp= tmp->cmp_max_to_min(key2)"
+
+      key2:         [--------            (10 <= x ...)
+      tmp:    -----]                      (... x <  10) => cmp==-2
+      tmp:    ----]                       (... x <=  9) => cmp==-1
+      tmp:    ------]                     (... x  = 10) => cmp== 0
+      tmp:    --------]                   (... x <= 12) => cmp== 1
+      (cmp == 2 does not make sense for cmp_max_to_min())
+     */
+    int cmp= 0;
 
     if (!tmp)
     {
-      tmp=key1->first();			// tmp.min > key2.min
+      /*
+        The range in key2 starts before the first range in key1. Use
+        the first range in key1 as tmp.
+
+        key2:     [--------]
+        key1:            [****--] [----]   [-------]
+                         ^
+                         tmp
+      */
+      tmp=key1->first();
       cmp= -1;
     }
-    else if ((cmp=tmp->cmp_max_to_min(key2)) < 0)
-    {						// Found tmp.max < key2.min
+    else if ((cmp= tmp->cmp_max_to_min(key2)) < 0)
+    {
+      /*
+        This is the case:
+        key2:          [-------]
+        tmp:   [----**]
+       */
       SEL_ARG *next=tmp->next;
-      /* key1 on the left of key2 non-overlapping */
       if (cmp == -2 && eq_tree(tmp->next_key_part,key2->next_key_part))
       {
-	// Join near ranges like tmp.max < 0 and key2.min >= 0
-	SEL_ARG *key2_next=key2->next;
-	if (key2_shared)
-	{
-	  if (!(key2=new SEL_ARG(*key2)))
-	    return 0;		// out of memory
-	  key2->increment_use_count(key1->use_count+1);
-	  key2->next=key2_next;			// New copy of key2
-	}
-	key2->copy_min(tmp);
-	if (!(key1=key1->tree_delete(tmp)))
-	{					// Only one key in tree
-	  key1=key2;
-	  key1->make_root();
-	  key2=key2_next;
-	  break;
-	}
+        /*
+          Adjacent (cmp==-2) and equal next_key_parts => ranges can be merged
+
+          This is the case:
+          key2:          [-------]
+          tmp:     [----]
+
+          Result:
+          key2:    [-------------]     => inserted into key1 below
+          tmp:                         => deleted
+        */
+        SEL_ARG *key2_next=key2->next;
+        if (key2_shared)
+        {
+          if (!(key2=new SEL_ARG(*key2)))
+            return 0;           // out of memory
+          key2->increment_use_count(key1->use_count+1);
+          key2->next=key2_next;                 // New copy of key2
+        }
+
+        key2->copy_min(tmp);
+        if (!(key1=key1->tree_delete(tmp)))
+        {                                       // Only one key in tree
+          key1=key2;
+          key1->make_root();
+          key2=key2_next;
+          break;
+        }
       }
-      if (!(tmp=next))				// tmp.min > key2.min
-	break;					// Copy rest of key2
+      if (!(tmp=next)) // Move to next range in key1. Now tmp.min > key2.min
+        break;         // No more ranges in key1. Copy rest of key2
     }
+
     if (cmp < 0)
-    {						// tmp.min > key2.min
+    {
+      /*
+        This is the case:
+        key2:  [--***]
+        tmp:       [----]
+      */
       int tmp_cmp;
-      if ((tmp_cmp=tmp->cmp_min_to_max(key2)) > 0) // if tmp.min > key2.max
+      if ((tmp_cmp=tmp->cmp_min_to_max(key2)) > 0)
       {
-        /* tmp is on the right of key2 non-overlapping */
-	if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part))
-	{					// ranges are connected
-	  tmp->copy_min_to_min(key2);
-	  key1->merge_flags(key2);
-	  if (tmp->min_flag & NO_MIN_RANGE &&
-	      tmp->max_flag & NO_MAX_RANGE)
-	  {
-	    if (key1->maybe_flag)
-	      return new SEL_ARG(SEL_ARG::MAYBE_KEY);
-	    return 0;
-	  }
-	  key2->increment_use_count(-1);	// Free not used tree
-	  key2=key2->next;
-	  continue;
-	}
-	else
-	{
-	  SEL_ARG *next=key2->next;		// Keys are not overlapping
-	  if (key2_shared)
-	  {
-	    SEL_ARG *cpy= new SEL_ARG(*key2);	// Must make copy
-	    if (!cpy)
-	      return 0;				// OOM
-	    key1=key1->insert(cpy);
-	    key2->increment_use_count(key1->use_count+1);
-	  }
-	  else
-	    key1=key1->insert(key2);		// Will destroy key2_root
-	  key2=next;
-	  continue;
-	}
+        /*
+          This is the case:
+          key2:  [------**]
+          tmp:             [----]
+        */
+        if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part))
+        {
+          /*
+            Adjacent ranges with equal next_key_part. Merge like this:
+
+            This is the case:
+            key2:    [------]
+            tmp:             [-----]
+
+            Result:
+            key2:    [------]
+            tmp:     [-------------]
+
+            Then move on to next key2 range.
+          */
+          tmp->copy_min_to_min(key2);
+          key1->merge_flags(key2);
+          if (tmp->min_flag & NO_MIN_RANGE &&
+              tmp->max_flag & NO_MAX_RANGE)
+          {
+            if (key1->maybe_flag)
+              return new SEL_ARG(SEL_ARG::MAYBE_KEY);
+            return 0;
+          }
+          key2->increment_use_count(-1);        // Free not used tree
+          key2=key2->next;
+          continue;
+        }
+        else
+        {
+          /*
+            key2 not adjacent to tmp or has different next_key_part.
+            Insert into key1 and move to next range in key2
+            
+            This is the case:
+            key2:  [------**]
+            tmp:             [----]
+
+            Result:
+            key1_  [------**][----]
+                   ^         ^
+                   insert    tmp
+          */
+          SEL_ARG *next=key2->next;
+          if (key2_shared)
+          {
+            SEL_ARG *cpy= new SEL_ARG(*key2);   // Must make copy
+            if (!cpy)
+              return 0;                         // OOM
+            key1=key1->insert(cpy);
+            key2->increment_use_count(key1->use_count+1);
+          }
+          else
+            key1=key1->insert(key2);            // Will destroy key2_root
+          key2=next;
+          continue;
+        }
       }
     }
 
-    /* 
-      tmp.min >= key2.min && tmp.min <= key.max  (overlapping ranges)
-      key2.min <= tmp.min <= key2.max 
-    */  
+    /*
+      The ranges in tmp and key2 are overlapping:
+
+      key2:          [----------] 
+      tmp:        [*****-----*****]
+
+      Corollary: tmp.min <= key2.max
+    */
     if (eq_tree(tmp->next_key_part,key2->next_key_part))
     {
+      // Merge overlapping ranges with equal next_key_part
       if (tmp->is_same(key2))
       {
-        /* 
-          Found exact match of key2 inside key1. 
+        /*
+          Found exact match of key2 inside key1.
           Use the relevant range in key1.
         */
-	tmp->merge_flags(key2);			// Copy maybe flags
-	key2->increment_use_count(-1);		// Free not used tree
+        tmp->merge_flags(key2);                 // Copy maybe flags
+        key2->increment_use_count(-1);          // Free not used tree
       }
       else
       {
-	SEL_ARG *last=tmp;
-        SEL_ARG *first=tmp;
-        /* 
-          Find the last range in tmp that overlaps key2 and has the same 
-          condition on the rest of the keyparts.
+        SEL_ARG *last= tmp;
+        SEL_ARG *first= tmp;
+
+        /*
+          Find the last range in key1 that overlaps key2 and
+          where all ranges first...last have the same next_key_part as
+          key2.
+
+          key2:  [****----------------------*******]
+          key1:     [--]  [----] [---]  [-----] [xxxx]
+                    ^                   ^       ^
+                    first               last    different next_key_part
+
+          Since key2 covers them, the ranges between first and last
+          are merged into one range by deleting first...last-1 from
+          the key1 tree. In the figure, this applies to first and the
+          two consecutive ranges. The range of last is then extended:
+            * last.min: Set to min(key2.min, first.min)
+            * last.max: If there is a last->next that overlaps key2 (i.e.,
+                        last->next has a different next_key_part):
+                                        Set adjacent to last->next.min
+                        Otherwise:      Set to max(key2.max, last.max)
+
+          Result:
+          key2:  [****----------------------*******]
+                    [--]  [----] [---]                   => deleted from key1
+          key1:  [**------------------------***][xxxx]
+                 ^                              ^
+                 tmp=last                       different next_key_part
         */
-	while (last->next && last->next->cmp_min_to_max(key2) <= 0 &&
-	       eq_tree(last->next->next_key_part,key2->next_key_part))
-	{
+        while (last->next && last->next->cmp_min_to_max(key2) <= 0 &&
+               eq_tree(last->next->next_key_part,key2->next_key_part))
+        {
           /*
-            We've found the last overlapping key1 range in last.
-            This means that the ranges between (and including) the 
-            first overlapping range (tmp) and the last overlapping range
-            (last) are fully nested into the current range of key2 
-            and can safely be discarded. We just need the minimum endpoint
-            of the first overlapping range (tmp) so we can compare it with
-            the minimum endpoint of the enclosing key2 range.
+            last->next is covered by key2 and has same next_key_part.
+            last can be deleted
           */
-	  SEL_ARG *save=last;
-	  last=last->next;
-	  key1=key1->tree_delete(save);
-	}
+          SEL_ARG *save=last;
+          last=last->next;
+          key1=key1->tree_delete(save);
+        }
+        // Redirect tmp to last which will cover the entire range
+        tmp= last;
+
         /*
-          The tmp range (the first overlapping range) could have been discarded
-          by the previous loop. We should re-direct tmp to the new united range 
-          that's taking its place.
+          We need the minimum endpoint of first so we can compare it
+          with the minimum endpoint of the enclosing key2 range.
         */
-        tmp= last;
         last->copy_min(first);
         bool full_range= last->copy_min(key2);
         if (!full_range)
         {
           if (last->next && key2->cmp_max_to_min(last->next) >= 0)
           {
-            last->max_value= last->next->min_value;
-            if (last->next->min_flag & NEAR_MIN)
-              last->max_flag&= ~NEAR_MAX;
-            else
-              last->max_flag|= NEAR_MAX;
+            /*
+              This is the case:
+              key2:    [-------------]
+              key1:  [***------]  [xxxx]
+                     ^            ^
+                     last         different next_key_part
+
+              Extend range of last up to last->next:
+              key2:    [-------------]
+              key1:  [***--------][xxxx]
+            */
+            last->copy_min_to_max(last->next);
           }
           else
+            /*
+              This is the case:
+              key2:    [--------*****]
+              key1:  [***---------]    [xxxx]
+                     ^                 ^
+                     last              different next_key_part
+
+              Extend range of last up to max(last.max, key2.max):
+              key2:    [--------*****]
+              key1:  [***----------**] [xxxx]
+             */
             full_range= last->copy_max(key2);
         }
-	if (full_range)
-	{					// Full range
-	  key1->free_tree();
-	  for (; key2 ; key2=key2->next)
-	    key2->increment_use_count(-1);	// Free not used tree
-	  if (key1->maybe_flag)
-	    return new SEL_ARG(SEL_ARG::MAYBE_KEY);
-	  return 0;
-	}
+        if (full_range)
+        {                                       // Full range
+          key1->free_tree();
+          for (; key2 ; key2=key2->next)
+            key2->increment_use_count(-1);      // Free not used tree
+          if (key1->maybe_flag)
+            return new SEL_ARG(SEL_ARG::MAYBE_KEY);
+          return 0;
+        }
       }
     }
 
     if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0)
-    {						// tmp.min <= x < key2.min
+    {
+      /*
+        This is the case ("cmp>=0" means that tmp.max >= key2.min):
+        key2:              [----]
+        tmp:     [------------*****]
+      */
+
+      if (!tmp->next_key_part)
+      {
+        /*
+          tmp->next_key_part is empty: cut the range that is covered
+          by tmp from key2. 
+          Reason: (key2->next_key_part OR tmp->next_key_part) will be
+          empty and therefore equal to tmp->next_key_part. Thus, this
+          part of the key2 range is completely covered by tmp.
+        */
+        if (tmp->cmp_max_to_max(key2) >= 0)
+        {
+          /*
+            tmp covers the entire range in key2. 
+            key2:              [----]
+            tmp:     [-----------------]
+
+            Move on to next range in key2
+          */
+          key2->increment_use_count(-1); // Free not used tree
+          key2=key2->next;
+          continue;
+        }
+        else
+        {
+          /*
+            This is the case:
+            key2:           [-------]
+            tmp:     [---------]
+
+            Result:
+            key2:               [---]
+            tmp:     [---------]
+          */
+          key2->copy_max_to_min(tmp);
+          continue;
+        }
+      }
+
+      /*
+        The ranges are overlapping but have not been merged because
+        next_key_part of tmp and key2 differ. 
+        key2:              [----]
+        tmp:     [------------*****]
+
+        Split tmp in two where key2 starts:
+        key2:              [----]
+        key1:    [--------][--*****]
+                 ^         ^
+                 insert    tmp
+      */
       SEL_ARG *new_arg=tmp->clone_first(key2);
       if (!new_arg)
-	return 0;				// OOM
-      if ((new_arg->next_key_part= key1->next_key_part))
-	new_arg->increment_use_count(key1->use_count+1);
+        return 0;                               // OOM
+      if ((new_arg->next_key_part= tmp->next_key_part))
+        new_arg->increment_use_count(key1->use_count+1);
       tmp->copy_min_to_min(key2);
       key1=key1->insert(new_arg);
-    }
+    } // tmp.min >= key2.min due to this if()
 
-    // tmp.min >= key2.min && tmp.min <= key2.max
-    SEL_ARG key(*key2);				// Get copy we can modify
+    /*
+      Now key2.min <= tmp.min <= key2.max:
+      key2:   [---------]
+      tmp:    [****---*****]
+     */
+    SEL_ARG key2_cpy(*key2); // Get copy we can modify
     for (;;)
     {
-      if (tmp->cmp_min_to_min(&key) > 0)
-      {						// key.min <= x < tmp.min
-	SEL_ARG *new_arg=key.clone_first(tmp);
-	if (!new_arg)
-	  return 0;				// OOM
-	if ((new_arg->next_key_part=key.next_key_part))
-	  new_arg->increment_use_count(key1->use_count+1);
-	key1=key1->insert(new_arg);
-      }
-      if ((cmp=tmp->cmp_max_to_max(&key)) <= 0)
-      {						// tmp.min. <= x <= tmp.max
-	tmp->maybe_flag|= key.maybe_flag;
-	key.increment_use_count(key1->use_count+1);
-	tmp->next_key_part= key_or(param, tmp->next_key_part, key.next_key_part);
-	if (!cmp)				// Key2 is ready
-	  break;
-	key.copy_max_to_min(tmp);
-	if (!(tmp=tmp->next))
-	{
-	  SEL_ARG *tmp2= new SEL_ARG(key);
-	  if (!tmp2)
-	    return 0;				// OOM
-	  key1=key1->insert(tmp2);
-	  key2=key2->next;
-	  goto end;
-	}
-	if (tmp->cmp_min_to_max(&key) > 0)
-	{
-	  SEL_ARG *tmp2= new SEL_ARG(key);
-	  if (!tmp2)
-	    return 0;				// OOM
-	  key1=key1->insert(tmp2);
-	  break;
-	}
+      if (tmp->cmp_min_to_min(&key2_cpy) > 0)
+      {
+        /*
+          This is the case:
+          key2_cpy:    [------------]
+          key1:                 [-*****]
+                                ^
+                                tmp
+                             
+          Result:
+          key2_cpy:             [---]
+          key1:        [-------][-*****]
+                       ^        ^
+                       insert   tmp
+         */
+        SEL_ARG *new_arg=key2_cpy.clone_first(tmp);
+        if (!new_arg)
+          return 0; // OOM
+        if ((new_arg->next_key_part=key2_cpy.next_key_part))
+          new_arg->increment_use_count(key1->use_count+1);
+        key1=key1->insert(new_arg);
+        key2_cpy.copy_min_to_min(tmp);
+      } 
+      // Now key2_cpy.min == tmp.min
+
+      if ((cmp= tmp->cmp_max_to_max(&key2_cpy)) <= 0)
+      {
+        /*
+          tmp.max <= key2_cpy.max:
+          key2_cpy:   a)  [-------]    or b)     [----]
+          tmp:            [----]                 [----]
+
+          Steps:
+           1) Update next_key_part of tmp: OR it with key2_cpy->next_key_part.
+           2) If case a: Insert range [tmp.max, key2_cpy.max] into key1 using
+                         next_key_part of key2_cpy
+
+           Result:
+           key1:      a)  [----][-]    or b)     [----]
+         */
+        tmp->maybe_flag|= key2_cpy.maybe_flag;
+        key2_cpy.increment_use_count(key1->use_count+1);
+        tmp->next_key_part= key_or(param, tmp->next_key_part,
+                                   key2_cpy.next_key_part);
+
+        if (!cmp)
+          break;                     // case b: done with this key2 range
+
+        // Make key2_cpy the range [tmp.max, key2_cpy.max]
+        key2_cpy.copy_max_to_min(tmp);
+        if (!(tmp=tmp->next))
+        {
+          /*
+            No more ranges in key1. Insert key2_cpy and go to "end"
+            label to insert remaining ranges in key2 if any.
+          */
+          SEL_ARG *tmp2= new SEL_ARG(key2_cpy);
+          if (!tmp2)
+            return 0; // OOM
+          key1=key1->insert(tmp2);
+          key2=key2->next;
+          goto end;
+        }
+        if (tmp->cmp_min_to_max(&key2_cpy) > 0)
+        {
+          /*
+            The next range in key1 does not overlap with key2_cpy.
+            Insert this range into key1 and move on to the next range
+            in key2.
+          */
+          SEL_ARG *tmp2= new SEL_ARG(key2_cpy);
+          if (!tmp2)
+            return 0;                           // OOM
+          key1=key1->insert(tmp2);
+          break;
+        }
+        /*
+          key2_cpy overlaps with the next range in key1 and the case
+          is now "key2.min <= tmp.min <= key2.max". Go back to for(;;)
+          to handle this situation.
+        */
+        continue;
       }
       else
       {
-	SEL_ARG *new_arg=tmp->clone_last(&key); // tmp.min <= x <= key.max
-	if (!new_arg)
-	  return 0;				// OOM
-	tmp->copy_max_to_min(&key);
-	tmp->increment_use_count(key1->use_count+1);
-	/* Increment key count as it may be used for next loop */
-	key.increment_use_count(1);
-	new_arg->next_key_part= key_or(param, tmp->next_key_part, key.next_key_part);
-	key1=key1->insert(new_arg);
-	break;
+        /*
+          This is the case:
+          key2_cpy:   [-------]
+          tmp:        [------------]
+
+          Result:
+          key1:       [-------][---]
+                      ^        ^
+                      new_arg  tmp
+          Steps:
+           0) If tmp->next_key_part is empty: do nothing. Reason:
+              (key2_cpy->next_key_part OR tmp->next_key_part) will be
+              empty and therefore equal to tmp->next_key_part. Thus,
+              the range in key2_cpy is completely covered by tmp
+           1) Make new_arg with range [tmp.min, key2_cpy.max].
+              new_arg->next_key_part is OR between next_key_part
+              of tmp and key2_cpy
+           2) Make tmp the range [key2_cpy.max, tmp.max]
+           3) Insert new_arg into key1
+        */
+        if (!tmp->next_key_part) // Step 0
+        {
+          key2_cpy.increment_use_count(-1);     // Free not used tree
+          break;
+        }
+        SEL_ARG *new_arg=tmp->clone_last(&key2_cpy);
+        if (!new_arg)
+          return 0; // OOM
+        tmp->copy_max_to_min(&key2_cpy);
+        tmp->increment_use_count(key1->use_count+1);
+        /* Increment key count as it may be used for next loop */
+        key2_cpy.increment_use_count(1);
+        new_arg->next_key_part= key_or(param, tmp->next_key_part,
+                                       key2_cpy.next_key_part);
+        key1=key1->insert(new_arg);
+        break;
       }
     }
-    key2=key2->next;
+    // Move on to next range in key2
+    key2=key2->next;                            
   }
 
 end:
+  /*
+    Add key2 ranges that are non-overlapping with and higher than the
+    highest range in key1.
+  */
   while (key2)
   {
     SEL_ARG *next=key2->next;
     if (key2_shared)
     {
-      SEL_ARG *tmp=new SEL_ARG(*key2);		// Must make copy
+      SEL_ARG *tmp=new SEL_ARG(*key2);          // Must make copy
       if (!tmp)
-	return 0;
+        return 0;
       key2->increment_use_count(key1->use_count+1);
       key1=key1->insert(tmp);
     }
     else
-      key1=key1->insert(key2);			// Will destroy key2_root
+      key1=key1->insert(key2);                  // Will destroy key2_root
     key2=next;
   }
   key1->use_count++;
+
+  key1->max_part_no= max_part_no;
   return key1;
 }
 
@@ -7461,11 +9829,7 @@ static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
 void SEL_ARG::test_use_count(SEL_ARG *root)
 {
   uint e_count=0;
-  if (this == root && use_count != 1)
-  {
-    sql_print_information("Use_count: Wrong count %lu for root",use_count);
-    return;
-  }
+
   if (this->type != SEL_ARG::KEY_RANGE)
     return;
   for (SEL_ARG *pos=first(); pos ; pos=pos->next)
@@ -7488,329 +9852,132 @@ void SEL_ARG::test_use_count(SEL_ARG *root)
     sql_print_warning("Wrong use count: %u (should be %u) for tree at 0x%lx",
                       e_count, elements, (long unsigned int) this);
 }
-
 #endif
 
 /*
-  Calculate estimate of number records that will be retrieved by a range
-  scan on given index using given SEL_ARG intervals tree.
+  Calculate cost and E(#rows) for a given index and intervals tree 
+
   SYNOPSIS
-    check_quick_select
-      param  Parameter from test_quick_select
-      idx               Number of index to use in tree->keys
-      tree              Transformed selection condition, tree->keys[idx]
-                        holds the range tree to be used for scanning.
-      update_tbl_stats  If true, update table->quick_keys with information
+    check_quick_select()
+      param             Parameter from test_quick_select
+      idx               Number of index to use in PARAM::key and SEL_TREE::key
+      index_only        TRUE  - assume only index tuples will be accessed
+                        FALSE - assume full table rows will be read
+      tree              Transformed selection condition, tree->key[idx] holds
+                        the intervals for the given index.
+      update_tbl_stats  TRUE <=> update table->quick_* with information
                         about range scan we've evaluated.
+      mrr_flags   INOUT MRR access flags
+      cost        OUT   Scan cost
 
   NOTES
     param->is_ror_scan is set to reflect if the key scan is a ROR (see
     is_key_scan_ror function for more info)
     param->table->quick_*, param->range_count (and maybe others) are
-    updated with data of given key scan, see check_quick_keys for details.
+    updated with data of given key scan, see quick_range_seq_next for details.
 
   RETURN
     Estimate # of records to be retrieved.
     HA_POS_ERROR if estimate calculation failed due to table handler problems.
-
 */
 
-static ha_rows
-check_quick_select(PARAM *param,uint idx,SEL_ARG *tree, bool update_tbl_stats)
-{
-  ha_rows records;
-  bool    cpk_scan;
-  uint key;
+static
+ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
+                           SEL_ARG *tree, bool update_tbl_stats, 
+                           uint *mrr_flags, uint *bufsize, COST_VECT *cost)
+{
+  SEL_ARG_RANGE_SEQ seq;
+  RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
+  handler *file= param->table->file;
+  ha_rows rows= HA_POS_ERROR;
+  uint keynr= param->real_keynr[idx];
   DBUG_ENTER("check_quick_select");
+  
+  /* Handle cases when we don't have a valid non-empty list of ranges */
+  if (!tree)
+    DBUG_RETURN(HA_POS_ERROR);
+  if (tree->type == SEL_ARG::IMPOSSIBLE)
+    DBUG_RETURN(0L);
+  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
+    DBUG_RETURN(HA_POS_ERROR);
 
-  param->is_ror_scan= FALSE;
-  param->first_null_comp= 0;
+  seq.keyno= idx;
+  seq.real_keyno= keynr;
+  seq.param= param;
+  seq.start= tree;
 
-  if (!tree)
-    DBUG_RETURN(HA_POS_ERROR);			// Can't use it
-  param->max_key_part=0;
   param->range_count=0;
-  key= param->real_keynr[idx];
+  param->max_key_part=0;
 
-  if (tree->type == SEL_ARG::IMPOSSIBLE)
-    DBUG_RETURN(0L);				// Impossible select. return
-  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
-    DBUG_RETURN(HA_POS_ERROR);				// Don't use tree
+  param->is_ror_scan= TRUE;
+  if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
+    param->is_ror_scan= FALSE;
+  
+  *mrr_flags= param->force_default_mrr? HA_MRR_USE_DEFAULT_IMPL: 0;
+  /*
+    Pass HA_MRR_SORTED to see if MRR implementation can handle sorting.
+  */
+  *mrr_flags|= HA_MRR_NO_ASSOCIATION | HA_MRR_SORTED;
 
-  enum ha_key_alg key_alg= param->table->key_info[key].algorithm;
-  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
-  {
-    /* Records are not ordered by rowid for other types of indexes. */
-    cpk_scan= FALSE;
-  }
-  else
-  {
-    /*
-      Clustered PK scan is a special case, check_quick_keys doesn't recognize
-      CPK scans as ROR scans (while actually any CPK scan is a ROR scan).
-    */
-    cpk_scan= ((param->table->s->primary_key == param->real_keynr[idx]) &&
-               param->table->file->primary_key_is_clustered());
-    param->is_ror_scan= !cpk_scan;
-  }
-  param->n_ranges= 0;
+  bool pk_is_clustered= file->primary_key_is_clustered();
+  if (index_only && 
+      (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
+      !(file->index_flags(keynr, param->max_key_part, 1) & HA_CLUSTERED_INDEX))
+     *mrr_flags |= HA_MRR_INDEX_ONLY;
+  
+  if (param->thd->lex->sql_command != SQLCOM_SELECT)
+    *mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
 
-  records= check_quick_keys(param, idx, tree,
-                            param->min_key, 0, -1,
-                            param->max_key, 0, -1);
-  if (records != HA_POS_ERROR)
+  *bufsize= param->thd->variables.mrr_buff_size;
+  /*
+    Skip materialized derived table/view result tables in the MRR check, as
+    they do not contain any data yet.
+  */
+  if (param->table->pos_in_table_list->is_non_derived())
+    rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
+                                            bufsize, mrr_flags, cost);
+  if (rows != HA_POS_ERROR)
   {
+    param->quick_rows[keynr]= rows;
     if (update_tbl_stats)
     {
-      param->table->quick_keys.set_bit(key);
-      param->table->quick_key_parts[key]=param->max_key_part+1;
-      param->table->quick_n_ranges[key]= param->n_ranges;
+      param->table->quick_keys.set_bit(keynr);
+      param->table->quick_key_parts[keynr]= param->max_key_part+1;
+      param->table->quick_n_ranges[keynr]= param->range_count;
       param->table->quick_condition_rows=
-        min(param->table->quick_condition_rows, records);
+        min(param->table->quick_condition_rows, rows);
+      param->table->quick_rows[keynr]= rows;
     }
-    /*
-      Need to save quick_rows in any case as it is used when calculating
-      cost of ROR intersection:
-    */
-    param->table->quick_rows[key]=records;
-    if (cpk_scan)
-      param->is_ror_scan= TRUE;
   }
-  if (param->table->file->index_flags(key, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
-    param->is_ror_scan= FALSE;
-  DBUG_PRINT("exit", ("Records: %lu", (ulong) records));
-  DBUG_RETURN(records);
-}
-
-
-/*
-  Recursively calculate estimate of # rows that will be retrieved by
-  key scan on key idx.
-  SYNOPSIS
-    check_quick_keys()
-      param         Parameter from test_quick select function.
-      idx           Number of key to use in PARAM::keys in list of used keys
-                    (param->real_keynr[idx] holds the key number in table)
-      key_tree      SEL_ARG tree being examined.
-      min_key       Buffer with partial min key value tuple
-      min_key_flag
-      max_key       Buffer with partial max key value tuple
-      max_key_flag
-
-  NOTES
-    The function does the recursive descent on the tree via SEL_ARG::left,
-    SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates
-    are calculated using records_in_range calls at the leaf nodes and then
-    summed.
-
-    param->min_key and param->max_key are used to hold prefixes of key value
-    tuples.
-
-    The side effects are:
-
-    param->max_key_part is updated to hold the maximum number of key parts used
-      in scan minus 1.
-
-    param->range_count is incremented if the function finds a range that
-      wasn't counted by the caller.
-
-    param->is_ror_scan is cleared if the function detects that the key scan is
-      not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror
-      function for description of which key scans are ROR scans)
-
-  RETURN
-    #records      E(#records) for given subtree
-    HA_POS_ERROR  if subtree cannot be used for record retrieval
-
-*/
-
-static ha_rows
-check_quick_keys(PARAM *param, uint idx, SEL_ARG *key_tree,
-		 uchar *min_key, uint min_key_flag, int min_keypart,
-                 uchar *max_key, uint max_key_flag, int max_keypart)
-{
-  ha_rows records=0, tmp;
-  uint tmp_min_flag, tmp_max_flag, keynr, min_key_length, max_key_length;
-  uint tmp_min_keypart= min_keypart, tmp_max_keypart= max_keypart;
-  uchar *tmp_min_key, *tmp_max_key;
-  uint8 save_first_null_comp= param->first_null_comp;
-
-  param->max_key_part=max(param->max_key_part,key_tree->part);
-  if (key_tree->left != &null_element)
+  /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
+  enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
+  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
   {
-    /*
-      There are at least two intervals for current key part, i.e. condition
-      was converted to something like
-        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
-      This is not a ROR scan if the key is not Clustered Primary Key.
+    /* 
+      All scans are non-ROR scans for those index types.
+      TODO: Don't have this logic here, make table engines return 
+      appropriate flags instead.
     */
     param->is_ror_scan= FALSE;
-    records=check_quick_keys(param, idx, key_tree->left,
-                             min_key, min_key_flag, min_keypart,
-			     max_key, max_key_flag, max_keypart);
-    if (records == HA_POS_ERROR)			// Impossible
-      return records;
-  }
-
-  tmp_min_key= min_key;
-  tmp_max_key= max_key;
-  tmp_min_keypart+= key_tree->store_min(param->key[idx][key_tree->part].store_length,
-                                        &tmp_min_key, min_key_flag);
-  tmp_max_keypart+= key_tree->store_max(param->key[idx][key_tree->part].store_length,
-                                        &tmp_max_key, max_key_flag);
-  min_key_length= (uint) (tmp_min_key - param->min_key);
-  max_key_length= (uint) (tmp_max_key - param->max_key);
-
-  if (param->is_ror_scan)
-  {
-    /*
-      If the index doesn't cover entire key, mark the scan as non-ROR scan.
-      Actually we're cutting off some ROR scans here.
-    */
-    uint16 fieldnr= param->table->key_info[param->real_keynr[idx]].
-                    key_part[key_tree->part].fieldnr - 1;
-    if (param->table->field[fieldnr]->key_length() !=
-        param->key[idx][key_tree->part].length)
-      param->is_ror_scan= FALSE;
-  }
-
-  if (!param->first_null_comp && key_tree->is_null_interval())
-    param->first_null_comp= key_tree->part+1;
-
-  if (key_tree->next_key_part &&
-      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
-      key_tree->next_key_part->part == key_tree->part+1)
-  {						// const key as prefix
-    if (min_key_length == max_key_length &&
-	!memcmp(min_key, max_key, (uint) (tmp_max_key - max_key)) &&
-	!key_tree->min_flag && !key_tree->max_flag)
-    {
-      tmp=check_quick_keys(param,idx,key_tree->next_key_part, tmp_min_key,
-                           min_key_flag | key_tree->min_flag, tmp_min_keypart,
-                           tmp_max_key, max_key_flag | key_tree->max_flag,
-                           tmp_max_keypart);
-      goto end;					// Ugly, but efficient
-    }
-    else
-    {
-      /* The interval for current key part is not c1 <= keyXpartY <= c1 */
-      param->is_ror_scan= FALSE;
-    }
-
-    tmp_min_flag=key_tree->min_flag;
-    tmp_max_flag=key_tree->max_flag;
-    if (!tmp_min_flag)
-      tmp_min_keypart+=
-      key_tree->next_key_part->store_min_key(param->key[idx],
-                                             &tmp_min_key,
-                                             &tmp_min_flag,
-                                             MAX_KEY);
-    if (!tmp_max_flag)
-      tmp_max_keypart+=
-      key_tree->next_key_part->store_max_key(param->key[idx],
-                                             &tmp_max_key,
-                                             &tmp_max_flag,
-                                             MAX_KEY);
-    min_key_length= (uint) (tmp_min_key - param->min_key);
-    max_key_length= (uint) (tmp_max_key - param->max_key);
-  }
-  else
-  {
-    tmp_min_flag= min_key_flag | key_tree->min_flag;
-    tmp_max_flag= max_key_flag | key_tree->max_flag;
   }
-
-  if (unlikely(param->thd->killed != 0))
-    return HA_POS_ERROR;
-
-  keynr=param->real_keynr[idx];
-  param->range_count++;
-  if (!tmp_min_flag && ! tmp_max_flag &&
-      (uint) key_tree->part+1 == param->table->key_info[keynr].key_parts &&
-      param->table->key_info[keynr].flags & HA_NOSAME &&
-      min_key_length == max_key_length &&
-      !memcmp(param->min_key, param->max_key, min_key_length) &&
-      !param->first_null_comp)
+  else if (param->table->s->primary_key == keynr && pk_is_clustered)
   {
-    tmp=1;					// Max one record
-    param->n_ranges++;
+    /* Clustered PK scan is always a ROR scan (TODO: same as above) */
+    param->is_ror_scan= TRUE;
   }
-  else
+  else if (param->range_count > 1)
   {
-    if (param->is_ror_scan)
-    {
-      /*
-        If we get here, the condition on the key was converted to form
-        "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
-          somecond(keyXpart{key_tree->part})"
-        Check if
-          somecond is "keyXpart{key_tree->part} = const" and
-          uncovered "tail" of KeyX parts is either empty or is identical to
-          first members of clustered primary key.
-      */
-      if (!(min_key_length == max_key_length &&
-            !memcmp(min_key, max_key, (uint) (tmp_max_key - max_key)) &&
-            !key_tree->min_flag && !key_tree->max_flag &&
-            is_key_scan_ror(param, keynr, key_tree->part + 1)))
-        param->is_ror_scan= FALSE;
-    }
-    param->n_ranges++;
-
-    if (tmp_min_flag & GEOM_FLAG)
-    {
-      key_range min_range;
-      min_range.key=    param->min_key;
-      min_range.length= min_key_length;
-      min_range.keypart_map= make_keypart_map(tmp_min_keypart);
-      /* In this case tmp_min_flag contains the handler-read-function */
-      min_range.flag=   (ha_rkey_function) (tmp_min_flag ^ GEOM_FLAG);
-
-      tmp= param->table->file->records_in_range(keynr,
-                                                &min_range, (key_range*) 0);
-    }
-    else
-    {
-      key_range min_range, max_range;
-
-      min_range.key=    param->min_key;
-      min_range.length= min_key_length;
-      min_range.flag=   (tmp_min_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
-                         HA_READ_KEY_EXACT);
-      min_range.keypart_map= make_keypart_map(tmp_min_keypart);
-      max_range.key=    param->max_key;
-      max_range.length= max_key_length;
-      max_range.flag=   (tmp_max_flag & NEAR_MAX ?
-                         HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY);
-      max_range.keypart_map= make_keypart_map(tmp_max_keypart);
-      tmp=param->table->file->records_in_range(keynr,
-                                               (min_key_length ? &min_range :
-                                                (key_range*) 0),
-                                               (max_key_length ? &max_range :
-                                                (key_range*) 0));
-    }
-  }
- end:
-  if (tmp == HA_POS_ERROR)			// Impossible range
-    return tmp;
-  records+=tmp;
-  if (key_tree->right != &null_element)
-  {
-    /*
-      There are at least two intervals for current key part, i.e. condition
-      was converted to something like
-        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
-      This is not a ROR scan if the key is not Clustered Primary Key.
+    /* 
+      Scanning multiple key values in the index: the records are ROR
+      for each value, but not between values. E.g, "SELECT ... x IN
+      (1,3)" returns ROR order for all records with x=1, then ROR
+      order for records with x=3
     */
     param->is_ror_scan= FALSE;
-    tmp=check_quick_keys(param, idx, key_tree->right,
-                         min_key, min_key_flag, min_keypart,
-                         max_key, max_key_flag, max_keypart);
-    if (tmp == HA_POS_ERROR)
-      return tmp;
-    records+=tmp;
   }
-  param->first_null_comp= save_first_null_comp;
-  return records;
+
+  DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
+  DBUG_RETURN(rows); //psergey-merge:todo: maintain first_null_comp.
 }
 
 
@@ -7838,13 +10005,14 @@ check_quick_keys(PARAM *param, uint idx, SEL_ARG *key_tree,
     where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])
 
     and the table has a clustered Primary Key defined as 
-
       PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k) 
     
     i.e. the first key parts of it are identical to uncovered parts ot the 
     key being scanned. This function assumes that the index flags do not
     include HA_KEY_SCAN_NOT_ROR flag (that is checked elsewhere).
 
+    Check (1) is made in quick_range_seq_next()
+
   RETURN
     TRUE   The scan is ROR-scan
     FALSE  Otherwise
@@ -7857,9 +10025,19 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
   KEY_PART_INFO *key_part_end= (table_key->key_part +
                                 table_key->key_parts);
   uint pk_number;
+  
+  for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
+  {
+    uint16 fieldnr= param->table->key_info[keynr].
+                    key_part[kp - table_key->key_part].fieldnr - 1;
+    if (param->table->field[fieldnr]->key_length() != kp->length)
+      return FALSE;
+  }
 
   if (key_part == key_part_end)
     return TRUE;
+
+  key_part= table_key->key_part + nparts;
   pk_number= param->table->s->primary_key;
   if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
     return FALSE;
@@ -7884,12 +10062,14 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
   SYNOPSIS
     get_quick_select()
       param
-      idx          Index of used key in param->key.
-      key_tree     SEL_ARG tree for the used key
-      parent_alloc If not NULL, use it to allocate memory for
-                   quick select data. Otherwise use quick->alloc.
+      idx            Index of used key in param->key.
+      key_tree       SEL_ARG tree for the used key
+      mrr_flags      MRR parameter for quick select
+      mrr_buf_size   MRR parameter for quick select
+      parent_alloc   If not NULL, use it to allocate memory for
+                     quick select data. Otherwise use quick->alloc.
   NOTES
-    The caller must call QUICK_SELECT::init for returned quick select
+    The caller must call QUICK_SELECT::init for returned quick select.
 
     CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
     deallocated when the returned quick select is deleted.
@@ -7900,25 +10080,26 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
 */
 
 QUICK_RANGE_SELECT *
-get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree,
-                 MEM_ROOT *parent_alloc)
+get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, uint mrr_flags,
+                 uint mrr_buf_size, MEM_ROOT *parent_alloc)
 {
   QUICK_RANGE_SELECT *quick;
+  bool create_err= FALSE;
   DBUG_ENTER("get_quick_select");
 
   if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL)
     quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table,
                                       param->real_keynr[idx],
                                       test(parent_alloc),
-                                      parent_alloc);
+                                      parent_alloc, &create_err);
   else
     quick=new QUICK_RANGE_SELECT(param->thd, param->table,
                                  param->real_keynr[idx],
-                                 test(parent_alloc));
+                                 test(parent_alloc), NULL, &create_err);
 
   if (quick)
   {
-    if (quick->error ||
+    if (create_err ||
 	get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
 		       param->max_key,0))
     {
@@ -7927,6 +10108,8 @@ get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree,
     }
     else
     {
+      quick->mrr_flags= mrr_flags;
+      quick->mrr_buf_size= mrr_buf_size;
       quick->key_parts=(KEY_PART*)
         memdup_root(parent_alloc? parent_alloc : &quick->alloc,
                     (char*) param->key[idx],
@@ -8077,7 +10260,20 @@ bool QUICK_RANGE_SELECT::unique_key_range()
 }
 
 
-/* Returns TRUE if any part of the key is NULL */
+
+/*
+  Return TRUE if any part of the key is NULL
+
+  SYNOPSIS
+    null_part_in_key()    
+      key_part  Array of key parts (index description)
+      key       Key values tuple
+      length    Length of key values tuple in bytes.
+
+  RETURN
+    TRUE   The tuple has at least one "keypartX is NULL"
+    FALSE  Otherwise
+*/
 
 static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length)
 {
@@ -8097,7 +10293,7 @@ bool QUICK_SELECT_I::is_keys_used(const MY_BITMAP *fields)
   return is_key_used(head, index, fields);
 }
 
-bool QUICK_INDEX_MERGE_SELECT::is_keys_used(const MY_BITMAP *fields)
+bool QUICK_INDEX_SORT_SELECT::is_keys_used(const MY_BITMAP *fields)
 {
   QUICK_RANGE_SELECT *quick;
   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
@@ -8111,11 +10307,11 @@ bool QUICK_INDEX_MERGE_SELECT::is_keys_used(const MY_BITMAP *fields)
 
 bool QUICK_ROR_INTERSECT_SELECT::is_keys_used(const MY_BITMAP *fields)
 {
-  QUICK_RANGE_SELECT *quick;
-  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
-  while ((quick= it++))
+  QUICK_SELECT_WITH_RECORD *qr;
+  List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
+  while ((qr= it++))
   {
-    if (is_key_used(head, quick->index, fields))
+    if (is_key_used(head, qr->quick->index, fields))
       return 1;
   }
   return 0;
@@ -8134,6 +10330,19 @@ bool QUICK_ROR_UNION_SELECT::is_keys_used(const MY_BITMAP *fields)
 }
 
 
+FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key)
+{
+  /* This flag is handed to the FT_SELECT constructor and checked below. */
+  bool ctor_failed= FALSE;
+  FT_SELECT *ft_quick= new FT_SELECT(thd, table, key, &ctor_failed);
+  if (!ctor_failed)
+    return ft_quick;
+
+  /* The flag was raised: dispose of the object and return no select. */
+  delete ft_quick;
+  return NULL;
+}
+
 /*
   Create quick select from ref/ref_or_null scan.
 
@@ -8162,10 +10371,12 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
   KEY_PART *key_part;
   QUICK_RANGE *range;
   uint part;
+  bool create_err= FALSE;
+  COST_VECT cost;
 
   old_root= thd->mem_root;
   /* The following call may change thd->mem_root */
-  quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0);
+  quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0, 0, &create_err);
   /* save mem_root set by QUICK_RANGE_SELECT constructor */
   alloc= thd->mem_root;
   /*
@@ -8174,7 +10385,7 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
   */
   thd->mem_root= old_root;
 
-  if (!quick)
+  if (!quick || create_err)
     return 0;			/* no ranges found */
   if (quick->init())
     goto err;
@@ -8228,8 +10439,20 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
       goto err;
   }
 
-  return quick;
+  /* Call multi_range_read_info() to get the MRR flags and buffer size */
+  quick->mrr_flags= HA_MRR_NO_ASSOCIATION | 
+                    (table->key_read ? HA_MRR_INDEX_ONLY : 0);
+  if (thd->lex->sql_command != SQLCOM_SELECT)
+    quick->mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
 
+  quick->mrr_buf_size= thd->variables.mrr_buff_size;
+  if (table->file->multi_range_read_info(quick->index, 1, (uint)records,
+                                         ~0, 
+                                         &quick->mrr_buf_size,
+                                         &quick->mrr_flags, &cost))
+    goto err;
+
+  return quick;
 err:
   delete quick;
   return 0;
@@ -8253,20 +10476,34 @@ err:
     other error
 */
 
-int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
+int read_keys_and_merge_scans(THD *thd,
+                              TABLE *head,
+                              List<QUICK_RANGE_SELECT> quick_selects,
+                              QUICK_RANGE_SELECT *pk_quick_select,
+                              READ_RECORD *read_record,
+                              bool intersection,
+                              key_map *filtered_scans,
+                              Unique **unique_ptr)
 {
   List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects);
   QUICK_RANGE_SELECT* cur_quick;
   int result;
+  Unique *unique= *unique_ptr;
   handler *file= head->file;
-  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");
+  bool with_cpk_filter= pk_quick_select != NULL;
+
+  DBUG_ENTER("read_keys_and_merge");
 
   /* We're going to just read rowids. */
-  head->set_keyread(TRUE);
+  if (!head->key_read)
+  {
+    head->enable_keyread();
+  }
   head->prepare_for_position();
 
   cur_quick_it.rewind();
   cur_quick= cur_quick_it++;
+  bool first_quick= TRUE;
   DBUG_ASSERT(cur_quick != 0);
   
   /*
@@ -8274,7 +10511,7 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
     reset here.
   */
   if (cur_quick->init() || cur_quick->reset())
-    DBUG_RETURN(1);
+    goto err;
 
   if (unique == NULL)
   {
@@ -8284,7 +10521,11 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
 
     unique= new Unique(refpos_order_cmp, (void *)file,
                        file->ref_length,
-                       thd->variables.sortbuff_size);
+                       thd->variables.sortbuff_size,
+		       intersection ? quick_selects.elements : 0);                     
+    if (!unique)
+      goto err;
+    *unique_ptr= unique;
   }
   else
     unique->reset();
@@ -8292,12 +10533,18 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
   DBUG_ASSERT(file->ref_length == unique->get_size());
   DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size());
 
-  if (!unique)
-    DBUG_RETURN(1);
   for (;;)
   {
     while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE)
     {
+      if (intersection)
+        with_cpk_filter= filtered_scans->is_set(cur_quick->index);
+      if (first_quick)
+      {
+        first_quick= FALSE;
+        if (intersection && unique->is_in_memory())
+          unique->close_for_expansion();
+      }
       cur_quick->range_end();
       cur_quick= cur_quick_it++;
       if (!cur_quick)
@@ -8306,7 +10553,7 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
       if (cur_quick->file->inited != handler::NONE) 
         cur_quick->file->ha_index_end();
       if (cur_quick->init() || cur_quick->reset())
-        DBUG_RETURN(1);
+        goto err;
     }
 
     if (result)
@@ -8314,22 +10561,21 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
       if (result != HA_ERR_END_OF_FILE)
       {
         cur_quick->range_end();
-        DBUG_RETURN(result);
+        goto err;
       }
       break;
     }
 
     if (thd->killed)
-      DBUG_RETURN(1);
+      goto err;
 
-    /* skip row if it will be retrieved by clustered PK scan */
-    if (pk_quick_select && pk_quick_select->row_in_ranges())
+    if (with_cpk_filter &&
+        pk_quick_select->row_in_ranges() != intersection )
       continue;
 
     cur_quick->file->position(cur_quick->record);
-    result= unique->unique_add((char*)cur_quick->file->ref);
-    if (result)
-      DBUG_RETURN(1);
+    if (unique->unique_add((char*)cur_quick->file->ref))
+      goto err;
   }
 
   /*
@@ -8338,14 +10584,31 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
     sequence.
   */
   result= unique->get(head);
+  /*
+    index merge currently doesn't support "using index" at all
+  */
+  head->disable_keyread();
+  if (init_read_record(read_record, thd, head, (SQL_SELECT*) 0, 1 , 1, TRUE))
+    result= 1;
+ DBUG_RETURN(result);
+
+err:
+  head->disable_keyread();
+  DBUG_RETURN(1);
+}
+
+
+int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
+{
+  /* Delegate to the shared helper: intersection=FALSE, no filtered_scans. */
+  int result;
+  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");
+  result= read_keys_and_merge_scans(thd, head, quick_selects, pk_quick_select,
+                                    &read_record, FALSE, NULL, &unique);
   doing_pk_scan= FALSE;
-  /* index_merge currently doesn't support "using index" at all */
-  head->set_keyread(FALSE);
-  init_read_record(&read_record, thd, head, (SQL_SELECT*) 0, 1 , 1, TRUE);
   DBUG_RETURN(result);
 }
 
-
 /*
   Get next row for index_merge.
   NOTES
@@ -8382,6 +10645,32 @@ int QUICK_INDEX_MERGE_SELECT::get_next()
   DBUG_RETURN(result);
 }
 
+int QUICK_INDEX_INTERSECT_SELECT::read_keys_and_merge()
+{
+  int rc;
+  DBUG_ENTER("QUICK_INDEX_INTERSECT_SELECT::read_keys_and_merge");
+
+  /* intersection=TRUE; filtered_scans is passed as the helper's key_map. */
+  rc= read_keys_and_merge_scans(thd, head, quick_selects, pk_quick_select,
+                                &read_record, TRUE, &filtered_scans, &unique);
+  DBUG_RETURN(rc);
+}
+
+int QUICK_INDEX_INTERSECT_SELECT::get_next()
+{
+  int rc;
+  DBUG_ENTER("QUICK_INDEX_INTERSECT_SELECT::get_next");
+
+  rc= read_record.read_record(&read_record);
+  if (rc != -1)
+    DBUG_RETURN(rc);
+
+  /* -1 from the reader is reported as end-of-file; release read state. */
+  end_read_record(&read_record);
+  free_io_cache(head);
+  DBUG_RETURN(HA_ERR_END_OF_FILE);
+}
+
 
 /*
   Retrieve next record.
@@ -8405,7 +10694,8 @@ int QUICK_INDEX_MERGE_SELECT::get_next()
 
 int QUICK_ROR_INTERSECT_SELECT::get_next()
 {
-  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
+  List_iterator_fast<QUICK_SELECT_WITH_RECORD> quick_it(quick_selects);
+  QUICK_SELECT_WITH_RECORD *qr;
   QUICK_RANGE_SELECT* quick;
   int error, cmp;
   uint last_rowid_count=0;
@@ -8414,7 +10704,8 @@ int QUICK_ROR_INTERSECT_SELECT::get_next()
   do
   {
     /* Get a rowid for first quick and save it as a 'candidate' */
-    quick= quick_it++;
+    qr= quick_it++;
+    quick= qr->quick;
     error= quick->get_next();
     if (cpk_quick)
     {
@@ -8424,17 +10715,22 @@ int QUICK_ROR_INTERSECT_SELECT::get_next()
     if (error)
       DBUG_RETURN(error);
 
+    /* Save the read key tuple */
+    key_copy(qr->key_tuple, record, head->key_info + quick->index,
+             quick->max_used_key_length);
+
     quick->file->position(quick->record);
     memcpy(last_rowid, quick->file->ref, head->file->ref_length);
     last_rowid_count= 1;
 
     while (last_rowid_count < quick_selects.elements)
     {
-      if (!(quick= quick_it++))
+      if (!(qr= quick_it++))
       {
         quick_it.rewind();
-        quick= quick_it++;
+        qr= quick_it++;
       }
+      quick= qr->quick;
 
       do
       {
@@ -8444,6 +10740,9 @@ int QUICK_ROR_INTERSECT_SELECT::get_next()
         cmp= head->file->cmp_ref(quick->file->ref, last_rowid);
       } while (cmp < 0);
 
+      key_copy(qr->key_tuple, record, head->key_info + quick->index,
+               quick->max_used_key_length);
+
       /* Ok, current select 'caught up' and returned ref >= cur_ref */
       if (cmp > 0)
       {
@@ -8459,6 +10758,10 @@ int QUICK_ROR_INTERSECT_SELECT::get_next()
         }
         memcpy(last_rowid, quick->file->ref, head->file->ref_length);
         last_rowid_count= 1;
+
+        //save the fields here
+        key_copy(qr->key_tuple, record, head->key_info + quick->index,
+                 quick->max_used_key_length);
       }
       else
       {
@@ -8469,8 +10772,23 @@ int QUICK_ROR_INTERSECT_SELECT::get_next()
 
     /* We get here if we got the same row ref in all scans. */
     if (need_to_fetch_row)
-      error= head->file->rnd_pos(head->record[0], last_rowid);
+      error= head->file->ha_rnd_pos(head->record[0], last_rowid);
   } while (error == HA_ERR_RECORD_DELETED);
+
+  if (!need_to_fetch_row)
+  {
+    /* Restore the columns we've read/saved with other quick selects */
+    quick_it.rewind();
+    while ((qr= quick_it++))
+    {
+      if (qr->quick != quick)
+      {
+        key_restore(record, qr->key_tuple, head->key_info + qr->quick->index,
+                    qr->quick->max_used_key_length);
+      }
+    }
+  }
+
   DBUG_RETURN(error);
 }
 
@@ -8513,12 +10831,12 @@ int QUICK_ROR_UNION_SELECT::get_next()
       {
         if (error != HA_ERR_END_OF_FILE)
           DBUG_RETURN(error);
-        queue_remove(&queue, 0);
+        queue_remove_top(&queue);
       }
       else
       {
         quick->save_last_pos();
-        queue_replaced(&queue);
+        queue_replace_top(&queue);
       }
 
       if (!have_prev_rowid)
@@ -8535,7 +10853,7 @@ int QUICK_ROR_UNION_SELECT::get_next()
     cur_rowid= prev_rowid;
     prev_rowid= tmp;
 
-    error= head->file->rnd_pos(quick->record, prev_rowid);
+    error= head->file->ha_rnd_pos(quick->record, prev_rowid);
   } while (error == HA_ERR_RECORD_DELETED);
   DBUG_RETURN(error);
 }
@@ -8543,12 +10861,12 @@ int QUICK_ROR_UNION_SELECT::get_next()
 
 int QUICK_RANGE_SELECT::reset()
 {
-  uint  mrange_bufsiz;
+  uint  buf_size;
   uchar *mrange_buff;
+  int   error;
+  HANDLER_BUFFER empty_buf;
   DBUG_ENTER("QUICK_RANGE_SELECT::reset");
-  next=0;
   last_range= NULL;
-  in_range= FALSE;
   cur_range= (QUICK_RANGE**) ranges.buffer;
 
   if (file->inited == handler::NONE)
@@ -8559,69 +10877,43 @@ int QUICK_RANGE_SELECT::reset()
         DBUG_RETURN(error);
   }
 
-  /* Do not allocate the buffers twice. */
-  if (multi_range_length)
+  /* Allocate buffer if we need one but haven't allocated it yet */
+  if (mrr_buf_size && !mrr_buf_desc)
   {
-    DBUG_ASSERT(multi_range_length == min(multi_range_count, ranges.elements));
-    DBUG_RETURN(0);
-  }
-
-  /* Allocate the ranges array. */
-  DBUG_ASSERT(ranges.elements);
-  multi_range_length= min(multi_range_count, ranges.elements);
-  DBUG_ASSERT(multi_range_length > 0);
-  while (multi_range_length && ! (multi_range= (KEY_MULTI_RANGE*)
-                                  my_malloc(multi_range_length *
-                                            sizeof(KEY_MULTI_RANGE),
-                                            MYF(MY_WME))))
-  {
-    /* Try to shrink the buffers until it is 0. */
-    multi_range_length/= 2;
-  }
-  if (! multi_range)
-  {
-    multi_range_length= 0;
-    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
-  }
-
-  /* Allocate the handler buffer if necessary.  */
-  if (file->ha_table_flags() & HA_NEED_READ_RANGE_BUFFER)
-  {
-    mrange_bufsiz= min(multi_range_bufsiz,
-                       ((uint)QUICK_SELECT_I::records + 1)* head->s->reclength);
-
-    while (mrange_bufsiz &&
-           ! my_multi_malloc(MYF(MY_WME),
-                             &multi_range_buff,
-                             (uint) sizeof(*multi_range_buff),
-                             &mrange_buff, (uint) mrange_bufsiz,
-                             NullS))
+    buf_size= mrr_buf_size;
+    while (buf_size && !my_multi_malloc(MYF(MY_WME),
+                                        &mrr_buf_desc, sizeof(*mrr_buf_desc),
+                                        &mrange_buff, buf_size,
+                                        NullS))
     {
       /* Try to shrink the buffers until both are 0. */
-      mrange_bufsiz/= 2;
+      buf_size/= 2;
     }
-    if (! multi_range_buff)
-    {
-      my_free(multi_range);
-      multi_range= NULL;
-      multi_range_length= 0;
+    if (!mrr_buf_desc)
       DBUG_RETURN(HA_ERR_OUT_OF_MEM);
-    }
 
     /* Initialize the handler buffer. */
-    multi_range_buff->buffer= mrange_buff;
-    multi_range_buff->buffer_end= mrange_buff + mrange_bufsiz;
-    multi_range_buff->end_of_used_area= mrange_buff;
-#ifdef HAVE_purify
+    mrr_buf_desc->buffer= mrange_buff;
+    mrr_buf_desc->buffer_end= mrange_buff + buf_size;
+    mrr_buf_desc->end_of_used_area= mrange_buff;
+#ifdef HAVE_valgrind
     /*
       We need this until ndb will use the buffer efficiently
       (Now ndb stores  complete row in here, instead of only the used fields
       which gives us valgrind warnings in compare_record[])
     */
-    bzero((char*) mrange_buff, mrange_bufsiz);
+    bzero((char*) mrange_buff, buf_size);
 #endif
   }
-  DBUG_RETURN(0);
+
+  if (!mrr_buf_desc)
+    empty_buf.buffer= empty_buf.buffer_end= empty_buf.end_of_used_area= NULL;
+ 
+  RANGE_SEQ_IF seq_funcs= {NULL, quick_range_seq_init, quick_range_seq_next, 0, 0};
+  error= file->multi_range_read_init(&seq_funcs, (void*)this, ranges.elements,
+                                     mrr_flags, mrr_buf_desc? mrr_buf_desc: 
+                                                              &empty_buf);
+  DBUG_RETURN(error);
 }
 
 
@@ -8642,13 +10934,8 @@ int QUICK_RANGE_SELECT::reset()
 
 int QUICK_RANGE_SELECT::get_next()
 {
-  int             result;
-  KEY_MULTI_RANGE *mrange;
+  range_id_t dummy;
   DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
-  DBUG_ASSERT(multi_range_length && multi_range &&
-              (cur_range >= (QUICK_RANGE**) ranges.buffer) &&
-              (cur_range <= (QUICK_RANGE**) ranges.buffer + ranges.elements));
-
   if (in_ror_merged_scan)
   {
     /*
@@ -8658,46 +10945,8 @@ int QUICK_RANGE_SELECT::get_next()
     head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
   }
 
-  for (;;)
-  {
-    if (in_range)
-    {
-      /* We did already start to read this key. */
-      result= file->read_multi_range_next(&mrange);
-      if (result != HA_ERR_END_OF_FILE)
-        goto end;
-    }
-
-    uint count= min(multi_range_length, ranges.elements -
-                    (cur_range - (QUICK_RANGE**) ranges.buffer));
-    if (count == 0)
-    {
-      /* Ranges have already been used up before. None is left for read. */
-      in_range= FALSE;
-      if (in_ror_merged_scan)
-        head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
-      DBUG_RETURN(HA_ERR_END_OF_FILE);
-    }
-    KEY_MULTI_RANGE *mrange_slot, *mrange_end;
-    for (mrange_slot= multi_range, mrange_end= mrange_slot+count;
-         mrange_slot < mrange_end;
-         mrange_slot++)
-    {
-      last_range= *(cur_range++);
-      last_range->make_min_endpoint(&mrange_slot->start_key);
-      last_range->make_max_endpoint(&mrange_slot->end_key);
-      mrange_slot->range_flag= last_range->flag;
-    }
-
-    result= file->read_multi_range_first(&mrange, multi_range, count,
-                                         sorted, multi_range_buff);
-    if (result != HA_ERR_END_OF_FILE)
-      goto end;
-    in_range= FALSE; /* No matching rows; go to next set of ranges. */
-  }
+  int result= file->multi_range_read_next(&dummy);
 
-end:
-  in_range= ! result;
   if (in_ror_merged_scan)
   {
     /* Restore bitmaps set on entry */
@@ -8706,6 +10955,7 @@ end:
   DBUG_RETURN(result);
 }
 
+
 /*
   Get the next record with a different prefix.
 
@@ -8746,8 +10996,8 @@ int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
     {
       /* Read the next record in the same range with prefix after cur_prefix. */
       DBUG_ASSERT(cur_prefix != NULL);
-      result= file->index_read_map(record, cur_prefix, keypart_map,
-                                   HA_READ_AFTER_KEY);
+      result= file->ha_index_read_map(record, cur_prefix, keypart_map,
+                                      HA_READ_AFTER_KEY);
       if (result || last_range->max_keypart_map == 0)
         DBUG_RETURN(result);
 
@@ -8796,8 +11046,8 @@ int QUICK_RANGE_SELECT_GEOM::get_next()
     if (last_range)
     {
       // Already read through key
-      result= file->index_next_same(record, last_range->min_key,
-				    last_range->min_length);
+      result= file->ha_index_next_same(record, last_range->min_key,
+                                       last_range->min_length);
       if (result != HA_ERR_END_OF_FILE)
 	DBUG_RETURN(result);
     }
@@ -8811,10 +11061,10 @@ int QUICK_RANGE_SELECT_GEOM::get_next()
     }
     last_range= *(cur_range++);
 
-    result= file->index_read_map(record, last_range->min_key,
-                                 last_range->min_keypart_map,
-                                 (ha_rkey_function)(last_range->flag ^
-                                                    GEOM_FLAG));
+    result= file->ha_index_read_map(record, last_range->min_key,
+                                    last_range->min_keypart_map,
+                                    (ha_rkey_function)(last_range->flag ^
+                                                       GEOM_FLAG));
     if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
       DBUG_RETURN(result);
     last_range= 0;				// Not found, to next range
@@ -8882,9 +11132,9 @@ QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
     Use default MRR implementation for reverse scans. No table engine
     currently can do an MRR scan with output in reverse index order.
   */
-  multi_range_length= 0;
-  multi_range= NULL;
-  multi_range_buff= NULL;
+  mrr_buf_desc= NULL;
+  mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
+  mrr_buf_size= 0;
 
   QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
   QUICK_RANGE **end_range= pr + ranges.elements;
@@ -8925,9 +11175,9 @@ int QUICK_SELECT_DESC::get_next()
     {						// Already read through key
       result = ((last_range->flag & EQ_RANGE && 
                  used_key_parts <= head->key_info[index].key_parts) ? 
-                file->index_next_same(record, last_range->min_key,
+                file->ha_index_next_same(record, last_range->min_key,
                                       last_range->min_length) :
-                file->index_prev(record));
+                file->ha_index_prev(record));
       if (!result)
       {
 	if (cmp_prev(*rev_it.ref()) == 0)
@@ -8943,7 +11193,7 @@ int QUICK_SELECT_DESC::get_next()
     if (last_range->flag & NO_MAX_RANGE)        // Read last record
     {
       int local_error;
-      if ((local_error=file->index_last(record)))
+      if ((local_error= file->ha_index_last(record)))
 	DBUG_RETURN(local_error);		// Empty table
       if (cmp_prev(last_range) == 0)
 	DBUG_RETURN(0);
@@ -8955,9 +11205,9 @@ int QUICK_SELECT_DESC::get_next()
         used_key_parts <= head->key_info[index].key_parts)
 
     {
-      result = file->index_read_map(record, last_range->max_key,
-                                    last_range->max_keypart_map,
-                                    HA_READ_KEY_EXACT);
+      result= file->ha_index_read_map(record, last_range->max_key,
+                                      last_range->max_keypart_map,
+                                      HA_READ_KEY_EXACT);
     }
     else
     {
@@ -8965,11 +11215,11 @@ int QUICK_SELECT_DESC::get_next()
                   (last_range->flag & EQ_RANGE && 
                    used_key_parts > head->key_info[index].key_parts) ||
                   range_reads_after_key(last_range));
-      result=file->index_read_map(record, last_range->max_key,
-                                  last_range->max_keypart_map,
-                                  ((last_range->flag & NEAR_MAX) ?
-                                   HA_READ_BEFORE_KEY :
-                                   HA_READ_PREFIX_LAST_OR_PREV));
+      result= file->ha_index_read_map(record, last_range->max_key,
+                                      last_range->max_keypart_map,
+                                      ((last_range->flag & NEAR_MAX) ?
+                                       HA_READ_BEFORE_KEY :
+                                       HA_READ_PREFIX_LAST_OR_PREV));
     }
     if (result)
     {
@@ -9001,7 +11251,7 @@ int QUICK_SELECT_DESC::get_next()
 QUICK_SELECT_I *QUICK_RANGE_SELECT::make_reverse(uint used_key_parts_arg)
 {
   QUICK_SELECT_DESC *new_quick= new QUICK_SELECT_DESC(this, used_key_parts_arg);
-  if (new_quick == NULL || new_quick->error != 0)
+  if (new_quick == NULL)
   {
     delete new_quick;
     return NULL;
@@ -9013,6 +11263,7 @@ QUICK_SELECT_I *QUICK_RANGE_SELECT::make_reverse(uint used_key_parts_arg)
 /*
   Compare if found key is over max-value
   Returns 0 if key <= range->max_key
+  TODO: Figure out why this function can't be as simple as cmp_prev().
 */
 
 int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
@@ -9082,30 +11333,53 @@ bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg)
 }
 
 
-void QUICK_RANGE_SELECT::add_info_string(String *str)
+void QUICK_SELECT_I::add_key_name(String *str, bool *first)
 {
   KEY *key_info= head->key_info + index;
+
+  /* Every key name except the first is preceded by a comma. */
+  if (!*first)
+    str->append(',');
+  *first= FALSE;
   str->append(key_info->name);
 }
+ 
+
+void QUICK_RANGE_SELECT::add_info_string(String *str)
+{
+  /* A range scan reports exactly one key name. */
+  bool no_key_yet= TRUE;
+  add_key_name(str, &no_key_yet);
+}
 
 void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str)
 {
   QUICK_RANGE_SELECT *quick;
   bool first= TRUE;
   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+  /* Emits "sort_union(k1,k2,...)"; pk_quick_select's key comes last. */
   str->append(STRING_WITH_LEN("sort_union("));
   while ((quick= it++))
   {
-    if (!first)
-      str->append(',');
-    else
-      first= FALSE;
-    quick->add_info_string(str);
+    quick->add_key_name(str, &first);
   }
   if (pk_quick_select)
+    pk_quick_select->add_key_name(str, &first);
+  str->append(')');
+}
+
+void QUICK_INDEX_INTERSECT_SELECT::add_info_string(String *str)
+{
+  /* Emits "sort_intersect(k1,k2,...)"; pk_quick_select's key comes first. */
+  bool is_first= TRUE;
+  List_iterator_fast<QUICK_RANGE_SELECT> scan_it(quick_selects);
+
+  str->append(STRING_WITH_LEN("sort_intersect("));
+  if (pk_quick_select)
+    pk_quick_select->add_key_name(str, &is_first);
+  while (QUICK_RANGE_SELECT *scan= scan_it++)
   {
-    str->append(',');
-    pk_quick_select->add_info_string(str);
+    scan->add_key_name(str, &is_first);
   }
   str->append(')');
 }
@@ -9113,131 +11387,127 @@ void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str)
 void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str)
 {
   bool first= TRUE;
-  QUICK_RANGE_SELECT *quick;
-  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+  List_iterator_fast<QUICK_SELECT_WITH_RECORD> scan_it(quick_selects);
+  QUICK_SELECT_WITH_RECORD *scan;
+  /* Emits "intersect(k1,k2,...)"; cpk_quick's key, if any, comes last. */
   str->append(STRING_WITH_LEN("intersect("));
-  while ((quick= it++))
+  while ((scan= scan_it++))
   {
-    KEY *key_info= head->key_info + quick->index;
-    if (!first)
-      str->append(',');
-    else
-      first= FALSE;
-    str->append(key_info->name);
+    scan->quick->add_key_name(str, &first);
   }
   if (cpk_quick)
-  {
-    KEY *key_info= head->key_info + cpk_quick->index;
-    str->append(',');
-    str->append(key_info->name);
-  }
+    cpk_quick->add_key_name(str, &first);
   str->append(')');
 }
 
+
 void QUICK_ROR_UNION_SELECT::add_info_string(String *str)
 {
-  bool first= TRUE;
   QUICK_SELECT_I *quick;
+  bool first= TRUE;
   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+  /* Emits "union(...)": each child's own info string, comma-separated. */
   str->append(STRING_WITH_LEN("union("));
   while ((quick= it++))
   {
-    if (!first)
-      str->append(',');
-    else
+    if (first)
       first= FALSE;
+    else
+      str->append(',');
     quick->add_info_string(str);
   }
   str->append(')');
 }
 
 
-void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
-                                              String *used_lengths)
+void QUICK_SELECT_I::add_key_and_length(String *key_names,
+                                        String *used_lengths,
+                                        bool *first)
 {
   char buf[64];
   uint length;
   KEY *key_info= head->key_info + index;
+
+  /* Any entry but the first is comma-separated in both output strings. */
+  if (!*first)
+  {
+    key_names->append(',');
+    used_lengths->append(',');
+  }
+  *first= FALSE;
   key_names->append(key_info->name);
-  length= longlong2str(max_used_key_length, buf, 10) - buf;
+  length= longlong10_to_str(max_used_key_length, buf, 10) - buf;
   used_lengths->append(buf, length);
 }
 
+
+void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
+                                              String *used_lengths)
+{
+  /* A range scan contributes exactly one key/length pair. */
+  bool no_key_yet= TRUE;
+  add_key_and_length(key_names, used_lengths, &no_key_yet);
+}
+
 void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
                                                     String *used_lengths)
 {
-  char buf[64];
-  uint length;
-  bool first= TRUE;
   QUICK_RANGE_SELECT *quick;
+  bool first= TRUE;
 
   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+  /* One entry per merged scan, then pk_quick_select's entry, if any. */
   while ((quick= it++))
   {
-    if (first)
-      first= FALSE;
-    else
-    {
-      key_names->append(',');
-      used_lengths->append(',');
-    }
-
-    KEY *key_info= head->key_info + quick->index;
-    key_names->append(key_info->name);
-    length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
-    used_lengths->append(buf, length);
+    quick->add_key_and_length(key_names, used_lengths, &first);
   }
+
   if (pk_quick_select)
-  {
-    KEY *key_info= head->key_info + pk_quick_select->index;
-    key_names->append(',');
-    key_names->append(key_info->name);
-    length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf;
-    used_lengths->append(',');
-    used_lengths->append(buf, length);
-  }
+    pk_quick_select->add_key_and_length(key_names, used_lengths, &first);
 }
 
-void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
-                                                      String *used_lengths)
+
+void QUICK_INDEX_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
+                                                        String *used_lengths)
 {
-  char buf[64];
-  uint length;
-  bool first= TRUE;
   QUICK_RANGE_SELECT *quick;
+  bool is_first= TRUE;
+
   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+  /* pk_quick_select's entry, if any, precedes those of the merged scans. */
+  if (pk_quick_select)
+    pk_quick_select->add_key_and_length(key_names, used_lengths, &is_first);
+
   while ((quick= it++))
   {
-    KEY *key_info= head->key_info + quick->index;
-    if (first)
-      first= FALSE;
-    else
-    {
-      key_names->append(',');
-      used_lengths->append(',');
-    }
-    key_names->append(key_info->name);
-    length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
-    used_lengths->append(buf, length);
+    quick->add_key_and_length(key_names, used_lengths, &is_first);
   }
+}
 
-  if (cpk_quick)
+void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
+                                                      String *used_lengths)
+{
+  /* One entry per intersected scan, then cpk_quick's entry, if any. */
+  QUICK_SELECT_WITH_RECORD *scan;
+  bool is_first= TRUE;
+  List_iterator_fast<QUICK_SELECT_WITH_RECORD> scan_it(quick_selects);
+
+  while ((scan= scan_it++))
   {
-    KEY *key_info= head->key_info + cpk_quick->index;
-    key_names->append(',');
-    key_names->append(key_info->name);
-    length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf;
-    used_lengths->append(',');
-    used_lengths->append(buf, length);
+    scan->quick->add_key_and_length(key_names, used_lengths, &is_first);
   }
+  if (cpk_quick)
+    cpk_quick->add_key_and_length(key_names, used_lengths, &is_first);
 }
 
 void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
                                                   String *used_lengths)
 {
-  bool first= TRUE;
   QUICK_SELECT_I *quick;
+  bool first= TRUE;
+
   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+
   while ((quick= it++))
   {
     if (first)
@@ -9266,7 +11536,7 @@ static bool get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
                        uchar *key_infix, uint *key_infix_len,
                        KEY_PART_INFO **first_non_infix_part);
 static bool
-check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item,
+check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
                                Field::imagetype image_type);
 
 static void
@@ -9429,7 +11699,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
   /* Perform few 'cheap' tests whether this access method is applicable. */
   if (!join)
     DBUG_RETURN(NULL);        /* This is not a select statement. */
-  if ((join->tables != 1) ||  /* The query must reference one table. */
+  if ((join->table_count != 1) ||  /* The query must reference one table. */
       (join->select_lex->olap == ROLLUP_TYPE)) /* Check (B3) for ROLLUP */
     DBUG_RETURN(NULL);
   if (table->s->keys == 0)        /* There are no indexes to use. */
@@ -9763,8 +12033,14 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
       cur_index_tree= get_index_range_tree(cur_index, tree, param,
                                            &cur_param_idx);
       /* Check if this range tree can be used for prefix retrieval. */
+      COST_VECT dummy_cost;
+      uint mrr_flags= HA_MRR_USE_DEFAULT_IMPL;
+      uint mrr_bufsize=0;
       cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
-                                                    cur_index_tree, TRUE);
+                                                   FALSE /*don't care*/,
+                                                   cur_index_tree, TRUE,
+                                                   &mrr_flags, &mrr_bufsize,
+                                                   &dummy_cost);
     }
     cost_group_min_max(table, cur_index_info, cur_used_key_parts,
                        cur_group_key_parts, tree, cur_index_tree,
@@ -9859,14 +12135,14 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
 */
 
 static bool
-check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item,
+check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
                                Field::imagetype image_type)
 {
   DBUG_ENTER("check_group_min_max_predicates");
   DBUG_ASSERT(cond && min_max_arg_item);
 
   cond= cond->real_item();
-  Item::Type cond_type= cond->type();
+  Item::Type cond_type= cond->real_type();
   if (cond_type == Item::COND_ITEM) /* 'AND' or 'OR' */
   {
     DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
@@ -9882,16 +12158,27 @@ check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item,
   }
 
   /*
-    TODO:
-    This is a very crude fix to handle sub-selects in the WHERE clause
-    (Item_subselect objects). With the test below we rule out from the
-    optimization all queries with subselects in the WHERE clause. What has to
-    be done, is that here we should analyze whether the subselect references
-    the MIN/MAX argument field, and disallow the optimization only if this is
-    so.
+    Disallow loose index scan if the MIN/MAX argument field is referenced by
+    a subquery in the WHERE clause.
   */
+
   if (cond_type == Item::SUBSELECT_ITEM)
-    DBUG_RETURN(FALSE);
+  {
+    Item_subselect *subs_cond= (Item_subselect*) cond;
+    if (subs_cond->is_correlated)
+    {
+      DBUG_ASSERT(subs_cond->upper_refs.elements > 0);
+      List_iterator_fast<Item_subselect::Ref_to_outside>
+        li(subs_cond->upper_refs);
+      Item_subselect::Ref_to_outside *dep;
+      while ((dep= li++))
+      {
+        if (dep->item->eq(min_max_arg_item, FALSE))
+          DBUG_RETURN(FALSE);
+      }
+    }
+    DBUG_RETURN(TRUE);
+  }
 
   /*
     Condition of the form 'field' is equivalent to 'field <> 0' and thus
@@ -10038,13 +12325,14 @@ get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
       Find the range tree for the current keypart. We assume that
       index_range_tree points to the leftmost keypart in the index.
     */
-    for (cur_range= index_range_tree; cur_range;
+    for (cur_range= index_range_tree; 
+         cur_range && cur_range->type == SEL_ARG::KEY_RANGE;
          cur_range= cur_range->next_key_part)
     {
       if (cur_range->field->eq(cur_part->field))
         break;
     }
-    if (!cur_range)
+    if (!cur_range || cur_range->type != SEL_ARG::KEY_RANGE)
     {
       if (min_max_arg_part)
         return FALSE; /* The current keypart has no range predicates at all. */
@@ -10359,6 +12647,7 @@ TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
       /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
       quick->quick_prefix_select= get_quick_select(param, param_idx,
                                                    index_tree,
+                                                   HA_MRR_USE_DEFAULT_IMPL, 0,
                                                    &quick->alloc);
 
     /*
@@ -10442,9 +12731,9 @@ QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
    group_prefix_len(group_prefix_len_arg),
    group_key_parts(group_key_parts_arg), have_min(have_min_arg),
    have_max(have_max_arg), have_agg_distinct(have_agg_distinct_arg),
-   seen_first_key(FALSE), min_max_arg_part(min_max_arg_part_arg),
+   seen_first_key(FALSE), doing_key_read(FALSE), min_max_arg_part(min_max_arg_part_arg),
    key_infix(key_infix_arg), key_infix_len(key_infix_len_arg),
-   min_functions_it(NULL), max_functions_it(NULL), 
+   min_functions_it(NULL), max_functions_it(NULL),
    is_index_scan(is_index_scan_arg)
 {
   head=       table;
@@ -10572,7 +12861,12 @@ QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT()
 {
   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT");
   if (file->inited != handler::NONE) 
+  {
+    DBUG_ASSERT(file == head->file);
+    if (doing_key_read)
+      head->disable_keyread();
     file->ha_index_end();
+  }
   if (min_max_arg_part)
     delete_dynamic(&min_max_ranges);
   free_root(&alloc,MYF(0));
@@ -10755,12 +13049,16 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
   int result;
   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
 
-  head->set_keyread(TRUE); /* We need only the key attributes */
+  if (!head->key_read)
+  {
+    doing_key_read= 1;
+    head->enable_keyread(); /* We need only the key attributes */
+  }
   if ((result= file->ha_index_init(index,1)))
     DBUG_RETURN(result);
   if (quick_prefix_select && quick_prefix_select->reset())
     DBUG_RETURN(1);
-  result= file->index_last(record);
+  result= file->ha_index_last(record);
   if (result == HA_ERR_END_OF_FILE)
     DBUG_RETURN(0);
   /* Save the prefix of the last group. */
@@ -10862,9 +13160,9 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next()
       first sub-group with the extended prefix.
     */
     if (!have_min && !have_max && key_infix_len > 0)
-      result= file->index_read_map(record, group_prefix,
-                                   make_prev_keypart_map(real_key_parts),
-                                   HA_READ_KEY_EXACT);
+      result= file->ha_index_read_map(record, group_prefix,
+                                      make_prev_keypart_map(real_key_parts),
+                                      HA_READ_KEY_EXACT);
 
     result= have_min ? min_res : have_max ? max_res : result;
   } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
@@ -10916,9 +13214,10 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min()
     /* Apply the constant equality conditions to the non-group select fields */
     if (key_infix_len > 0)
     {
-      if ((result= file->index_read_map(record, group_prefix,
-                                        make_prev_keypart_map(real_key_parts),
-                                        HA_READ_KEY_EXACT)))
+      if ((result=
+           file->ha_index_read_map(record, group_prefix,
+                                   make_prev_keypart_map(real_key_parts),
+                                   HA_READ_KEY_EXACT)))
         DBUG_RETURN(result);
     }
 
@@ -10933,9 +13232,9 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min()
     {
       /* Find the first subsequent record without NULL in the MIN/MAX field. */
       key_copy(tmp_record, record, index_info, 0);
-      result= file->index_read_map(record, tmp_record,
-                                   make_keypart_map(real_key_parts),
-                                   HA_READ_AFTER_KEY);
+      result= file->ha_index_read_map(record, tmp_record,
+                                      make_keypart_map(real_key_parts),
+                                      HA_READ_AFTER_KEY);
       /*
         Check if the new record belongs to the current group by comparing its
         prefix with the group's prefix. If it is from the next group, then the
@@ -10990,9 +13289,9 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max()
   if (min_max_ranges.elements > 0)
     result= next_max_in_range();
   else
-    result= file->index_read_map(record, group_prefix,
-                                 make_prev_keypart_map(real_key_parts),
-                                 HA_READ_PREFIX_LAST);
+    result= file->ha_index_read_map(record, group_prefix,
+                                    make_prev_keypart_map(real_key_parts),
+                                    HA_READ_PREFIX_LAST);
   DBUG_RETURN(result);
 }
 
@@ -11034,14 +13333,14 @@ static int index_next_different (bool is_index_scan, handler *file,
 
     while (!key_cmp (key_part, group_prefix, group_prefix_len))
     {
-      result= file->index_next(record);
+      result= file->ha_index_next(record);
       if (result)
         return(result);
     }
     return result;
   }
   else
-    return file->index_read_map(record, group_prefix,
+    return file->ha_index_read_map(record, group_prefix,
                                 make_prev_keypart_map(group_key_parts),
                                 HA_READ_AFTER_KEY);
 }
@@ -11086,7 +13385,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
   {
     if (!seen_first_key)
     {
-      result= file->index_first(record);
+      result= file->ha_index_first(record);
       if (result)
         DBUG_RETURN(result);
       seen_first_key= TRUE;
@@ -11173,7 +13472,8 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
                  HA_READ_AFTER_KEY : HA_READ_KEY_OR_NEXT;
     }
 
-    result= file->index_read_map(record, group_prefix, keypart_map, find_flag);
+    result= file->ha_index_read_map(record, group_prefix, keypart_map,
+                                    find_flag);
     if (result)
     {
       if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
@@ -11221,6 +13521,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
       /* Compare the found key with max_key. */
       int cmp_res= key_cmp(index_info->key_part, max_key,
                            real_prefix_len + min_max_arg_len);
+      my_afree(max_key);
       /*
         The key is outside of the range if: 
         the interval is open and the key is equal to the maximum boundry
@@ -11312,7 +13613,8 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
                  HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV;
     }
 
-    result= file->index_read_map(record, group_prefix, keypart_map, find_flag);
+    result= file->ha_index_read_map(record, group_prefix, keypart_map,
+                                    find_flag);
 
     if (result)
     {
@@ -11345,6 +13647,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
       /* Compare the found key with min_key. */
       int cmp_res= key_cmp(index_info->key_part, min_key,
                            real_prefix_len + min_max_arg_len);
+      my_afree(min_key);
       /*
         The key is outside of the range if: 
         the interval is open and the key is equal to the minimum boundry
@@ -11445,11 +13748,9 @@ void QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
 void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
                                                       String *used_lengths)
 {
-  char buf[64];
-  uint length;
-  key_names->append(index_info->name);
-  length= longlong2str(max_used_key_length, buf, 10) - buf;
-  used_lengths->append(buf, length);
+  bool first= TRUE;
+
+  add_key_and_length(key_names, used_lengths, &first);
 }
 
 
@@ -11480,7 +13781,8 @@ static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
   if (!tmp.length())
     tmp.append(STRING_WITH_LEN("(empty)"));
 
-  DBUG_PRINT("info", ("SEL_TREE: 0x%lx (%s)  scans: %s", (long) tree, msg, tmp.ptr()));
+  DBUG_PRINT("info", ("SEL_TREE: 0x%lx (%s)  scans: %s", (long) tree, msg,
+                      tmp.c_ptr_safe()));
 
   DBUG_VOID_RETURN;
 }
@@ -11503,10 +13805,11 @@ static void print_ror_scans_arr(TABLE *table, const char *msg,
   }
   if (!tmp.length())
     tmp.append(STRING_WITH_LEN("(empty)"));
-  DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.ptr()));
+  DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.c_ptr_safe()));
   DBUG_VOID_RETURN;
 }
 
+
 /*****************************************************************************
 ** Print a quick range for debugging
 ** TODO:
@@ -11519,7 +13822,6 @@ print_key(KEY_PART *key_part, const uchar *key, uint used_length)
 {
   char buff[1024];
   const uchar *key_end= key+used_length;
-  String tmp(buff,sizeof(buff),&my_charset_bin);
   uint store_length;
   TABLE *table= key_part->field->table;
   my_bitmap_map *old_sets[2];
@@ -11528,6 +13830,7 @@ print_key(KEY_PART *key_part, const uchar *key, uint used_length)
 
   for (; key < key_end; key+=store_length, key_part++)
   {
+    String tmp(buff,sizeof(buff),&my_charset_bin);
     Field *field=      key_part->field;
     store_length= key_part->store_length;
 
@@ -11615,8 +13918,7 @@ void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
   /* purecov: end */    
 }
 
-
-void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
+void QUICK_INDEX_SORT_SELECT::dbug_dump(int indent, bool verbose)
 {
   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
   QUICK_RANGE_SELECT *quick;
@@ -11634,13 +13936,13 @@ void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
 
 void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose)
 {
-  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
-  QUICK_RANGE_SELECT *quick;
+  List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
+  QUICK_SELECT_WITH_RECORD *qr;
   fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n",
           indent, "", need_to_fetch_row? "":"non-");
   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
-  while ((quick= it++))
-    quick->dbug_dump(indent+2, verbose);
+  while ((qr= it++))
+    qr->quick->dbug_dump(indent+2, verbose);
   if (cpk_quick)
   {
     fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
diff --git a/sql/opt_range.h b/sql/opt_range.h
index 96d34bfdc0a..a52f3c2cd3a 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -52,13 +52,19 @@ typedef struct st_key_part {
 } KEY_PART;
 
 
+/*
+  A "MIN_TUPLE < tbl.key_tuple < MAX_TUPLE" interval.
+
+  Either endpoint may be absent. The 'flag' member holds flags that tell
+  whether each endpoint comparison is '<' or '<='.
+*/
 class QUICK_RANGE :public Sql_alloc {
  public:
   uchar *min_key,*max_key;
   uint16 min_length,max_length,flag;
   key_part_map min_keypart_map, // bitmap of used keyparts in min_key
                max_keypart_map; // bitmap of used keyparts in max_key
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
   uint16 dummy;					/* Avoid warnings on 'flag' */
 #endif
   QUICK_RANGE();				/* Full range */
@@ -75,7 +81,7 @@ class QUICK_RANGE :public Sql_alloc {
       min_keypart_map(min_keypart_map_arg),
       max_keypart_map(max_keypart_map_arg)
     {
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
       dummy=0;
 #endif
     }
@@ -201,13 +207,16 @@ class QUICK_RANGE :public Sql_alloc {
 
   4. Delete the select:
     delete quick;
-
+  
+  NOTE 
+    quick select doesn't use Sql_alloc/MEM_ROOT allocation because "range
+    checked for each record" functionality may create/destroy
+    O(#records_in_some_table) quick selects during query execution.
 */
 
 class QUICK_SELECT_I
 {
 public:
-  bool sorted;
   ha_rows records;  /* estimate of # of records to be retrieved */
   double  read_time; /* time to perform this retrieval          */
   TABLE   *head;
@@ -279,16 +288,22 @@ public:
 
   virtual bool reverse_sorted() = 0;
   virtual bool unique_key_range() { return false; }
-  virtual bool clustered_pk_range() { return false; }
 
+  /*
+    Request that this quick select produces sorted output. Not all quick
+    selects can do it, the caller is responsible for calling this function
+    only for those quick selects that can.
+  */
+  virtual void need_sorted_output() = 0;
   enum {
     QS_TYPE_RANGE = 0,
-    QS_TYPE_INDEX_MERGE = 1,
-    QS_TYPE_RANGE_DESC = 2,
-    QS_TYPE_FULLTEXT   = 3,
-    QS_TYPE_ROR_INTERSECT = 4,
-    QS_TYPE_ROR_UNION = 5,
-    QS_TYPE_GROUP_MIN_MAX = 6
+    QS_TYPE_INDEX_INTERSECT = 1,
+    QS_TYPE_INDEX_MERGE = 2,
+    QS_TYPE_RANGE_DESC = 3,
+    QS_TYPE_FULLTEXT   = 4,
+    QS_TYPE_ROR_INTERSECT = 5,
+    QS_TYPE_ROR_UNION = 6,
+    QS_TYPE_GROUP_MIN_MAX = 7
   };
 
   /* Get type of this quick select - one of the QS_TYPE_* values */
@@ -314,6 +329,10 @@ public:
     Save ROWID of last retrieved row in file->ref. This used in ROR-merging.
   */
   virtual void save_last_pos(){};
+  
+  void add_key_and_length(String *key_names,
+                          String *used_lengths,
+                          bool *first);
 
   /*
     Append comma-separated list of keys this quick select uses to key_names;
@@ -323,13 +342,16 @@ public:
   virtual void add_keys_and_lengths(String *key_names,
                                     String *used_lengths)=0;
 
+  void add_key_name(String *str, bool *first);
+
   /*
     Append text representation of quick select structure (what and how is
     merged) to str. The result is added to "Extra" field in EXPLAIN output.
     This function is implemented only by quick selects that merge other quick
     selects output and/or can produce output suitable for merging.
   */
-  virtual void add_info_string(String *str) {};
+  virtual void add_info_string(String *str) {}
+
   /*
     Return 1 if any index used by this quick select
     uses field which is marked in passed bitmap.
@@ -372,6 +394,22 @@ struct st_qsel_param;
 class PARAM;
 class SEL_ARG;
 
+
+/*
+  MRR range sequence, array<QUICK_RANGE> implementation: sequence traversal
+  context.
+*/
+typedef struct st_quick_range_seq_ctx
+{
+  QUICK_RANGE **first;
+  QUICK_RANGE **cur;
+  QUICK_RANGE **last;
+} QUICK_RANGE_SEQ_CTX;
+
+range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags);
+bool quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
+
+
 /*
   Quick select that does a range scan on a single key. The records are
   returned in key order.
@@ -379,60 +417,47 @@ class SEL_ARG;
 class QUICK_RANGE_SELECT : public QUICK_SELECT_I
 {
 protected:
-  bool next,dont_free,in_ror_merged_scan;
-public:
-  int error;
-protected:
+  /* true if we enabled key only reads */
+  bool doing_key_read;
   handler *file;
-  /*
-    If true, this quick select has its "own" handler object which should be
-    closed no later then this quick select is deleted.
-  */
-  bool free_file;
-  bool in_range;
-  uint multi_range_count; /* copy from thd->variables.multi_range_count */
-  uint multi_range_length; /* the allocated length for the array */
-  uint multi_range_bufsiz; /* copy from thd->variables.read_rnd_buff_size */
-  KEY_MULTI_RANGE *multi_range; /* the multi-range array (allocated and
-                                       freed by QUICK_RANGE_SELECT) */
-  HANDLER_BUFFER *multi_range_buff; /* the handler buffer (allocated and
-                                       freed by QUICK_RANGE_SELECT) */
+
+  /* Members to deal with case when this quick select is a ROR-merged scan */
+  bool in_ror_merged_scan;
   MY_BITMAP column_bitmap, *save_read_set, *save_write_set;
+  bool free_file;   /* TRUE <=> this->file is "owned" by this quick select */
 
-  friend class TRP_ROR_INTERSECT;
-  friend
-  QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
-                                               struct st_table_ref *ref,
-                                               ha_rows records);
-  friend bool get_quick_keys(PARAM *param,
-                             QUICK_RANGE_SELECT *quick,KEY_PART *key,
-                             SEL_ARG *key_tree,
-                             uchar *min_key, uint min_key_flag,
-                             uchar *max_key, uint max_key_flag);
-  friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,uint idx,
-                                              SEL_ARG *key_tree,
-                                              MEM_ROOT *alloc);
-  friend class QUICK_SELECT_DESC;
-  friend class QUICK_INDEX_MERGE_SELECT;
-  friend class QUICK_ROR_INTERSECT_SELECT;
-  friend class QUICK_GROUP_MIN_MAX_SELECT;
+  /* Range pointers to be used when not using MRR interface */
+  /* Members needed to use the MRR interface */
+  QUICK_RANGE_SEQ_CTX qr_traversal_ctx;
+public:
+  uint mrr_flags; /* Flags to be used with MRR interface */
+protected:
+  uint mrr_buf_size; /* copy from thd->variables.mrr_buff_size */  
+  HANDLER_BUFFER *mrr_buf_desc; /* the handler buffer */
 
+  /* Info about index we're scanning */
+  
   DYNAMIC_ARRAY ranges;     /* ordered array of range ptrs */
   QUICK_RANGE **cur_range;  /* current element in ranges  */
-
+  
   QUICK_RANGE *last_range;
+  
   KEY_PART *key_parts;
   KEY_PART_INFO *key_part_info;
+  
+  bool dont_free; /* Used by QUICK_SELECT_DESC */
+
   int cmp_next(QUICK_RANGE *range);
   int cmp_prev(QUICK_RANGE *range);
   bool row_in_ranges();
 public:
   MEM_ROOT alloc;
 
-  QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0,
-                     MEM_ROOT *parent_alloc=NULL);
+  QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc,
+                     MEM_ROOT *parent_alloc, bool *create_err);
   ~QUICK_RANGE_SELECT();
-
+  
+  void need_sorted_output();
   int init();
   int reset(void);
   int get_next();
@@ -453,6 +478,38 @@ public:
   QUICK_SELECT_I *make_reverse(uint used_key_parts_arg);
 private:
   /* Default copy ctor used by QUICK_SELECT_DESC */
+  friend class TRP_ROR_INTERSECT;
+  friend
+  QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
+                                               struct st_table_ref *ref,
+                                               ha_rows records);
+  friend bool get_quick_keys(PARAM *param, QUICK_RANGE_SELECT *quick, 
+                             KEY_PART *key, SEL_ARG *key_tree, 
+                             uchar *min_key, uint min_key_flag,
+                             uchar *max_key, uint max_key_flag);
+  friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,uint idx,
+                                              SEL_ARG *key_tree,
+                                              uint mrr_flags,
+                                              uint mrr_buf_size,
+                                              MEM_ROOT *alloc);
+  friend class QUICK_SELECT_DESC;
+  friend class QUICK_INDEX_SORT_SELECT;
+  friend class QUICK_INDEX_MERGE_SELECT;
+  friend class QUICK_ROR_INTERSECT_SELECT;
+  friend class QUICK_INDEX_INTERSECT_SELECT;
+  friend class QUICK_GROUP_MIN_MAX_SELECT;
+  friend bool quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
+  friend range_seq_t quick_range_seq_init(void *init_param,
+                                          uint n_ranges, uint flags);
+  friend 
+  int read_keys_and_merge_scans(THD *thd, TABLE *head,
+                                List<QUICK_RANGE_SELECT> quick_selects,
+                                QUICK_RANGE_SELECT *pk_quick_select,
+                                READ_RECORD *read_record,
+                                bool intersection,
+                                key_map *filtered_scans,
+                                Unique **unique_ptr);
+
 };
 
 
@@ -460,48 +517,53 @@ class QUICK_RANGE_SELECT_GEOM: public QUICK_RANGE_SELECT
 {
 public:
   QUICK_RANGE_SELECT_GEOM(THD *thd, TABLE *table, uint index_arg,
-                          bool no_alloc, MEM_ROOT *parent_alloc)
-    :QUICK_RANGE_SELECT(thd, table, index_arg, no_alloc, parent_alloc)
+                          bool no_alloc, MEM_ROOT *parent_alloc, 
+                          bool *create_err)
+    :QUICK_RANGE_SELECT(thd, table, index_arg, no_alloc, parent_alloc,
+    create_err)
     {};
   virtual int get_next();
 };
 
 
 /*
-  QUICK_INDEX_MERGE_SELECT - index_merge access method quick select.
+  QUICK_INDEX_SORT_SELECT is the base class for the common functionality of:
+  - QUICK_INDEX_MERGE_SELECT, access based on multi-index merge/union 
+  - QUICK_INDEX_INTERSECT_SELECT, access based on  multi-index intersection 
+    
 
-    QUICK_INDEX_MERGE_SELECT uses
+    QUICK_INDEX_SORT_SELECT uses
      * QUICK_RANGE_SELECTs to get rows
-     * Unique class to remove duplicate rows
+     * Unique class
+       - to remove duplicate rows for QUICK_INDEX_MERGE_SELECT
+       - to intersect rows for QUICK_INDEX_INTERSECT_SELECT
 
   INDEX MERGE OPTIMIZER
-    Current implementation doesn't detect all cases where index_merge could
+    Current implementation doesn't detect all cases where index merge could
     be used, in particular:
-     * index_merge will never be used if range scan is possible (even if
-       range scan is more expensive)
 
-     * index_merge+'using index' is not supported (this the consequence of
-       the above restriction)
+     * index_merge+'using index' is not supported
 
      * If WHERE part contains complex nested AND and OR conditions, some ways
-       to retrieve rows using index_merge will not be considered. The choice
+       to retrieve rows using index merge will not be considered. The choice
        of read plan may depend on the order of conjuncts/disjuncts in WHERE
        part of the query, see comments near imerge_list_or_list and
        SEL_IMERGE::or_sel_tree_with_checks functions for details.
 
-     * There is no "index_merge_ref" method (but index_merge on non-first
+     * There is no "index_merge_ref" method (but index merge on non-first
        table in join is possible with 'range checked for each record').
 
-    See comments around SEL_IMERGE class and test_quick_select for more
-    details.
 
   ROW RETRIEVAL ALGORITHM
 
-    index_merge uses Unique class for duplicates removal.  index_merge takes
-    advantage of Clustered Primary Key (CPK) if the table has one.
-    The index_merge algorithm consists of two phases:
+    Index merge uses the Unique class to remove duplicate rows; index
+    intersection uses it to intersect the rows found by the merged scans.
+    Both take advantage of a Clustered Primary Key (CPK) if the table has one.
+    The index merge/intersection algorithm consists of two phases:
+
+    Phase 1 
+    (implemented by a QUICK_INDEX_MERGE_SELECT::read_keys_and_merge call):
 
-    Phase 1 (implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique):
     prepare()
     {
       activate 'index only';
@@ -515,32 +577,32 @@ public:
       deactivate 'index only';
     }
 
-    Phase 2 (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next
-    calls):
+    Phase 2 
+    (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next calls):
 
     fetch()
     {
-      retrieve all rows from row pointers stored in Unique;
+      retrieve all rows from row pointers stored in Unique
+      (merging/intersecting them);
       free Unique;
-      retrieve all rows for CPK scan;
+      if (! intersection) 
+        retrieve all rows for CPK scan;
     }
 */
 
-class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I
+class QUICK_INDEX_SORT_SELECT : public QUICK_SELECT_I
 {
+protected:
   Unique *unique;
 public:
-  QUICK_INDEX_MERGE_SELECT(THD *thd, TABLE *table);
-  ~QUICK_INDEX_MERGE_SELECT();
+  QUICK_INDEX_SORT_SELECT(THD *thd, TABLE *table);
+  ~QUICK_INDEX_SORT_SELECT();
 
   int  init();
+  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
   int  reset(void);
-  int  get_next();
   bool reverse_sorted() { return false; }
   bool unique_key_range() { return false; }
-  int get_type() { return QS_TYPE_INDEX_MERGE; }
-  void add_keys_and_lengths(String *key_names, String *used_lengths);
-  void add_info_string(String *str);
   bool is_keys_used(const MY_BITMAP *fields);
 #ifndef DBUG_OFF
   void dbug_dump(int indent, bool verbose);
@@ -548,21 +610,14 @@ public:
 
   bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range);
 
-  /* range quick selects this index_merge read consists of */
+  /* range quick selects this index merge/intersect consists of */
   List<QUICK_RANGE_SELECT> quick_selects;
 
   /* quick select that uses clustered primary key (NULL if none) */
   QUICK_RANGE_SELECT* pk_quick_select;
 
-  /* true if this select is currently doing a clustered PK scan */
-  bool  doing_pk_scan;
-
   MEM_ROOT alloc;
   THD *thd;
-  int read_keys_and_merge();
-
-  bool clustered_pk_range() { return test(pk_quick_select); }
-
   virtual bool is_valid()
   {
     List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
@@ -578,12 +633,48 @@ public:
     }
     return valid;
   }
-
+  virtual int read_keys_and_merge()= 0;
   /* used to get rows collected in Unique */
   READ_RECORD read_record;
 };
 
 
+
+class QUICK_INDEX_MERGE_SELECT : public QUICK_INDEX_SORT_SELECT
+{
+private:
+  /* true if this select is currently doing a clustered PK scan */
+  bool  doing_pk_scan;
+protected:
+  int read_keys_and_merge();
+
+public:
+  QUICK_INDEX_MERGE_SELECT(THD *thd, TABLE *table)
+    :QUICK_INDEX_SORT_SELECT(thd, table), doing_pk_scan(FALSE) {}
+  /* doing_pk_scan starts FALSE so it is never read uninitialized */
+  int get_next();
+  int get_type() { return QS_TYPE_INDEX_MERGE; }
+  void add_keys_and_lengths(String *key_names, String *used_lengths);
+  void add_info_string(String *str);
+};
+
+class QUICK_INDEX_INTERSECT_SELECT : public QUICK_INDEX_SORT_SELECT
+{
+protected:
+  int read_keys_and_merge();
+
+public:
+  QUICK_INDEX_INTERSECT_SELECT(THD *thd, TABLE *table)
+    :QUICK_INDEX_SORT_SELECT(thd, table) {}
+
+  key_map filtered_scans;
+  int get_next();
+  int get_type() { return QS_TYPE_INDEX_INTERSECT; }
+  void add_keys_and_lengths(String *key_names, String *used_lengths);
+  void add_info_string(String *str);
+};
+
+
 /*
   Rowid-Ordered Retrieval (ROR) index intersection quick select.
   This quick select produces intersection of row sequences returned
@@ -611,6 +702,7 @@ public:
   ~QUICK_ROR_INTERSECT_SELECT();
 
   int  init();
+  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -623,22 +715,30 @@ public:
   void dbug_dump(int indent, bool verbose);
 #endif
   int init_ror_merged_scan(bool reuse_handler);
-  bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range);
+  bool push_quick_back(MEM_ROOT *alloc, QUICK_RANGE_SELECT *quick_sel_range);
+
+  class QUICK_SELECT_WITH_RECORD : public Sql_alloc
+  { /* Element of quick_selects: a range scan plus its key_tuple pointer */
+  public:
+    QUICK_RANGE_SELECT *quick;   /* deleted by the destructor below */
+    uchar *key_tuple;  /* NOTE(review): not freed here; confirm who owns it */
+    ~QUICK_SELECT_WITH_RECORD() { delete quick; }
+  };
 
   /*
     Range quick selects this intersection consists of, not including
     cpk_quick.
   */
-  List<QUICK_RANGE_SELECT> quick_selects;
+  List<QUICK_SELECT_WITH_RECORD> quick_selects;
 
   virtual bool is_valid()
   {
-    List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
-    QUICK_RANGE_SELECT *quick;
+    List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
+    QUICK_SELECT_WITH_RECORD *quick;
     bool valid= true;
     while ((quick= it++))
     {
-      if (!quick->is_valid())
+      if (!quick->quick->is_valid())
       {
         valid= false;
         break;
@@ -681,6 +781,7 @@ public:
   ~QUICK_ROR_UNION_SELECT();
 
   int  init();
+  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -775,6 +876,8 @@ private:
   bool have_max;         /*   a MIN, a MAX, or both.         */
   bool have_agg_distinct;/*   aggregate_function(DISTINCT ...).  */
   bool seen_first_key;   /* Denotes whether the first key was retrieved.*/
+  bool doing_key_read;   /* true if we enabled key only reads */
+
   KEY_PART_INFO *min_max_arg_part; /* The keypart of the only argument field */
                                    /* of all MIN/MAX functions.              */
   uint min_max_arg_len;  /* The length of the MIN/MAX argument field */
@@ -822,6 +925,7 @@ public:
   void adjust_prefix_ranges();
   bool alloc_buffers();
   int init();
+  void need_sorted_output() { /* always do it */ }
   int reset();
   int get_next();
   bool reverse_sorted() { return false; }
@@ -864,6 +968,13 @@ class SQL_SELECT :public Sql_alloc {
  public:
   QUICK_SELECT_I *quick;	// If quick-select used
   COND		*cond;		// where condition
+
+  /*
+    When using Index Condition Pushdown: condition that we've had before
+    extracting and pushing index condition.
+    In other cases, NULL.
+  */
+  Item *pre_idx_push_select_cond;
   TABLE	*head;
   IO_CACHE file;		// Positions to used records
   ha_rows records;		// Records in use if read from file
@@ -881,29 +992,41 @@ class SQL_SELECT :public Sql_alloc {
   {
     key_map tmp;
     tmp.set_all();
-    return test_quick_select(thd, tmp, 0, limit, force_quick_range) < 0;
+    return test_quick_select(thd, tmp, 0, limit, force_quick_range, FALSE) < 0;
   }
-  inline bool skip_record(THD *thd, bool *skip_record)
+  /*
+    Evaluate cond, if there is one.
+    RETURN  0   record must be skipped <-> (cond && cond->val_int() == 0)
+           -1   thd->is_error() after the evaluation (takes precedence over 0/1)
+            1   otherwise, i.e. there is no cond or it evaluated to non-zero
+  */
+  inline int skip_record(THD *thd)
   {
-    *skip_record= cond ? cond->val_int() == FALSE : FALSE;
-    return thd->is_error();
+    int rc= test(!cond || cond->val_int());
+    if (thd->is_error())
+      rc= -1;
+    return rc;
   }
   int test_quick_select(THD *thd, key_map keys, table_map prev_tables,
-			ha_rows limit, bool force_quick_range);
+			ha_rows limit, bool force_quick_range, 
+                        bool ordered_output);
 };
 
 
-class FT_SELECT: public QUICK_RANGE_SELECT {
+class FT_SELECT: public QUICK_RANGE_SELECT 
+{
 public:
-  FT_SELECT(THD *thd, TABLE *table, uint key) :
-      QUICK_RANGE_SELECT (thd, table, key, 1) { (void) init(); }
+  FT_SELECT(THD *thd, TABLE *table, uint key, bool *create_err) :
+      QUICK_RANGE_SELECT (thd, table, key, 1, NULL, create_err) 
+  { (void) init(); }  /* the result of ft_init() is discarded here */
   ~FT_SELECT() { file->ft_end(); }
-  int init() { return error=file->ft_init(); }
+  int init() { return file->ft_init(); }  /* result is no longer stored in 'error' */
   int reset() { return 0; }
-  int get_next() { return error=file->ft_read(record); }
+  int get_next() { return file->ha_ft_read(record); }  /* ha_ft_read(), not ft_read() */
   int get_type() { return QS_TYPE_FULLTEXT; }
 };
 
+FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key);
 QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
                                              struct st_table_ref *ref,
                                              ha_rows records);
diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc
new file mode 100644
index 00000000000..160b783715c
--- /dev/null
+++ b/sql/opt_range_mrr.cc
@@ -0,0 +1,346 @@
+
+/****************************************************************************
+  MRR Range Sequence Interface implementation that walks a SEL_ARG* tree.
+ ****************************************************************************/
+
+/* MRR range sequence, SEL_ARG* implementation: stack entry */
+typedef struct st_range_seq_entry 
+{
+  /* 
+    Pointers in min and max keys. They point to right-after-end of key
+    images. The 0-th entry has these pointing to key tuple start.
+  */
+  uchar *min_key, *max_key;
+  
+  /* 
+    Flags, for {keypart0, keypart1, ... this_keypart} subtuple.
+    min_key_flag may have NULL_RANGE set.
+  */
+  uint min_key_flag, max_key_flag;
+  
+  /* Number of key parts */
+  uint min_key_parts, max_key_parts;
+  SEL_ARG *key_tree; /* SEL_ARG node this entry was built from; NULL in the 0-th entry */
+} RANGE_SEQ_ENTRY;
+
+
+/*
+  MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context
+*/
+typedef struct st_sel_arg_range_seq
+{
+  uint keyno;      /* index of used tree in SEL_TREE structure */
+  uint real_keyno; /* Number of the index in tables */
+  PARAM *param;
+  SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */
+  /* stack[0] is the base entry; step_down_to() fills stack[i+1] for each step */
+  RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
+  int i; /* Index of last used element in the above array */
+  /* NOTE(review): confirm i+1 never reaches MAX_REF_PARTS (the base entry takes a slot) */
+  bool at_start; /* TRUE <=> The traversal has just started */
+} SEL_ARG_RANGE_SEQ;
+
+
+/*
+  Range sequence interface, SEL_ARG* implementation: Initialize the traversal
+
+  SYNOPSIS
+    sel_arg_range_seq_init()
+      init_param   SEL_ARG tree traversal context (a SEL_ARG_RANGE_SEQ*)
+      n_ranges     [ignored] The number of ranges obtained 
+      flags        [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+  RETURN
+    Value of init_param
+*/
+
+range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)init_param;
+  seq->at_start= TRUE;
+  seq->stack[0].key_tree= NULL;    /* base entry: no SEL_ARG node, empty keys */
+  seq->stack[0].min_key= seq->param->min_key;
+  seq->stack[0].min_key_flag= 0;
+  seq->stack[0].min_key_parts= 0;
+
+  seq->stack[0].max_key= seq->param->max_key;
+  seq->stack[0].max_key_flag= 0;
+  seq->stack[0].max_key_parts= 0;
+  seq->i= 0;
+  return init_param;
+}
+
+/* Push a new top stack entry for key_tree, derived from the current top entry */
+static void step_down_to(SEL_ARG_RANGE_SEQ *arg, SEL_ARG *key_tree)
+{
+  RANGE_SEQ_ENTRY *cur= &arg->stack[arg->i+1];
+  RANGE_SEQ_ENTRY *prev= &arg->stack[arg->i];
+  /* Start from a copy of the previous entry's key end pointers and part counts */
+  cur->key_tree= key_tree;
+  cur->min_key= prev->min_key;
+  cur->max_key= prev->max_key;
+  cur->min_key_parts= prev->min_key_parts;
+  cur->max_key_parts= prev->max_key_parts;
+  /* Let key_tree store its min/max values; the returned counts are added */
+  uint16 stor_length= arg->param->key[arg->keyno][key_tree->part].store_length;
+  cur->min_key_parts += key_tree->store_min(stor_length, &cur->min_key,
+                                            prev->min_key_flag);
+  cur->max_key_parts += key_tree->store_max(stor_length, &cur->max_key,
+                                            prev->max_key_flag);
+  /* Flags accumulate: previous entry's flags OR-ed with this node's flags */
+  cur->min_key_flag= prev->min_key_flag | key_tree->min_flag;
+  cur->max_key_flag= prev->max_key_flag | key_tree->max_flag;
+
+  if (key_tree->is_null_interval())
+    cur->min_key_flag |= NULL_RANGE;
+  (arg->i)++;  /* the entry just filled becomes the new top of the stack */
+}
+
+
+/*
+  Range sequence interface, SEL_ARG* implementation: get the next interval
+  
+  SYNOPSIS
+    sel_arg_range_seq_next()
+      rseq        Value returned from sel_arg_range_seq_init
+      range  OUT  Store information about the range here
+
+  DESCRIPTION
+    This is the "get_next" function of the Range sequence interface
+    implementation for a SEL_ARG* tree.
+
+  IMPLEMENTATION
+    The traversal also updates these param members:
+      - is_ror_scan
+      - range_count
+      - max_key_part
+
+  RETURN
+    FALSE  Ok
+    TRUE   No more ranges in the sequence
+*/
+
+#if (_MSC_FULL_VER == 160030319)
+/*
+   Workaround Visual Studio 2010 RTM compiler backend bug, the function enters 
+   infinite loop.
+ */
+#pragma optimize("g", off)
+#endif
+
+bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  SEL_ARG *key_tree;
+  SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)rseq;
+  if (seq->at_start)
+  {
+    key_tree= seq->start;
+    seq->at_start= FALSE;
+    goto walk_up_n_right;  /* first call: start walking from the root node */
+  }
+
+  key_tree= seq->stack[seq->i].key_tree;
+  /* Ok, we're at some "full tuple" position in the tree */
+ 
+  /* Step down if we can */
+  if (key_tree->next && key_tree->next != &null_element)
+  {
+    //step down; (update the tuple, we'll step right and stay there)
+    seq->i--;  /* drop the top entry; step_down_to() re-fills it at i+1 */
+    step_down_to(seq, key_tree->next);
+    key_tree= key_tree->next;
+    seq->param->is_ror_scan= FALSE;
+    goto walk_right_n_up;
+  }
+
+  /* Ok, can't step down, walk left until we can step down */
+  while (1)
+  {
+    if (seq->i == 1) // can't step left
+      return 1;  /* TRUE: no more ranges in the sequence */
+    /* Step left */
+    seq->i--;
+    key_tree= seq->stack[seq->i].key_tree;
+
+    /* Step down if we can */
+    if (key_tree->next && key_tree->next != &null_element)
+    {
+      // Step down; update the tuple
+      seq->i--;  /* drop the top entry; step_down_to() re-fills it at i+1 */
+      step_down_to(seq, key_tree->next);
+      key_tree= key_tree->next;
+      break;
+    }
+  }
+
+  /*
+    Ok, we've stepped down from the path to previous tuple.
+    Walk right-up while we can
+  */
+walk_right_n_up:
+  while (key_tree->next_key_part && key_tree->next_key_part != &null_element && 
+         key_tree->next_key_part->part == key_tree->part + 1 &&
+         key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
+  {
+    {
+      RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
+      uint min_key_length= cur->min_key - seq->param->min_key;
+      uint max_key_length= cur->max_key - seq->param->max_key;
+      uint len= cur->min_key - cur[-1].min_key;
+      if (!(min_key_length == max_key_length &&
+            !memcmp(cur[-1].min_key, cur[-1].max_key, len) &&
+            !key_tree->min_flag && !key_tree->max_flag))
+      {
+        seq->param->is_ror_scan= FALSE;
+        if (!key_tree->min_flag)
+          cur->min_key_parts += 
+            key_tree->next_key_part->store_min_key(seq->param->key[seq->keyno],
+                                                   &cur->min_key,
+                                                   &cur->min_key_flag, MAX_KEY);
+        if (!key_tree->max_flag)
+          cur->max_key_parts += 
+            key_tree->next_key_part->store_max_key(seq->param->key[seq->keyno],
+                                                   &cur->max_key,
+                                                   &cur->max_key_flag, MAX_KEY);
+        break;
+      }
+    }
+  
+    /*
+      Ok, current atomic interval is in form "t.field=const" and there is
+      next_key_part interval. Step right, and walk up from there.
+    */
+    key_tree= key_tree->next_key_part;
+
+walk_up_n_right:
+    while (key_tree->prev && key_tree->prev != &null_element)
+    {
+      /* Step up */
+      key_tree= key_tree->prev;
+    }
+    step_down_to(seq, key_tree);
+  }
+
+  /* Ok got a tuple */
+  RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
+  uint min_key_length= cur->min_key - seq->param->min_key;
+  
+  range->ptr= (char*)(intptr)(key_tree->part);
+  if (cur->min_key_flag & GEOM_FLAG)
+  {
+    range->range_flag= cur->min_key_flag;
+
+    /* Here minimum contains also function code bits, and maximum is +inf */
+    range->start_key.key=    seq->param->min_key;
+    range->start_key.length= min_key_length;
+    range->start_key.flag=  (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
+  }
+  else
+  {
+    range->range_flag= cur->min_key_flag | cur->max_key_flag;
+    
+    range->start_key.key=    seq->param->min_key;
+    range->start_key.length= cur->min_key - seq->param->min_key;
+    range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
+    range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY : 
+                                                           HA_READ_KEY_EXACT);
+
+    range->end_key.key=    seq->param->max_key;
+    range->end_key.length= cur->max_key - seq->param->max_key;
+    range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY : 
+                                                         HA_READ_AFTER_KEY);
+    range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts);
+
+    if (!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag &&
+        (uint)key_tree->part+1 == seq->param->table->key_info[seq->real_keyno].key_parts &&
+        (seq->param->table->key_info[seq->real_keyno].flags & HA_NOSAME) &&
+        range->start_key.length == range->end_key.length &&
+        !memcmp(seq->param->min_key,seq->param->max_key,range->start_key.length))
+      range->range_flag= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE);
+      
+    if (seq->param->is_ror_scan)
+    {
+      /*
+        If we get here, the condition on the key was converted to form
+        "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
+          somecond(keyXpart{key_tree->part})"
+        Check if
+          somecond is "keyXpart{key_tree->part} = const" and
+          uncovered "tail" of KeyX parts is either empty or is identical to
+          first members of clustered primary key.
+      */
+      if (!(!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag &&
+            (range->start_key.length == range->end_key.length) &&
+            !memcmp(range->start_key.key, range->end_key.key, range->start_key.length) &&
+            is_key_scan_ror(seq->param, seq->real_keyno, key_tree->part + 1)))
+        seq->param->is_ror_scan= FALSE;
+    }
+  }
+  seq->param->range_count++;
+  seq->param->max_key_part=max(seq->param->max_key_part,key_tree->part);
+  return 0;  /* FALSE: *range has been filled in */
+}
+
+#if (_MSC_FULL_VER == 160030319)
+/* VS2010 compiler bug workaround */
+#pragma optimize("g", on)
+#endif
+
+
+/****************************************************************************
+  MRR Range Sequence Interface implementation that walks array<QUICK_RANGE>
+ ****************************************************************************/
+
+/*
+  Range sequence interface implementation for array<QUICK_RANGE>: initialize
+  
+  SYNOPSIS
+    quick_range_seq_init()
+      init_param  Caller-opaque parameter: QUICK_RANGE_SELECT* pointer
+      n_ranges    Number of ranges in the sequence (ignored)
+      flags       MRR flags (currently not used) 
+
+  RETURN
+    Opaque value to be passed to quick_range_seq_next
+*/
+
+range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  QUICK_RANGE_SELECT *quick= (QUICK_RANGE_SELECT*)init_param;
+  quick->qr_traversal_ctx.first=  (QUICK_RANGE**)quick->ranges.buffer;
+  quick->qr_traversal_ctx.cur=    (QUICK_RANGE**)quick->ranges.buffer;
+  quick->qr_traversal_ctx.last=   quick->qr_traversal_ctx.cur + 
+                                  quick->ranges.elements;  /* one past the final range */
+  return &quick->qr_traversal_ctx;  /* the context lives inside the quick select itself */
+}
+
+
+/*
+  Range sequence interface implementation for array<QUICK_RANGE>: get next
+  
+  SYNOPSIS
+    quick_range_seq_next()
+      rseq        Value returned from quick_range_seq_init
+      range  OUT  Store information about the range here
+
+  RETURN
+    0  Ok
+    1  No more ranges in the sequence
+*/
+
+bool quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq;
+
+  if (ctx->cur == ctx->last)
+    return 1; /* no more ranges */
+
+  QUICK_RANGE *cur= *(ctx->cur);
+  cur->make_min_endpoint(&range->start_key);
+  cur->make_max_endpoint(&range->end_key);
+  range->range_flag= cur->flag;
+  ctx->cur++;  /* advance the cursor for the next call */
+  return 0;
+}
+
+
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
new file mode 100644
index 00000000000..c1fe8de51a4
--- /dev/null
+++ b/sql/opt_subselect.cc
@@ -0,0 +1,4731 @@
+/**
+  @file
+
+  @brief
+    Semi-join subquery optimizations code
+
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#include "sql_select.h"
+#include "opt_subselect.h"
+#include "sql_test.h"
+#include <my_bit.h>
+
+/*
+  This file contains optimizations for semi-join subqueries.
+  
+  Contents
+  --------
+  1. What is a semi-join subquery
+  2. General idea about semi-join execution
+  2.1 Correlated vs uncorrelated semi-joins
+  2.2 Mergeable vs non-mergeable semi-joins
+  3. Code-level view of semi-join processing
+  3.1 Conversion and pre-optimization data structures
+  3.1.1 Merged semi-join TABLE_LIST object
+  3.1.2 Non-merged semi-join data structure
+  3.2 Semi-joins and join optimization
+  3.2.1 Non-merged semi-joins and join optimization
+  3.2.2 Merged semi-joins and join optimization
+  3.3 Semi-joins and query execution
+
+  1. What is a semi-join subquery
+  -------------------------------
+  We use this definition of semi-join:
+
+    outer_tbl SEMI JOIN inner_tbl ON cond = {set of outer_tbl.row such that
+                                             exist inner_tbl.row, for which 
+                                             cond(outer_tbl.row,inner_tbl.row)
+                                             is satisfied}
+  
+  That is, semi-join operation is similar to inner join operation, with
+  exception that we don't care how many matches a row from outer_tbl has in
+  inner_tbl.
+
+  In SQL terms: a semi-join subquery is an IN subquery that is an AND-part of
+  the WHERE/ON clause.
+
+  2. General idea about semi-join execution
+  -----------------------------------------
+  We can execute semi-join in a way similar to inner join, with exception that
+  we need to somehow ensure that we do not generate record combinations that
+  differ only in rows of inner tables.
+  There are a number of different ways to achieve this property, implemented
+  by a number of semi-join execution strategies.
+  Some strategies can handle any semi-join, others can be applied only to
+  semi-joins that have certain properties that are described below:
+
+  2.1 Correlated vs uncorrelated semi-joins
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  Uncorrelated semi-joins are special in that they make it possible to
+   - execute the subquery (possible as it's uncorrelated)
+   - somehow make sure that generated set does not have duplicates
+   - perform an inner join with outer tables.
+  
+  or, rephrasing in SQL form:
+
+  SELECT ... FROM ot WHERE ot.col IN (SELECT it.col FROM it WHERE uncorr_cond)
+    ->
+  SELECT ... FROM ot JOIN (SELECT DISTINCT it.col FROM it WHERE uncorr_cond)
+
+  2.2 Mergeable vs non-mergeable semi-joins
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  Semi-join operation has some degree of commutability with inner join
+  operation: we can join subquery's tables with outside table(s) and eliminate
+  duplicate record combinations after that:
+
+    ot1 JOIN ot2 SEMI_JOIN{it1,it2} (it1 JOIN it2) ON sjcond(ot2,it*) ->
+              |
+              +-------------------------------+
+                                              v
+    ot1 SEMI_JOIN{it1,it2} (it1 JOIN it2 JOIN ot2) ON sjcond(ot2,it*)
+ 
+  In order for this to work, subquery's top-level operation must be join, and
+  not grouping or ordering with limit (grouping or ordering with limit are not
+  commutative with duplicate removal). In other words, the conversion is
+  possible when the subquery doesn't have GROUP BY clause, any aggregate
+  functions*, or ORDER BY ... LIMIT clause.
+
+  Definitions:
+  - Subquery whose top-level operation is a join is called *mergeable semi-join*
+  - All other kinds of semi-join subqueries are considered non-mergeable.
+
+  *- this requirement is actually too strong, but its exceptions are too
+  complicated to be considered here.
+
+  3. Code-level view of semi-join processing
+  ------------------------------------------
+  
+  3.1 Conversion and pre-optimization data structures
+  ---------------------------------------------------
+  * When doing JOIN::prepare for the subquery, we detect that it can be
+    converted into a semi-join and register it in parent_join->sj_subselects
+
+  * At the start of parent_join->optimize(), the predicate is converted into 
+    a semi-join node. A semi-join node is a TABLE_LIST object that is linked
+    somewhere in parent_join->join_list (either it is just present there, or
+    it is a descendant of some of its members).
+  
+  There are two kinds of semi-joins:
+  - Merged semi-joins
+  - Non-merged semi-joins
+   
+  3.1.1 Merged semi-join TABLE_LIST object
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  Merged semi-join object is a TABLE_LIST that contains a sub-join of 
+  subquery tables and the semi-join ON expression (in this respect it is 
+  very similar to nested outer join representation)
+  Merged semi-join represents this SQL:
+
+    ... SEMI JOIN (inner_tbl1 JOIN ... JOIN inner_tbl_n) ON sj_on_expr
+  
+  Semi-join objects of this kind have TABLE_LIST::sj_subq_pred set.
+ 
+  3.1.2 Non-merged semi-join data structure
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  Non-merged semi-join object is a leaf TABLE_LIST object that has a subquery
+  that produces rows. It is similar to a base table and represents this SQL:
+    
+    ... SEMI_JOIN (SELECT non_mergeable_select) ON sj_on_expr
+  
+  Subquery items that were converted into semi-joins are removed from the WHERE
+  clause. (They do remain in PS-saved WHERE clause, and they replace themselves
+  with Item_int(1) on subsequent re-executions).
+
+  3.2 Semi-joins and join optimization
+  ------------------------------------
+  
+  3.2.1 Non-merged semi-joins and join optimization
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  For join optimization purposes, non-merged semi-join nests are similar to
+  base tables - they've got one JOIN_TAB, which can be accessed with one of
+  two methods:
+   - full table scan (representing SJ-Materialization-Scan strategy)
+   - eq_ref-like table lookup (representing SJ-Materialization-Lookup)
+
+  Unlike regular base tables, non-merged semi-joins have:
+   - non-zero JOIN_TAB::startup_cost, and
+   - join_tab->table->is_filled_at_execution()==TRUE, which means one
+     cannot do const table detection or range analysis or other table data-
+     dependent inferences
+  // instead, get_delayed_table_estimates() runs optimization on the nest so that 
+  // we get an idea about temptable size
+  
+  3.2.2 Merged semi-joins and join optimization
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   - optimize_semijoin_nests() does pre-optimization 
+   - during join optimization, the join has one JOIN_TAB (or is it POSITION?) 
+     array, and suffix-based detection is used, see advance_sj_state()
+   - after join optimization is done, get_best_combination() switches 
+     the data-structure to prefix-based, multiple JOIN_TAB ranges format.
+
+  3.3 Semi-joins and query execution
+  ----------------------------------
+  * Join executor has hooks for all semi-join strategies.
+    TODO elaborate.
+
+*/
+
+
+static
+bool subquery_types_allow_materialization(Item_in_subselect *in_subs);
+static bool replace_where_subcondition(JOIN *join, Item **expr, 
+                                       Item *old_cond, Item *new_cond,
+                                       bool do_fix_fields);
+static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2,
+                                 void *arg);
+static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred);
+static bool convert_subq_to_jtbm(JOIN *parent_join, 
+                                 Item_in_subselect *subq_pred, bool *remove);
+static TABLE_LIST *alloc_join_nest(THD *thd);
+static uint get_tmp_table_rec_length(Item **p_list, uint elements);
+static double get_tmp_table_lookup_cost(THD *thd, double row_count,
+                                        uint row_size);
+static double get_tmp_table_write_cost(THD *thd, double row_count,
+                                       uint row_size);
+bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables);
+static SJ_MATERIALIZATION_INFO *
+at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
+             uint idx, bool *loose_scan);
+void best_access_path(JOIN *join, JOIN_TAB *s, 
+                             table_map remaining_tables, uint idx, 
+                             bool disable_jbuf, double record_count,
+                             POSITION *pos, POSITION *loose_scan_pos);
+
+static Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm, 
+                                Item_in_subselect *subq_pred);
+static void remove_sj_conds(Item **tree);
+static bool is_cond_sj_in_equality(Item *item);
+static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab);
+static Item *remove_additional_cond(Item* conds);
+static void remove_subq_pushed_predicates(JOIN *join, Item **where);
+
+enum_nested_loop_state 
+end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
+
+
+/*
+  Check if we need JOIN::prepare()-phase subquery rewrites and if yes, do them
+
+  SYNOPSIS
+     check_and_do_in_subquery_rewrites()
+       join  Subquery's join
+
+  DESCRIPTION
+    Check if we need to do
+     - subquery -> mergeable semi-join rewrite
+     - if the subquery can be handled with materialization
+     - 'substitution' rewrite for table-less subqueries like "(select 1)"
+     - IN->EXISTS rewrite
+    and, depending on the rewrite, either do it, or record it to be done at a
+    later phase.
+
+  RETURN
+    0      - OK
+    Other  - Some sort of query error
+*/
+
+int check_and_do_in_subquery_rewrites(JOIN *join)
+{
+  THD *thd=join->thd;
+  st_select_lex *select_lex= join->select_lex;
+  st_select_lex_unit* parent_unit= select_lex->master_unit();
+  DBUG_ENTER("check_and_do_in_subquery_rewrites");
+  /*
+    If 
+      1) this join is inside a subquery (of any type except FROM-clause 
+         subquery) and
+      2) we aren't just normalizing a VIEW
+
+    Then perform early unconditional subquery transformations:
+     - Convert subquery predicate into semi-join, or
+     - Mark the subquery for execution using materialization, or
+     - Perform IN->EXISTS transformation, or
+     - Perform more/less ALL/ANY -> MIN/MAX rewrite
+     - Substitute trivial scalar-context subquery with its value
+
+    TODO: for PS, make the whole block execute only on the first execution
+  */
+  Item_subselect *subselect;
+  if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) && // (1)
+      (subselect= parent_unit->item))                                    // (2)
+  {
+    Item_in_subselect *in_subs= NULL;
+    Item_allany_subselect *allany_subs= NULL;
+    switch (subselect->substype()) {
+    case Item_subselect::IN_SUBS:
+      in_subs= (Item_in_subselect *)subselect;
+      break;
+    case Item_subselect::ALL_SUBS:
+    case Item_subselect::ANY_SUBS:
+      allany_subs= (Item_allany_subselect *)subselect;
+      break;
+    default:
+      break;
+    }
+
+
+    /* Resolve expressions and perform semantic analysis for IN query */
+    if (in_subs != NULL)
+      /*
+        TODO: Add the condition below to this if statement when we have proper
+        support for is_correlated handling for materialized semijoins.
+        If we were to add this condition now, the fix_fields() call in
+        convert_subq_to_sj() would force the flag is_correlated to be set
+        erroneously for prepared queries.
+
+        thd->stmt_arena->state != Query_arena::PREPARED)
+      */
+    {
+      /*
+        Check if the left and right expressions have the same # of
+        columns, i.e. we don't have a case like 
+          (oe1, oe2) IN (SELECT ie1, ie2, ie3 ...)
+
+        TODO why do we have this duplicated in IN->EXISTS transformers?
+        psergey-todo: fix these: grep for duplicated_subselect_card_check
+      */
+      if (select_lex->item_list.elements != in_subs->left_expr->cols())
+      {
+        my_error(ER_OPERAND_COLUMNS, MYF(0), in_subs->left_expr->cols());
+        DBUG_RETURN(-1);
+      }
+
+      SELECT_LEX *current= thd->lex->current_select;
+      thd->lex->current_select= current->return_after_parsing();
+      char const *save_where= thd->where;
+      thd->where= "IN/ALL/ANY subquery";
+        
+      bool failure= !in_subs->left_expr->fixed &&
+                     in_subs->left_expr->fix_fields(thd, &in_subs->left_expr);
+      thd->lex->current_select= current;
+      thd->where= save_where;
+      if (failure)
+        DBUG_RETURN(-1); /* purecov: deadcode */
+    }
+    if (select_lex == parent_unit->fake_select_lex)
+    {
+      /*
+        The join and its select_lex object represent the 'fake' select used
+        to compute the result of a UNION.
+      */
+      DBUG_RETURN(0);
+    }
+
+    DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
+    /*
+      Check if we're in subquery that is a candidate for flattening into a
+      semi-join (which is done in flatten_subqueries()). The
+      requirements are:
+        1. Subquery predicate is an IN/=ANY subq predicate
+        2. Subquery is a single SELECT (not a UNION)
+        3. Subquery does not have GROUP BY or ORDER BY
+        4. Subquery does not use aggregate functions or HAVING
+        5. Subquery predicate is at the AND-top-level of ON/WHERE clause
+        6. We are not in a subquery of a single table UPDATE/DELETE that 
+             doesn't have a JOIN (TODO: We should handle this at some
+             point by switching to multi-table UPDATE/DELETE)
+        7. We're not in a table-less subquery like "SELECT 1"
+        8. No execution method was already chosen (by a prepared statement)
+        9. Parent select is not a table-less select
+        10. Neither parent nor child select have STRAIGHT_JOIN option.
+    */
+    if (optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN) &&
+        in_subs &&                                                    // 1
+        !select_lex->is_part_of_union() &&                            // 2
+        !select_lex->group_list.elements && !join->order &&           // 3
+        !join->having && !select_lex->with_sum_func &&                // 4
+        thd->thd_marker.emb_on_expr_nest &&                           // 5
+        select_lex->outer_select()->join &&                           // 6
+        parent_unit->first_select()->leaf_tables.elements &&          // 7
+        !in_subs->in_strategy &&                                      // 8
+        select_lex->outer_select()->leaf_tables.elements &&           // 9
+        !((join->select_options |                                     // 10
+           select_lex->outer_select()->join->select_options)          // 10
+          & SELECT_STRAIGHT_JOIN))                                    // 10
+    {
+      DBUG_PRINT("info", ("Subquery is semi-join conversion candidate"));
+
+      (void)subquery_types_allow_materialization(in_subs);
+
+      in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
+      in_subs->is_flattenable_semijoin= TRUE;
+
+      /* Register the subquery for further processing in flatten_subqueries() */
+      if (!in_subs->is_registered_semijoin)
+      {
+        Query_arena *arena, backup;
+        arena= thd->activate_stmt_arena_if_needed(&backup);
+        select_lex->outer_select()->sj_subselects.push_back(in_subs);
+        if (arena)
+          thd->restore_active_arena(arena, &backup);
+        in_subs->is_registered_semijoin= TRUE;
+      }
+    }
+    else
+    {
+      DBUG_PRINT("info", ("Subquery can't be converted to merged semi-join"));
+      /* Test if the user has set a legal combination of optimizer switches. */
+      if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) &&
+          !optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
+        my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0));
+
+      /*
+        If the subquery predicate is IN/=ANY, analyse and set all possible
+        subquery execution strategies based on optimizer switches and syntactic
+        properties.
+      */
+      if (in_subs)
+      {
+        /*
+          Check if the subquery predicate can be executed via materialization.
+          The required conditions are:
+          0. The materialization optimizer switch was set.
+          1. Subquery is a single SELECT (not a UNION).
+             TODO: this is a limitation that can be fixed
+          2. Subquery is not a table-less query. In this case there is no
+             point in materializing.
+          2A The upper query is not a table-less SELECT ... FROM DUAL. We
+             can't do materialization for SELECT .. FROM DUAL because it
+             does not call setup_subquery_materialization(). We could make 
+             SELECT ... FROM DUAL call that function but that doesn't seem
+             to be the case that is worth handling.
+          3. Either the subquery predicate is a top-level predicate, or at
+             least one partial match strategy is enabled. If no partial match
+             strategy is enabled, then materialization cannot be used for
+             non-top-level queries because it cannot handle NULLs correctly.
+          4. Subquery is non-correlated
+             TODO:
+             This condition is too restrictive (limitation). It can be extended to:
+             (Subquery is non-correlated ||
+              Subquery is correlated to any query outer to IN predicate ||
+              (Subquery is correlated to the immediate outer query &&
+               Subquery !contains {GROUP BY, ORDER BY [LIMIT],
+               aggregate functions}) && subquery predicate is not under "NOT IN"))
+
+          (*) The subquery must be part of a SELECT statement. The current
+               condition also excludes multi-table update statements.
+        A note about prepared statements: we want the if-branch to be taken on
+        PREPARE and each EXECUTE. The rewrites are only done once, but we need 
+        select_lex->sj_subselects list to be populated for every EXECUTE. 
+
+        */
+        if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) &&      // 0
+            !select_lex->is_part_of_union() &&                            // 1
+            parent_unit->first_select()->leaf_tables.elements &&          // 2
+            thd->lex->sql_command == SQLCOM_SELECT &&                     // *
+            select_lex->outer_select()->leaf_tables.elements &&           // 2A
+            subquery_types_allow_materialization(in_subs) &&
+            (in_subs->is_top_level_item() ||                               //3
+             optimizer_flag(thd,
+                            OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || //3
+             optimizer_flag(thd,
+                            OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) && //3
+            !in_subs->is_correlated)                                       //4
+       {
+          in_subs->in_strategy|= SUBS_MATERIALIZATION;
+
+          /*
+            If the subquery is an AND-part of WHERE register for being processed
+            with jtbm strategy
+          */
+          if (thd->thd_marker.emb_on_expr_nest == NO_JOIN_NEST &&
+              optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN))
+          {
+            in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
+            in_subs->is_flattenable_semijoin= FALSE;
+            if (!in_subs->is_registered_semijoin)
+	    {
+              Query_arena *arena, backup;
+              arena= thd->activate_stmt_arena_if_needed(&backup);
+              select_lex->outer_select()->sj_subselects.push_back(in_subs);
+              if (arena)
+                thd->restore_active_arena(arena, &backup);
+              in_subs->is_registered_semijoin= TRUE;
+            }
+          }
+        }
+
+        /*
+          IN-TO-EXISTS is the only universal strategy. Choose it if the user
+          allowed it via an optimizer switch, or if materialization is not
+          possible.
+        */
+        if (optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) ||
+            !in_subs->in_strategy)
+        {
+          in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
+        }
+      }
+
+      /* Check if max/min optimization applicable */
+      if (allany_subs)
+        allany_subs->in_strategy|= (allany_subs->is_maxmin_applicable(join) ?
+                                    (SUBS_MAXMIN_INJECTED | SUBS_MAXMIN_ENGINE) :
+                                    SUBS_IN_TO_EXISTS);
+
+      /*
+        Transform each subquery predicate according to its overloaded
+        transformer.
+      */
+      if (subselect->select_transformer(join))
+        DBUG_RETURN(-1);
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+
+/**
+  @brief Check if subquery's compared types allow materialization.
+
+  @param in_subs Subquery predicate, updated as follows:
+    types_allow_materialization TRUE if subquery materialization is allowed.
+    sjm_scan_allowed            If types_allow_materialization is TRUE,
+                                indicates whether it is possible to use subquery
+                                materialization and scan the materialized table.
+
+  @retval TRUE   If subquery types allow materialization.
+  @retval FALSE  Otherwise.
+
+  @details
+    This is a temporary fix for BUG#36752.
+    
+    There are two subquery materialization strategies:
+
+    1. Materialize and do index lookups in the materialized table. See 
+       BUG#36752 for description of restrictions we need to put on the
+       compared expressions.
+
+    2. Materialize and then do a full scan of the materialized table. At the
+       moment, this strategy's applicability criteria are even stricter than
+       in #1.
+
+       This is so because of the following: consider an uncorrelated subquery
+       
+       ...WHERE (ot1.col1, ot2.col2 ...) IN (SELECT ie1,ie2,... FROM it1 ...)
+
+       and a join order that could be used to do sjm-materialization: 
+          
+          SJM-Scan(it1, it1), ot1, ot2
+       
+       IN-equalities will be parts of conditions attached to the outer tables:
+
+         ot1:  ot1.col1 = ie1 AND ... (C1)
+         ot2:  ot2.col2 = ie2 AND ... (C2)
+       
+       besides those there may be additional references to ie1 and ie2
+       generated by equality propagation. The problem with evaluating C1 and
+       C2 is that ie{1,2} refer to subquery tables' columns, while we only have 
+       current value of materialization temptable. Our solution is to 
+        * require that all ie{N} are table column references. This allows 
+          to copy the values of materialization temptable columns to the
+          original table's columns (see setup_sj_materialization for more
+          details)
+        * require that compared columns have exactly the same type. This is
+          a temporary measure to avoid BUG#36752-type problems.
+*/
+
+static
+bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
+{
+  DBUG_ENTER("subquery_types_allow_materialization");
+
+  DBUG_ASSERT(in_subs->left_expr->fixed);
+
+  List_iterator<Item> it(in_subs->unit->first_select()->item_list);
+  uint elements= in_subs->unit->first_select()->item_list.elements;
+
+  in_subs->types_allow_materialization= FALSE;  // Assign default values
+  in_subs->sjm_scan_allowed= FALSE;
+  /* Compare the i-th left-hand expression with the i-th select list item */
+  bool all_are_fields= TRUE;
+  for (uint i= 0; i < elements; i++)
+  {
+    Item *outer= in_subs->left_expr->element_index(i);
+    Item *inner= it++;
+    all_are_fields &= (outer->real_item()->type() == Item::FIELD_ITEM &&
+                       inner->real_item()->type() == Item::FIELD_ITEM);
+    if (outer->cmp_type() != inner->cmp_type())
+      DBUG_RETURN(FALSE);
+    switch (outer->cmp_type()) {
+    case STRING_RESULT:
+      if (!(outer->collation.collation == inner->collation.collation))
+        DBUG_RETURN(FALSE);
+      // Materialization does not work with BLOB columns
+      if (inner->field_type() == MYSQL_TYPE_BLOB ||
+          inner->field_type() == MYSQL_TYPE_GEOMETRY)
+        DBUG_RETURN(FALSE);
+      break;
+    case TIME_RESULT:  /* both sides must map to the same time type */
+      if (mysql_type_to_time_type(outer->field_type()) !=
+          mysql_type_to_time_type(inner->field_type()))
+        DBUG_RETURN(FALSE);
+    default:
+      /* suitable for materialization (TIME_RESULT falls through to here) */
+      break;
+    }
+  }
+  /* Every pair passed; scanning is allowed only if all items are fields */
+  in_subs->types_allow_materialization= TRUE;
+  in_subs->sjm_scan_allowed= all_are_fields;
+  DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed"));
+  DBUG_RETURN(TRUE);
+}
+
+
+/**
+  Apply max min optimization of all/any subselect
+*/
+
+bool JOIN::transform_max_min_subquery()
+{
+  DBUG_ENTER("JOIN::transform_max_min_subquery");
+  Item_subselect *pred= unit->item;   // subquery item attached to this unit
+  /* Only ALL/ANY subquery predicates take part in the MAX/MIN rewrite. */
+  if (pred && (pred->substype() == Item_subselect::ALL_SUBS ||
+               pred->substype() == Item_subselect::ANY_SUBS))
+    DBUG_RETURN(((Item_allany_subselect *) pred)->transform_into_max_min(this));
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Finalize IN->EXISTS conversion in case we couldn't use materialization.
+
+  DESCRIPTION  Invoke the IN->EXISTS converter
+    Replace the Item_in_subselect with its wrapper Item_in_optimizer in WHERE.
+
+  RETURN 
+    FALSE - Ok
+    TRUE  - Fatal error
+*/
+
+bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item)
+{
+  DBUG_ENTER("make_in_exists_conversion");
+  JOIN *child_join= item->unit->first_select()->join;
+  bool res;
+
+  /*
+    We're going to finalize the IN->EXISTS conversion.
+    Normally, the IN->EXISTS conversion takes place inside the
+    Item_subselect::fix_fields() call, where item_subselect->fixed==FALSE (as
+    fix_fields() hasn't finished yet) and item_subselect->changed==FALSE (as
+    the conversion hasn't been finalized).
+
+    At the end of Item_subselect::fix_fields() we had to set fixed=TRUE,
+    changed=TRUE (the only other option would have been to return error).
+
+    So, now we have to set these back for the duration of the
+    select_transformer() call.
+  */
+  item->changed= 0;
+  item->fixed= 0;
+  /* Run the transformer with the subquery's first select as the current one */
+  SELECT_LEX *save_select_lex= thd->lex->current_select;
+  thd->lex->current_select= item->unit->first_select();
+
+  res= item->select_transformer(child_join);
+
+  thd->lex->current_select= save_select_lex;
+  /* NOTE: on failure, item->changed and item->fixed are left reset to 0 */
+  if (res)
+    DBUG_RETURN(TRUE);
+  /* The transformer succeeded: mark the item as changed and fixed again */
+  item->changed= 1;
+  item->fixed= 1;
+
+  Item *substitute= item->substitution;
+  bool do_fix_fields= !item->substitution->fixed;
+  /*
+    The Item_subselect has already been wrapped with Item_in_optimizer, so we
+    should search for item->optimizer, not 'item'.
+  */
+  Item *replace_me= item->optimizer;
+  DBUG_ASSERT(replace_me==substitute);
+  /* Search join->conds, or the ON expression the predicate is attached to */
+  Item **tree= (item->emb_on_expr_nest == NO_JOIN_NEST)?
+                 &join->conds : &(item->emb_on_expr_nest->on_expr);
+  if (replace_where_subcondition(join, tree, replace_me, substitute, 
+                                 do_fix_fields))
+    DBUG_RETURN(TRUE);
+  item->substitution= NULL;
+
+  /*
+    If this is a prepared statement, repeat the above operation for
+    prep_where (or prep_on_expr). No fix_fields() call is requested here.
+  */
+  if (!thd->stmt_arena->is_conventional())
+  {
+    tree= (item->emb_on_expr_nest == (TABLE_LIST*)NO_JOIN_NEST)?
+           &join->select_lex->prep_where : 
+           &(item->emb_on_expr_nest->prep_on_expr);
+
+    if (replace_where_subcondition(join, tree, replace_me, substitute, 
+                                   FALSE))
+      DBUG_RETURN(TRUE);
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Check whether a join list contains an outer join at any nesting level.
+
+  RETURN
+    TRUE   some element of join_list (or of a nest inside it) has outer_join set
+    FALSE  otherwise
+*/
+bool check_for_outer_joins(List<TABLE_LIST> *join_list)
+{
+  List_iterator<TABLE_LIST> tbl_it(*join_list);
+  for (TABLE_LIST *tbl= tbl_it++; tbl; tbl= tbl_it++)
+  {
+    NESTED_JOIN *nest= tbl->nested_join;
+    if ((nest && check_for_outer_joins(&nest->join_list)) || tbl->outer_join)
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Convert semi-join subquery predicates into semi-join join nests
+
+  SYNOPSIS
+    convert_join_subqueries_to_semijoins()
+ 
+  DESCRIPTION
+
+    Convert candidate subquery predicates into semi-join join nests. This 
+    transformation is performed once in query lifetime and is irreversible.
+    
+    Conversion of one subquery predicate
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    We start with a join that has a semi-join subquery:
+
+      SELECT ...
+      FROM ot, ...
+      WHERE oe IN (SELECT ie FROM it1 ... itN WHERE subq_where) AND outer_where
+
+    and convert it into a semi-join nest:
+
+      SELECT ...
+      FROM ot SEMI JOIN (it1 ... itN), ...
+      WHERE outer_where AND subq_where AND oe=ie
+
+    that is, in order to do the conversion, we need to 
+
+     * Create the "SEMI JOIN (it1 .. itN)" part and add it into the parent
+       query's FROM structure.
+     * Add "AND subq_where AND oe=ie" into parent query's WHERE (or ON if
+       the subquery predicate was in an ON expression)
+     * Remove the subquery predicate from the parent query's WHERE
+
+    Considerations when converting many predicates
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    A join may have at most MAX_TABLES tables. This may prevent us from
+    flattening all subqueries when the total number of tables in parent and
+    child selects exceeds MAX_TABLES.
+    We deal with this problem by flattening children's subqueries first and
+    then using a heuristic rule to determine each subquery predicate's
+    "priority".
+
+  RETURN 
+    FALSE  OK
+    TRUE   Error
+*/
+
+bool convert_join_subqueries_to_semijoins(JOIN *join)
+{
+  Query_arena *arena, backup;
+  Item_in_subselect *in_subq;
+  THD *thd= join->thd;
+  DBUG_ENTER("convert_join_subqueries_to_semijoins");
+
+  if (join->select_lex->sj_subselects.is_empty())
+    DBUG_RETURN(FALSE);
+
+  List_iterator_fast<Item_in_subselect> li(join->select_lex->sj_subselects);
+
+  /* 0. Run the DT_OPTIMIZE and DT_MERGE derived-table phases per subquery */
+  while ((in_subq= li++))
+  {
+    SELECT_LEX *subq_sel= in_subq->get_select_lex();
+    if (subq_sel->handle_derived(thd->lex, DT_OPTIMIZE))
+      DBUG_RETURN(TRUE);
+    if (subq_sel->handle_derived(thd->lex, DT_MERGE))
+      DBUG_RETURN(TRUE);
+    subq_sel->update_used_tables();
+  }
+
+  li.rewind();
+  /* 1. First, convert child join's subqueries. We proceed bottom-up here */
+  while ((in_subq= li++))
+  {
+    st_select_lex *child_select= in_subq->get_select_lex();
+    JOIN *child_join= child_select->join;
+    child_join->outer_tables = child_join->table_count;
+
+    /*
+      child_select->where contains only the WHERE predicate of the
+      subquery itself here. We may be selecting from a VIEW, which has its
+      own predicate. The combined predicates are available in child_join->conds,
+      which was built by setup_conds() doing prepare_where() for all views.
+    */
+    child_select->where= child_join->conds;
+
+    if (convert_join_subqueries_to_semijoins(child_join))
+      DBUG_RETURN(TRUE);
+    in_subq->sj_convert_priority=
+      test(in_subq->emb_on_expr_nest != NO_JOIN_NEST) * MAX_TABLES * 2 +
+      in_subq->is_correlated * MAX_TABLES + child_join->outer_tables;
+  }
+
+  /*
+    2. Pick which subqueries to convert:
+      sort the subquery array
+      - prefer correlated subqueries over uncorrelated;
+      - prefer subqueries that have greater number of outer tables;
+  */
+  bubble_sort<Item_in_subselect>(&join->select_lex->sj_subselects,
+                                 subq_sj_candidate_cmp, NULL);
+  // #tables-in-parent-query + #tables-in-subquery < MAX_TABLES
+  /*
+    Replace all subqueries to be flattened with Item_int(1). From here on the
+    statement arena may be active, so every error path must leave through
+    restore_arena_and_fail instead of returning directly.
+  */
+  arena= thd->activate_stmt_arena_if_needed(&backup);
+
+  li.rewind();
+  while ((in_subq= li++))
+  {
+    bool remove_item= TRUE;
+
+    /* Stop processing if we've reached a subquery that's attached to the ON clause */
+    if (in_subq->emb_on_expr_nest != NO_JOIN_NEST)
+      break;
+
+    if (in_subq->is_flattenable_semijoin)
+    {
+      if (join->table_count +
+          in_subq->unit->first_select()->join->table_count >= MAX_TABLES)
+        break;
+      if (convert_subq_to_sj(join, in_subq))
+        goto restore_arena_and_fail;
+    }
+    else
+    {
+      if (join->table_count + 1 >= MAX_TABLES)
+        break;
+      if (convert_subq_to_jtbm(join, in_subq, &remove_item))
+        goto restore_arena_and_fail;
+    }
+    if (remove_item)
+    {
+      Item **tree= (in_subq->emb_on_expr_nest == NO_JOIN_NEST)?
+                     &join->conds : &(in_subq->emb_on_expr_nest->on_expr);
+      Item *replace_me= in_subq->original_item();
+      if (replace_where_subcondition(join, tree, replace_me, new Item_int(1),
+                                     FALSE))
+        goto restore_arena_and_fail; /* purecov: inspected */
+    }
+  }
+  /*
+    3. Finalize (perform IN->EXISTS rewrite) the subqueries that we didn't
+    convert:
+  */
+  while (in_subq)
+  {
+    JOIN *child_join= in_subq->unit->first_select()->join;
+    in_subq->changed= 0;
+    in_subq->fixed= 0;
+
+    SELECT_LEX *save_select_lex= thd->lex->current_select;
+    thd->lex->current_select= in_subq->unit->first_select();
+
+    bool res= in_subq->select_transformer(child_join);
+
+    thd->lex->current_select= save_select_lex;
+
+    if (res)
+      goto restore_arena_and_fail;
+
+    in_subq->changed= 1;
+    in_subq->fixed= 1;
+
+    Item *substitute= in_subq->substitution;
+    bool do_fix_fields= !in_subq->substitution->fixed;
+    Item **tree= (in_subq->emb_on_expr_nest == NO_JOIN_NEST)?
+                   &join->conds : &(in_subq->emb_on_expr_nest->on_expr);
+    Item *replace_me= in_subq->original_item();
+    if (replace_where_subcondition(join, tree, replace_me, substitute,
+                                   do_fix_fields))
+      goto restore_arena_and_fail;
+    in_subq->substitution= NULL;
+#if 0
+    /*
+      Don't do the following, because the simplify_join() call is after this
+      call, and that call will save to prep_where/prep_on_expr.
+    */
+
+    /*
+      If this is a prepared statement, repeat the above operation for
+      prep_where (or prep_on_expr). Subquery-to-semijoin conversion is
+      done once for prepared statement.
+    */
+    if (!thd->stmt_arena->is_conventional())
+    {
+      tree= (in_subq->emb_on_expr_nest == NO_JOIN_NEST)?
+             &join->select_lex->prep_where :
+             &(in_subq->emb_on_expr_nest->prep_on_expr);
+
+      if (replace_where_subcondition(join, tree, replace_me, substitute,
+                                     FALSE))
+        goto restore_arena_and_fail;
+    }
+#endif
+    /*
+      Revert to the IN->EXISTS strategy in the rare case when the subquery could
+      not be flattened.
+      TODO: This is a limitation done for simplicity. Such subqueries could also
+      be executed via materialization. In order to determine this, we should
+      re-run the test for materialization that was done in
+      check_and_do_in_subquery_rewrites.
+    */
+    in_subq->in_strategy= SUBS_IN_TO_EXISTS;
+    in_subq= li++;
+  }
+
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  join->select_lex->sj_subselects.empty();
+  DBUG_RETURN(FALSE);
+
+restore_arena_and_fail:
+  /*
+    Common error exit for failures that happen after the statement arena was
+    activated: undo the activation before reporting the error to the caller.
+  */
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  Get #output_rows and scan_time estimates for a "delayed" table.
+
+  SYNOPSIS
+    get_delayed_table_estimates()
+      table         IN    Table to get estimates for
+      out_rows      OUT   E(#rows in the table)
+      scan_time     OUT   E(scan_time).
+      startup_cost  OUT   cost to populate the table.
+
+  DESCRIPTION
+    Get #output_rows and scan_time estimates for a "delayed" table. By
+    "delayed" here we mean that the table is filled at the start of query
+    execution. This means that the optimizer can't use table statistics to 
+    get #rows estimate for it, it has to call this function instead.
+
+    This function is expected to make different actions depending on the nature
+    of the table. At the moment there is only one kind of delayed tables,
+    non-flattenable semi-joins.
+*/
+
+void get_delayed_table_estimates(TABLE *table,
+                                 ha_rows *out_rows,
+                                 double *scan_time,
+                                 double *startup_cost)
+{
+  Item_in_subselect *jtbm_pred= table->pos_in_table_list->jtbm_subselect;
+  DBUG_ASSERT(jtbm_pred->engine->engine_type() ==
+              subselect_engine::HASH_SJ_ENGINE);
+  subselect_hash_sj_engine *sj_engine=
+    (subselect_hash_sj_engine*) jtbm_pred->engine;
+  /* Row estimate and population cost are taken from the predicate as is */
+  *out_rows= (ha_rows) jtbm_pred->jtbm_record_count;
+  *startup_cost= jtbm_pred->jtbm_read_time;
+
+  /*
+    Scan cost of the temptable: estimated bytes (rows * record length)
+    divided by IO_SIZE, plus 2. Modelled on handler::read_time.
+  */
+  double tmp_bytes= jtbm_pred->jtbm_record_count *
+                    sj_engine->tmp_table->s->reclength;
+  *scan_time= tmp_bytes/IO_SIZE + 2;
+}
+
+
+/**
+   @brief Replaces an expression destructively inside the expression tree of
+   the WHERE clause.
+
+   @note Because of current requirements for semijoin flattening, we do not
+   need to recurse here, hence this function will only examine the top-level
+   AND conditions. (see JOIN::prepare, comment starting with "Check if the 
+   subquery predicate can be executed via materialization".)
+   
+   @param join The top-level query.
+   @param old_cond The expression to be replaced.
+   @param new_cond The expression to be substituted.
+   @param do_fix_fields If true, Item::fix_fields(THD*, Item**) is called for
+   the new expression.
+   @return <code>true</code> if there was an error, <code>false</code> if
+   successful.
+*/
+
+static bool replace_where_subcondition(JOIN *join, Item **expr,
+                                       Item *old_cond, Item *new_cond,
+                                       bool do_fix_fields)
+{
+  if (*expr == old_cond)
+  {
+    /* The whole expression is the condition being replaced. */
+    *expr= new_cond;
+    if (do_fix_fields)
+      new_cond->fix_fields(join->thd, expr);
+    return FALSE;
+  }
+  if ((*expr)->type() != Item::COND_ITEM)
+  {
+    /* Neither old_cond itself nor an Item_cond: prerequisites were wrong. */
+    DBUG_ASSERT(0);
+    return TRUE;
+  }
+  List_iterator<Item> arg_it(*((Item_cond*)(*expr))->argument_list());
+  for (Item *arg= arg_it++; arg; arg= arg_it++)
+  {
+    if (arg != old_cond)
+      continue;
+    arg_it.replace(new_cond);
+    if (do_fix_fields)
+      new_cond->fix_fields(join->thd, arg_it.ref());
+    return FALSE;
+  }
+  /* old_cond is not a direct argument: the prerequisites check failed. */
+  DBUG_ASSERT(0);
+  return TRUE;
+}
+
+static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2,
+                                 void *arg)
+{
+  /* Three-way comparison on sj_convert_priority; 'arg' is not used. */
+  if (el1->sj_convert_priority == el2->sj_convert_priority) return 0;
+  return (el1->sj_convert_priority > el2->sj_convert_priority) ? 1 : -1;
+}
+
+/*
+  Convert a subquery predicate into a TABLE_LIST semi-join nest
+
+  SYNOPSIS
+    convert_subq_to_sj()
+       parent_join  Parent join, the one that has subq_pred in its WHERE/ON 
+                    clause
+       subq_pred    Subquery predicate to be converted
+  
+  DESCRIPTION
+    Convert a subquery predicate into a TABLE_LIST semi-join nest. All the 
+    prerequisites are already checked, so the conversion is always successful.
+
+    Prepared Statements: the transformation is permanent:
+     - Changes in TABLE_LIST structures are naturally permanent
+     - Item tree changes are performed on statement MEM_ROOT:
+        = we activate statement MEM_ROOT 
+        = this function is called before the first fix_prepare_information
+          call.
+
+    This is intended because the criteria for subquery-to-sj conversion remain
+    constant for the lifetime of the Prepared Statement.
+
+  RETURN
+    FALSE  OK
+    TRUE   Out of memory error
+*/
+
+static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
+{
+  SELECT_LEX *parent_lex= parent_join->select_lex;
+  TABLE_LIST *emb_tbl_nest= NULL;
+  List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
+  THD *thd= parent_join->thd;
+  DBUG_ENTER("convert_subq_to_sj");
+
+  /*
+    1. Find out where to put the predicate into.
+     Note: for "t1 LEFT JOIN t2" this will be t2, a leaf.
+  */
+  if ((void*)subq_pred->emb_on_expr_nest != (void*)NO_JOIN_NEST)
+  {
+    if (subq_pred->emb_on_expr_nest->nested_join)
+    {
+      /*
+        We're dealing with
+
+          ... [LEFT] JOIN  ( ... ) ON (subquery AND whatever) ...
+
+        The sj-nest will be inserted into the brackets nest.
+      */
+      emb_tbl_nest=  subq_pred->emb_on_expr_nest;
+      emb_join_list= &emb_tbl_nest->nested_join->join_list;
+    }
+    else if (!subq_pred->emb_on_expr_nest->outer_join)
+    {
+      /*
+        We're dealing with
+
+          ... INNER JOIN tblX ON (subquery AND whatever) ...
+
+        The sj-nest will be tblX's "sibling", i.e. another child of its
+        parent. This is ok because tblX is joined as an inner join.
+      */
+      emb_tbl_nest= subq_pred->emb_on_expr_nest->embedding;
+      if (emb_tbl_nest)
+        emb_join_list= &emb_tbl_nest->nested_join->join_list;
+    }
+    else if (!subq_pred->emb_on_expr_nest->nested_join)
+    {
+      TABLE_LIST *outer_tbl= subq_pred->emb_on_expr_nest;
+      TABLE_LIST *wrap_nest;
+      /*
+        We're dealing with
+
+          ... LEFT JOIN tbl ON (on_expr AND subq_pred) ...
+
+        we'll need to convert it into:
+
+          ... LEFT JOIN ( tbl SJ (subq_tables) ) ON (on_expr AND subq_pred) ...
+                        |                      |
+                        |<----- wrap_nest ---->|
+        
+        Q:  other subqueries may be pointing to this element. What to do?
+        A1: simple solution: copy *subq_pred->expr_join_nest= *parent_nest.
+            But we'll need to fix other pointers.
+        A2: Another way: have TABLE_LIST::next_ptr so the following
+            subqueries know the table has been nested.
+        A3: changes in the TABLE_LIST::outer_join will make everything work
+            automatically.
+      */
+      if (!(wrap_nest= alloc_join_nest(parent_join->thd)))
+      {
+        DBUG_RETURN(TRUE);
+      }
+      wrap_nest->embedding= outer_tbl->embedding;
+      wrap_nest->join_list= outer_tbl->join_list;
+      wrap_nest->alias= (char*) "(sj-wrap)";
+
+      wrap_nest->nested_join->join_list.empty();
+      wrap_nest->nested_join->join_list.push_back(outer_tbl);
+
+      outer_tbl->embedding= wrap_nest;
+      outer_tbl->join_list= &wrap_nest->nested_join->join_list;
+
+      /*
+        wrap_nest will take place of outer_tbl, so move the outer join flag
+        and on_expr
+      */
+      wrap_nest->outer_join= outer_tbl->outer_join;
+      outer_tbl->outer_join= 0;
+
+      wrap_nest->on_expr= outer_tbl->on_expr;
+      outer_tbl->on_expr= NULL;
+
+      List_iterator<TABLE_LIST> li(*wrap_nest->join_list);
+      TABLE_LIST *tbl;
+      while ((tbl= li++))
+      {
+        if (tbl == outer_tbl)
+        {
+          li.replace(wrap_nest);
+          break;
+        }
+      }
+      /*
+        Ok now wrap_nest 'contains' outer_tbl and we're ready to add the 
+        semi-join nest into it
+      */
+      emb_join_list= &wrap_nest->nested_join->join_list;
+      emb_tbl_nest=  wrap_nest;
+    }
+  }
+
+  TABLE_LIST *sj_nest;
+  NESTED_JOIN *nested_join;
+  if (!(sj_nest= alloc_join_nest(parent_join->thd)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  nested_join= sj_nest->nested_join;
+
+  sj_nest->join_list= emb_join_list;
+  sj_nest->embedding= emb_tbl_nest;
+  sj_nest->alias= (char*) "(sj-nest)";
+  sj_nest->sj_subq_pred= subq_pred;
+  /* Nests do not participate in those 'chains', so: */
+  /* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/
+  emb_join_list->push_back(sj_nest);
+
+  /* 
+    nested_join->used_tables and nested_join->not_null_tables are
+    initialized in simplify_joins().
+  */
+  
+  /* 
+    2. Walk through subquery's top list and set 'embedding' to point to the
+       sj-nest.
+  */
+  st_select_lex *subq_lex= subq_pred->unit->first_select();
+  nested_join->join_list.empty();
+  List_iterator_fast<TABLE_LIST> li(subq_lex->top_join_list);
+  TABLE_LIST *tl;
+  while ((tl= li++))
+  {
+    tl->embedding= sj_nest;
+    tl->join_list= &nested_join->join_list;
+    nested_join->join_list.push_back(tl);
+  }
+  
+  /*
+    Reconnect the next_leaf chain.
+    TODO: Do we have to put subquery's tables at the end of the chain?
+          Inserting them at the beginning would be a bit faster.
+    NOTE: We actually insert them at the front! That's because the order is
+          reversed in this list.
+  */
+  parent_lex->leaf_tables.concat(&subq_lex->leaf_tables);
+
+  /*
+    Same as above for next_local chain
+    (a theory: a next_local chain always starts with ::leaf_tables
+     because view's tables are inserted after the view)
+  */
+  for (tl= parent_lex->leaf_tables.head(); tl->next_local; tl= tl->next_local) ;
+  tl->next_local= subq_lex->leaf_tables.head();
+
+  /* A theory: no need to re-connect the next_global chain */
+
+  /* 3. Remove the original subquery predicate from the WHERE/ON */
+
+  // The subqueries were replaced for Item_int(1) earlier
+  subq_pred->in_strategy= SUBS_SEMI_JOIN;         // for subsequent executions
+  /*TODO: also reset the 'with_subselect' there. */
+
+  /* n. Adjust the parent_join->table_count counter */
+  uint table_no= parent_join->table_count;
+  /* n. Walk through child's tables and adjust table->map */
+  List_iterator_fast<TABLE_LIST> si(subq_lex->leaf_tables);
+  while ((tl= si++))
+  {
+    tl->table->tablenr= table_no;
+    tl->table->map= ((table_map)1) << table_no;
+    if (tl->is_jtbm())
+      tl->jtbm_table_no= tl->table->tablenr;
+    SELECT_LEX *old_sl= tl->select_lex;
+    tl->select_lex= parent_join->select_lex; 
+    for (TABLE_LIST *emb= tl->embedding;
+         emb && emb->select_lex == old_sl;
+         emb= emb->embedding)
+      emb->select_lex= parent_join->select_lex;
+    table_no++;
+  }
+  parent_join->table_count += subq_lex->join->table_count;
+  //parent_join->table_count += subq_lex->leaf_tables.elements;
+
+  /* 
+    Put the subquery's WHERE into semi-join's sj_on_expr
+    Add the subquery-induced equalities too.
+  */
+  SELECT_LEX *save_lex= thd->lex->current_select;
+  thd->lex->current_select=subq_lex;
+  if (!subq_pred->left_expr->fixed &&
+       subq_pred->left_expr->fix_fields(thd, &subq_pred->left_expr))
+    DBUG_RETURN(TRUE);
+  thd->lex->current_select=save_lex;
+
+  sj_nest->nested_join->sj_corr_tables= subq_pred->used_tables();
+  sj_nest->nested_join->sj_depends_on=  subq_pred->used_tables() |
+                                        subq_pred->left_expr->used_tables();
+  sj_nest->sj_on_expr= subq_lex->join->conds;
+
+  /*
+    Create the IN-equalities and inject them into semi-join's ON expression.
+    Additionally, for LooseScan strategy
+     - Record the number of IN-equalities.
+     - Create list of pointers to (oe1, ..., ieN). We'll need the list to
+       see which of the expressions are bound and which are not (for those
+       we'll produce a distinct stream of (ie_i1,...ie_ik).
+
+       (TODO: can we just create a list of pointers and hope the expressions
+       will not substitute themselves on fix_fields()? or we need to wrap
+       them into Item_direct_view_refs and store pointers to those. The
+       pointers to Item_direct_view_refs are guaranteed to be stable as 
+       Item_direct_view_refs doesn't substitute itself with anything in 
+       Item_direct_view_ref::fix_fields.
+  */
+  sj_nest->sj_in_exprs= subq_pred->left_expr->cols();
+  sj_nest->nested_join->sj_outer_expr_list.empty();
+
+  if (subq_pred->left_expr->cols() == 1)
+  {
+    nested_join->sj_outer_expr_list.push_back(subq_pred->left_expr);
+    Item_func_eq *item_eq=
+      new Item_func_eq(subq_pred->left_expr, subq_lex->ref_pointer_array[0]);
+    item_eq->in_equality_no= 0;
+    sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq);
+  }
+  else
+  {
+    for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
+    {
+      nested_join->sj_outer_expr_list.push_back(subq_pred->left_expr->
+                                                element_index(i));
+      Item_func_eq *item_eq= 
+        new Item_func_eq(subq_pred->left_expr->element_index(i), 
+                         subq_lex->ref_pointer_array[i]);
+      item_eq->in_equality_no= i;
+      sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq);
+    }
+  }
+  /* Fix the created equality and AND */
+  if (!sj_nest->sj_on_expr->fixed)
+    sj_nest->sj_on_expr->fix_fields(parent_join->thd, &sj_nest->sj_on_expr);
+
+  /*
+    Walk through sj nest's WHERE and ON expressions and call
+    item->fix_after_pullout() for all items.
+  */
+  sj_nest->sj_on_expr->fix_after_pullout(parent_lex, &sj_nest->sj_on_expr);
+  fix_list_after_tbl_changes(parent_lex, &sj_nest->nested_join->join_list);
+
+
+  /* Unlink the child select_lex so it doesn't show up in EXPLAIN: */
+  subq_lex->master_unit()->exclude_level();
+
+  DBUG_EXECUTE("where",
+               print_where(sj_nest->sj_on_expr,"SJ-EXPR", QT_ORDINARY););
+
+  /* Inject sj_on_expr into the parent's WHERE or ON */
+  if (emb_tbl_nest)
+  {
+    emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, 
+                                     sj_nest->sj_on_expr);
+    emb_tbl_nest->on_expr->top_level_item();
+    if (!emb_tbl_nest->on_expr->fixed)
+      emb_tbl_nest->on_expr->fix_fields(parent_join->thd,
+                                        &emb_tbl_nest->on_expr);
+  }
+  else
+  {
+    /* Inject into the WHERE */
+    parent_join->conds= and_items(parent_join->conds, sj_nest->sj_on_expr);
+    parent_join->conds->top_level_item();
+    /*
+      fix_fields must update the properties (e.g. st_select_lex::cond_count) of
+      the correct select_lex.
+    */
+    save_lex= thd->lex->current_select;
+    thd->lex->current_select=parent_join->select_lex;
+    if (!parent_join->conds->fixed)
+      parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds);
+    thd->lex->current_select=save_lex;
+    parent_join->select_lex->where= parent_join->conds;
+  }
+
+  if (subq_lex->ftfunc_list->elements)
+  {
+    Item_func_match *ifm;
+    List_iterator_fast<Item_func_match> li(*(subq_lex->ftfunc_list));
+    while ((ifm= li++))
+      parent_lex->ftfunc_list->push_front(ifm);
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+const int SUBQERY_TEMPTABLE_NAME_MAX_LEN= 20;
+
+/* Write the alias "<subqueryN>" (N = number, in decimal) into buffer 'to' */
+static void create_subquery_temptable_name(char *to, uint number)
+{
+  DBUG_ASSERT(number < 10000);
+  char *tail= int10_to_str((int) number, strmov(to, "<subquery"), 10);
+  *tail++= '>';
+  *tail= 0;
+}
+
+
+/*
+  Convert subquery predicate into non-mergeable semi-join nest.
+
+  The predicate is optimized first; a table entry for its temporary table is
+  added to the parent join only if the engine is HASH_SJ_ENGINE. *remove_item
+  is set to TRUE in that case and to FALSE otherwise.
+  TODO: why does this do IN-EXISTS conversion? Can't we unify it with
+    mergeable semi-joins? Currently, convert_subq_to_sj() cannot fail to
+    convert (unless fatal errors).
+  RETURN 
+    FALSE - Ok
+    TRUE  - Fatal error (an allocation failed)
+*/
+
+static bool convert_subq_to_jtbm(JOIN *parent_join, 
+                                 Item_in_subselect *subq_pred, 
+                                 bool *remove_item)
+{
+  SELECT_LEX *parent_lex= parent_join->select_lex;
+  List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
+  TABLE_LIST *emb_tbl_nest= NULL; // will change when we learn to handle outer joins
+  TABLE_LIST *tl;
+  double rows;
+  double read_time;
+  DBUG_ENTER("convert_subq_to_jtbm");
+
+  subq_pred->in_strategy &= ~SUBS_IN_TO_EXISTS; /* clear the IN_TO_EXISTS bit */
+  subq_pred->optimize(&rows, &read_time);       /* result code is not checked */
+
+  subq_pred->jtbm_read_time= read_time;
+  subq_pred->jtbm_record_count=rows;
+  subq_pred->is_jtbm_merged= TRUE;
+
+  if (subq_pred->engine->engine_type() != subselect_engine::HASH_SJ_ENGINE)
+  {
+    *remove_item= FALSE;
+    DBUG_RETURN(FALSE);
+  }
+
+  /* Hash semi-join engine: add the subquery to the parent join as a table */
+  *remove_item= TRUE;
+
+  TABLE_LIST *jtbm;
+  char *tbl_alias;
+  if (!(tbl_alias= (char*)parent_join->thd->calloc(SUBQERY_TEMPTABLE_NAME_MAX_LEN)) ||
+      !(jtbm= alloc_join_nest(parent_join->thd))) //todo: this is not a join nest!
+  {
+    DBUG_RETURN(TRUE);
+  }
+
+  jtbm->join_list= emb_join_list;
+  jtbm->embedding= emb_tbl_nest;
+  jtbm->jtbm_subselect= subq_pred;
+  jtbm->nested_join= NULL;  /* alloc_join_nest() set it; jtbm is not a nest */
+
+  /* Nests do not participate in those 'chains', so: */
+  /* jtbm->next_leaf= jtbm->next_local= jtbm->next_global == NULL*/
+  emb_join_list->push_back(jtbm);
+  
+  /* 
+    Add the jtbm table to the parent select's leaf_tables list, so that 
+    make_join_statistics() and co. can find it.
+  */
+  parent_lex->leaf_tables.push_back(jtbm);
+
+  /*
+    Append jtbm to the next_local chain, walked from leaf_tables.head() (a
+    theory: the chain always starts there, as view's tables are inserted after
+    the view). NOTE(review): head() is jtbm if it is the only leaf -- confirm.
+  */
+  for (tl= parent_lex->leaf_tables.head(); tl->next_local; tl= tl->next_local)
+  {}
+  tl->next_local= jtbm;
+
+  /* A theory: no need to re-connect the next_global chain */
+
+  subselect_hash_sj_engine *hash_sj_engine=
+    ((subselect_hash_sj_engine*)subq_pred->engine);
+  jtbm->table= hash_sj_engine->tmp_table;
+
+  /* The new table takes the current table_count as its number and map bit */
+  jtbm->table->tablenr= parent_join->table_count;
+  jtbm->table->map= table_map(1) << (parent_join->table_count);
+  jtbm->jtbm_table_no= jtbm->table->tablenr;
+
+  parent_join->table_count++;
+  DBUG_ASSERT(parent_join->table_count < MAX_TABLES);
+
+  Item *conds= hash_sj_engine->semi_join_conds;
+  conds->fix_after_pullout(parent_lex, &conds);
+
+  DBUG_EXECUTE("where", print_where(conds,"SJ-EXPR", QT_ORDINARY););
+  
+  create_subquery_temptable_name(tbl_alias, hash_sj_engine->materialize_join->
+                                              select_lex->select_number);
+  jtbm->alias= tbl_alias;
+#if 0
+  /* Inject sj_on_expr into the parent's WHERE or ON */
+  if (emb_tbl_nest)
+  {
+    DBUG_ASSERT(0);
+    /*emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, 
+                                     sj_nest->sj_on_expr);
+    emb_tbl_nest->on_expr->fix_fields(parent_join->thd, &emb_tbl_nest->on_expr);
+    */
+  }
+  else
+  {
+    /* Inject into the WHERE */
+    parent_join->conds= and_items(parent_join->conds, conds);
+    parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds);
+    parent_join->select_lex->where= parent_join->conds;
+  }
+#endif
+  /* Don't unlink the child subselect, as the subquery will be used. */
+
+  DBUG_RETURN(FALSE);
+}
+
+
+static TABLE_LIST *alloc_join_nest(THD *thd)
+{
+  /* Single allocation: the TABLE_LIST, then its NESTED_JOIN at aligned offset */
+  void *chunk= thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST)) + sizeof(NESTED_JOIN));
+  if (chunk == NULL)
+    return NULL;
+  TABLE_LIST *nest= (TABLE_LIST*) chunk;
+  nest->nested_join= (NESTED_JOIN*) ((uchar*) chunk + ALIGN_SIZE(sizeof(TABLE_LIST)));
+  return nest;
+}
+
+
+void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist)
+{
+  /* Call fix_after_pullout() on each ON expr in tlist and its nested joins */
+  TABLE_LIST *tbl;
+  for (List_iterator<TABLE_LIST> li(*tlist); (tbl= li++); )
+  {
+    if (tbl->on_expr)
+      tbl->on_expr->fix_after_pullout(new_parent, &tbl->on_expr);
+    if (tbl->nested_join)
+      fix_list_after_tbl_changes(new_parent, &tbl->nested_join->join_list);
+  }
+}
+
+
+static void set_emb_join_nest(List<TABLE_LIST> *tables, TABLE_LIST *emb_sj_nest)
+{
+  TABLE_LIST *elem;
+  for (List_iterator<TABLE_LIST> li(*tables); (elem= li++); )
+  {
+    /*
+      Store emb_sj_nest in each table's JOIN_TAB. Check nested_join first:
+      derived-merged tables have table!=NULL && table->reginfo==NULL.
+    */
+    if (elem->nested_join)
+    {
+      set_emb_join_nest(&elem->nested_join->join_list, emb_sj_nest);
+      continue;
+    }
+    if (elem->table)
+      elem->table->reginfo.join_tab->emb_sj_nest= emb_sj_nest;
+  }
+}
+
+/*
+  Pull tables out of semi-join nests, if possible
+
+  SYNOPSIS
+    pull_out_semijoin_tables()
+      join  The join where to do the semi-join flattening
+
+  DESCRIPTION
+    Try to pull tables out of semi-join nests.
+     
+    PRECONDITIONS
+    When this function is called, the join may have several semi-join nests
+    but it is guaranteed that one semi-join nest does not contain another.
+   
+    ACTION
+    A table can be pulled out of the semi-join nest if
+     - It is accessed via eq_ref(outer_tables).
+     (Pull-out of constant tables is compiled out, see Action #1 in the code.)
+
+    POSTCONDITIONS
+     * Tables that were pulled out have JOIN_TAB::emb_sj_nest == NULL
+     * Tables that were not pulled out have JOIN_TAB::emb_sj_nest pointing 
+       to semi-join nest they are in.
+     * Semi-join nests' TABLE_LIST::sj_inner_tables is updated accordingly
+
+    This operation is (and should be) performed at each PS execution since
+    tables may become/cease to be constant across PS reexecutions.
+    
+  NOTE
+    Table pullout may make uncorrelated subquery correlated. Consider this
+    example:
+    
+     ... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... ) 
+    
+    here table it1 can be pulled out (we have it1.primary_key=oe which gives
+    us functional dependency). Once it1 is pulled out, all references to it1
+    from p(it1, it2) become references to outside of the subquery and thus
+    make the subquery (i.e. its semi-join nest) correlated.
+    Making the subquery (i.e. its semi-join nest) correlated prevents us from
+    using Materialization or LooseScan to execute it. 
+
+  RETURN 
+    0 - OK
+    1 - Out of memory error (the current code always returns 0)
+*/
+
+int pull_out_semijoin_tables(JOIN *join)
+{
+  TABLE_LIST *sj_nest;
+  DBUG_ENTER("pull_out_semijoin_tables");
+  List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
+   
+  /* Try pulling out of the each of the semi-joins */
+  while ((sj_nest= sj_list_it++))
+  {
+    List_iterator<TABLE_LIST> child_li(sj_nest->nested_join->join_list);
+    TABLE_LIST *tbl;
+
+    /*
+      Don't do table pull-out for nested joins (if we get nested joins here, it
+      means these are outer joins). It is theoretically possible to do pull-out
+      for some of the outer tables but we don't support this currently.
+    */
+    bool have_join_nest_children= FALSE;
+
+    set_emb_join_nest(&sj_nest->nested_join->join_list, sj_nest);
+
+    while ((tbl= child_li++))
+    {
+      if (tbl->nested_join)
+      {
+        have_join_nest_children= TRUE;
+        break;
+      }
+    }
+    
+    /* Bitmap of the tables selected for pull-out from this nest */
+    table_map pulled_tables= 0;
+    if (have_join_nest_children)
+      goto skip;
+
+    /* Action #1: set emb_sj_nest (pull-out of constant tables is disabled) */
+    child_li.rewind();
+    while ((tbl= child_li++))
+    {
+      if (tbl->table)
+      {
+        tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest;
+#if 0 
+        /* 
+          Do not pull out tables because they are constant. This operation has
+          a problem:
+          - Some constant tables may become/cease to be constant across PS
+            re-executions
+          - Contrary to our initial assumption, it turned out that table pullout 
+            operation is not easily undoable.
+
+          The solution is to leave constant tables where they are. This will
+          affect only constant tables that are 1-row or empty, tables that are
+          constant because they are accessed via eq_ref(const) access will
+          still be pulled out as functionally-dependent.
+
+          This will cause us to miss the chance to flatten some of the 
+          subqueries, but since const tables do not generate many duplicates,
+          it really doesn't matter that much whether they were pulled out or
+          not.
+
+          All of this was done as fix for BUG#43768.
+        */
+        if (tbl->table->map & join->const_table_map)
+        {
+          pulled_tables |= tbl->table->map;
+          DBUG_PRINT("info", ("Table %s pulled out (reason: constant)",
+                              tbl->table->alias));
+        }
+#endif
+      }
+    }
+    
+    /*
+      Action #2: Find which tables we can pull out based on
+      update_ref_and_keys() data. Note that pulling one table out can allow
+      us to pull out some other tables too, so repeat until nothing changes.
+    */
+    bool pulled_a_table;
+    do 
+    {
+      pulled_a_table= FALSE;
+      child_li.rewind();
+      while ((tbl= child_li++))
+      {
+        if (tbl->table && !(pulled_tables & tbl->table->map))
+        {
+          if (find_eq_ref_candidate(tbl->table, 
+                                    sj_nest->nested_join->used_tables & 
+                                    ~pulled_tables))
+          {
+            pulled_a_table= TRUE;
+            pulled_tables |= tbl->table->map;
+            DBUG_PRINT("info", ("Table %s pulled out (reason: func dep)",
+                                tbl->table->alias.c_ptr()));
+            /*
+              Pulling a table out of an uncorrelated subquery in general
+              makes it correlated. See the NOTE to this function. 
+            */
+            sj_nest->sj_subq_pred->is_correlated= TRUE;
+            sj_nest->nested_join->sj_corr_tables|= tbl->table->map;
+            sj_nest->nested_join->sj_depends_on|= tbl->table->map;
+          }
+        }
+      }
+    } while (pulled_a_table);
+ 
+    child_li.rewind();  /* restart the iteration for Action #3 */
+  skip:
+    /*
+      Action #3: Move the pulled out TABLE_LIST elements to the parents.
+    */
+    table_map inner_tables= sj_nest->nested_join->used_tables & 
+                            ~pulled_tables;
+    /* Record the bitmap of inner tables */
+    sj_nest->sj_inner_tables= inner_tables;
+    if (pulled_tables)
+    {
+      List<TABLE_LIST> *upper_join_list= (sj_nest->embedding != NULL)?
+                                           (&sj_nest->embedding->nested_join->join_list): 
+                                           (&join->select_lex->top_join_list);
+      Query_arena *arena, backup;
+      arena= join->thd->activate_stmt_arena_if_needed(&backup);
+      while ((tbl= child_li++))
+      {
+        if (tbl->table)
+        {
+          if (inner_tables & tbl->table->map)
+          {
+            /* This table is not pulled out */
+            tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest;
+          }
+          else
+          {
+            /* This table has been pulled out of the semi-join nest */
+            tbl->table->reginfo.join_tab->emb_sj_nest= NULL;
+            /*
+              Pull the table up in the same way as simplify_joins() does:
+              update join_list and embedding pointers but keep next[_local]
+              pointers.
+            */
+            child_li.remove();
+            sj_nest->nested_join->used_tables &= ~tbl->table->map;
+            upper_join_list->push_back(tbl);
+            tbl->join_list= upper_join_list;
+            tbl->embedding= sj_nest->embedding;
+          }
+        }
+      }
+
+      /* Remove the sj-nest itself if we've removed everything from it */
+      if (!inner_tables)
+      {
+        List_iterator<TABLE_LIST> li(*upper_join_list);
+        /* Find the sj_nest in the list. */
+        while (sj_nest != li++) ;
+        li.remove();
+        /* Also remove it from the list of SJ-nests: */
+        sj_list_it.remove();
+      }
+
+      if (arena)
+        join->thd->restore_active_arena(arena, &backup);
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+
+/* 
+  Optimize semi-join nests that could be run with sj-materialization
+
+  SYNOPSIS
+    optimize_semijoin_nests()
+      join           The join to optimize semi-join nests for
+      all_table_map  Bitmap of all tables in the join
+
+  DESCRIPTION
+    Optimize each of the semi-join nests that can be run with
+    materialization. For each of the nests, we
+     - Generate the best join order for this "sub-join" and remember it;
+     - Remember the sub-join execution cost (it's part of materialization
+       cost);
+     - Calculate other costs that will be incurred if we decide 
+       to use materialization strategy for this semi-join nest.
+
+    All obtained information is saved in sj_nest->sj_mat_info and will be
+    used by the main join optimization pass.
+
+  RETURN
+    FALSE  Ok 
+    TRUE   choose_plan() failed or out of memory
+*/
+
+bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
+{
+  DBUG_ENTER("optimize_semijoin_nests");
+  List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
+  TABLE_LIST *sj_nest;
+  while ((sj_nest= sj_list_it++))
+  {
+    /* semi-join nests with only constant tables are not valid */
+   /// DBUG_ASSERT(sj_nest->sj_inner_tables & ~join->const_table_map);
+
+    sj_nest->sj_mat_info= NULL;
+    /*
+      The statement may have been executed with 'semijoin=on' earlier.
+      We need to verify that 'semijoin=on' still holds.
+     */
+    if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN) &&
+        optimizer_flag(join->thd, OPTIMIZER_SWITCH_MATERIALIZATION))
+    {
+      if ((sj_nest->sj_inner_tables  & ~join->const_table_map) && /* not everything was pulled out */
+          !sj_nest->sj_subq_pred->is_correlated && 
+           sj_nest->sj_subq_pred->types_allow_materialization)
+      {
+        join->emb_sjm_nest= sj_nest;
+        if (choose_plan(join, all_table_map &~join->const_table_map))
+          DBUG_RETURN(TRUE); /* purecov: inspected */
+        /*
+          The best plan to run the subquery is now in join->best_positions,
+          save it.
+        */
+        uint n_tables= my_count_bits(sj_nest->sj_inner_tables & ~join->const_table_map);
+        SJ_MATERIALIZATION_INFO* sjm;
+        if (!(sjm= new SJ_MATERIALIZATION_INFO) ||
+            !(sjm->positions= (POSITION*)join->thd->alloc(sizeof(POSITION)*
+                                                          n_tables)))
+          DBUG_RETURN(TRUE); /* purecov: inspected */
+        sjm->tables= n_tables;
+        sjm->is_used= FALSE;
+        double subjoin_out_rows, subjoin_read_time;
+
+        /*
+        join->get_partial_cost_and_fanout(n_tables + join->const_tables,
+                                          table_map(-1),
+                                          &subjoin_read_time, 
+                                          &subjoin_out_rows);
+        */
+        join->get_prefix_cost_and_fanout(n_tables, 
+                                         &subjoin_read_time,
+                                         &subjoin_out_rows);
+
+        sjm->materialization_cost.convert_from_cost(subjoin_read_time);
+        sjm->rows= subjoin_out_rows;
+        
+        // Don't use the following list because it has "stale" items. use
+        // ref_pointer_array instead:
+        //
+        //List<Item> &right_expr_list= 
+        //  sj_nest->sj_subq_pred->unit->first_select()->item_list;
+        /*
+          Adjust output cardinality estimates. If the subquery has form
+
+           ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
+
+           then the number of distinct output record combinations has an
+           upper bound of product of number of records matching the tables 
+           that are used by the SELECT clause.
+           TODO:
+             We can get a more precise estimate if we
+              - use rec_per_key cardinality estimates. For simple cases like 
+                "oe IN (SELECT t.key ...)" it is trivial. 
+              - Functional dependencies between the tables in the semi-join
+                nest (the payoff is probably less here?)
+          
+          See also get_post_group_estimate().
+        */
+        SELECT_LEX *subq_select= sj_nest->sj_subq_pred->unit->first_select();
+        { /* Point join->map2table at the JOIN_TABs of the plan just built */
+          for (uint i=0 ; i < join->const_tables + sjm->tables ; i++)
+          {
+            JOIN_TAB *tab= join->best_positions[i].table;
+            join->map2table[tab->table->tablenr]= tab;
+          }
+          /* Collect the tables referred to by the subquery's select list */
+          Item **ref_array= subq_select->ref_pointer_array;
+          Item **ref_array_end= ref_array + subq_select->item_list.elements; 
+          table_map map= 0;
+          /* OR together used_tables() of every ref_pointer_array element */
+          for (;ref_array < ref_array_end; ref_array++)
+            map |= (*ref_array)->used_tables();
+          map= map & ~PSEUDO_TABLE_BITS;
+          Table_map_iterator tm_it(map);
+          int tableno;
+          double rows= 1.0;
+          while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+            rows *= join->map2table[tableno]->table->quick_condition_rows;
+          sjm->rows= min(sjm->rows, rows);
+        }
+        memcpy(sjm->positions, join->best_positions + join->const_tables, 
+               sizeof(POSITION) * n_tables);
+
+        /*
+          Calculate temporary table parameters and usage costs
+          (from subjoin_out_rows, not from the possibly lowered sjm->rows)
+        */
+        uint rowlen= get_tmp_table_rec_length(subq_select->ref_pointer_array,
+                                              subq_select->item_list.elements);
+        double lookup_cost= get_tmp_table_lookup_cost(join->thd,
+                                                      subjoin_out_rows, rowlen);
+        double write_cost= get_tmp_table_write_cost(join->thd,
+                                                    subjoin_out_rows, rowlen);
+
+        /*
+          Let materialization cost include the cost to write the data into the
+          temporary table:
+        */ 
+        sjm->materialization_cost.add_io(subjoin_out_rows, write_cost);
+        
+        /*
+          Set the cost to do a full scan of the temptable (will need this to 
+          consider doing sjm-scan):
+        */ 
+        sjm->scan_cost.zero();
+        sjm->scan_cost.add_io(sjm->rows, lookup_cost);
+
+        sjm->lookup_cost.convert_from_cost(lookup_cost);
+        sj_nest->sj_mat_info= sjm;
+        DBUG_EXECUTE("opt", print_sjm(sjm););
+      }
+    }
+  }
+  join->emb_sjm_nest= NULL;
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Get estimated record length for semi-join materialization temptable
+  
+  SYNOPSIS
+    get_tmp_table_rec_length()
+      p_items   Array of the IN subquery's select list items
+      elements  Number of entries in p_items
+
+  DESCRIPTION
+    Sum a per-item size, picked from the item's result type, over p_items.
+    It's only an estimate because we don't follow every bit of
+    create_tmp_table()'s logic. That is fine as the return value of this
+    function is used only for cost calculations.
+
+  RETURN
+    Length of the temptable record, in bytes
+*/
+
+static uint get_tmp_table_rec_length(Item **p_items, uint elements)
+{
+  uint rec_length= 0;
+  for (uint idx= 0; idx < elements; idx++)
+  {
+    Item *cur= p_items[idx];
+    uint col_length;
+    switch (cur->result_type()) {
+    case REAL_RESULT:
+      col_length= (uint) sizeof(double);
+      break;
+    case INT_RESULT:
+      /* 8 bytes once max_length reaches the threshold below, else 4 */
+      col_length= (cur->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1)) ? 8 : 4;
+      break;
+    case STRING_RESULT:
+    {
+      /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type.  */
+      const enum enum_field_types ftype= cur->field_type();
+      const bool is_temporal_or_geom=
+        (ftype == MYSQL_TYPE_DATETIME || ftype == MYSQL_TYPE_TIME ||
+         ftype == MYSQL_TYPE_DATE || ftype == MYSQL_TYPE_TIMESTAMP ||
+         ftype == MYSQL_TYPE_GEOMETRY);
+      col_length= is_temporal_or_geom ? 8 : cur->max_length;
+      break;
+    }
+    case DECIMAL_RESULT:
+      col_length= 10;
+      break;
+    case ROW_RESULT:
+    default:
+      DBUG_ASSERT(0); /* purecov: deadcode */
+      col_length= 0;
+      break;
+    }
+    rec_length+= col_length;
+  }
+  return rec_length;
+}
+
+
+/**
+  Cost of one lookup into a unique hash/btree index on a temporary table
+  holding 'row_count' rows of 'row_size' bytes each. The estimated table
+  size (row_count * row_size) is compared with max_heap_table_size.
+  @param thd  current query context
+  @param row_count  number of rows in the temp table
+  @param row_size   average size in bytes of the rows
+
+  @return  the cost of one lookup
+*/
+
+static double
+get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size)
+{
+  const bool within_heap_limit=
+    !(row_count * row_size > thd->variables.max_heap_table_size);
+  return within_heap_limit ? (double) HEAP_TEMPTABLE_LOOKUP_COST :
+                             (double) DISK_TEMPTABLE_LOOKUP_COST;
+}
+
+/**
+  Cost of writing one row into a temporary table that holds 'row_count'
+  unique rows of 'row_size' bytes each.
+
+  @param thd  current query context
+  @param row_count  number of rows in the temp table
+  @param row_size   average size in bytes of the rows
+
+  @return  the cost of writing one row
+*/
+
+static double
+get_tmp_table_write_cost(THD *thd, double row_count, uint row_size)
+{
+  /*
+    TODO:
+    This is an optimistic estimate: a write is priced as a single lookup. Add
+    the costs of actually writing the row to memory/disk and of possible
+    index reorganization.
+  */
+  return get_tmp_table_lookup_cost(thd, row_count, row_size);
+}
+
+
+/*
+  Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate
+
+  SYNOPSIS
+    find_eq_ref_candidate()
+      table             Table to be checked
+      sj_inner_tables   Bitmap of inner tables. eq_ref(inner_table) doesn't
+                        count.
+
+  DESCRIPTION
+    Walk the table's KEYUSE array key by key. A HA_NOSAME key that is not
+    for hash join is a candidate when every key part has an equality that
+    uses no table from sj_inner_tables and is not a REF_OR_NULL one.
+  TODO
+    Check again if it is feasible to factor common parts with constant table
+    search
+
+    Also check if it's feasible to factor common parts with table elimination
+
+  RETURN
+    TRUE  - There exists an eq_ref(outer-tables) candidate
+    FALSE - Otherwise
+*/
+
+bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
+{
+  KEYUSE *keyuse= table->reginfo.join_tab->keyuse;
+
+  if (keyuse)
+  {
+    do
+    {
+      uint key= keyuse->key;
+      KEY *keyinfo;   /* set (and used) only when the key is not excluded */
+      key_part_map bound_parts= 0;
+      bool is_excluded_key= keyuse->is_for_hash_join(); 
+      if (!is_excluded_key)
+      {
+        keyinfo= table->key_info + key;
+        is_excluded_key= !test(keyinfo->flags & HA_NOSAME);
+      }
+      if (!is_excluded_key)
+      {
+        do  /* For all equalities on all key parts */
+        {
+          /* Check if this is "t.keypart = expr(outer_tables)" */
+          if (!(keyuse->used_tables & sj_inner_tables) &&
+              !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
+          {
+            bound_parts |= (key_part_map) 1 << keyuse->keypart;
+          }
+          keyuse++;
+        } while (keyuse->key == key && keyuse->table == table);
+        /* Shift and mask use key_part_map width, matching bound_parts */
+        if (bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts))
+          return TRUE;
+      }
+      else
+      {
+        do  /* Skip the remaining KEYUSE elements of the excluded key */
+        {
+          keyuse++;
+        } while (keyuse->key == key && keyuse->table == table);
+      }
+    } while (keyuse->table == table);
+  }
+  return FALSE;
+}
+
+
+/*
+  Do semi-join optimization step after we've added a new tab to join prefix
+
+  SYNOPSIS
+    advance_sj_state()
+      join                        The join we're optimizing
+      remaining_tables            Tables not in the join prefix
+      new_join_tab                Join tab we've just added to the join prefix
+      idx                         Index of this join tab (i.e. number of tables
+                                  in the prefix minus one)
+      current_record_count INOUT  Estimate of #records in join prefix's output
+      current_read_time    INOUT  Cost to execute the join prefix
+      loose_scan_pos       IN     A POSITION with LooseScan plan to access 
+                                  table new_join_tab
+                                  (produced by the last best_access_path call)
+
+  DESCRIPTION
+    Update semi-join optimization state after we've added another tab (table 
+    and access method) to the join prefix.
+    
+    The state is maintained in join->positions[#prefix_size]. Each of the
+    available strategies has its own state variables.
+    
+    for each semi-join strategy
+    {
+      update strategy's state variables;
+
+      if (join prefix has all the tables that are needed to consider
+          using this strategy for the semi-join(s))
+      {
+        calculate cost of using the strategy
+        if ((this is the first strategy to handle the semi-join nest(s)  ||
+            the cost is less than other strategies))
+        {
+          // Pick this strategy
+          pos->sj_strategy= ..
+          ..
+        }
+      }
+
+    Most of the new state is saved in join->positions[idx] (and hence no undo
+    is necessary). Several members of class JOIN are updated also, these
+    changes can be rolled back with restore_prev_sj_state().
+
+    See setup_semijoin_dups_elimination() for a description of what kinds of
+    join prefixes each strategy can handle.
+*/
+
+void advance_sj_state(JOIN *join, table_map remaining_tables, 
+                      const JOIN_TAB *new_join_tab, uint idx, 
+                      double *current_record_count, double *current_read_time, 
+                      POSITION *loose_scan_pos)
+{
+  TABLE_LIST *emb_sj_nest;
+  POSITION *pos= join->positions + idx;
+  remaining_tables &= ~new_join_tab->table->map;
+  bool disable_jbuf= join->thd->variables.join_cache_level == 0;
+
+  pos->prefix_cost.convert_from_cost(*current_read_time);
+  pos->prefix_record_count= *current_record_count;
+  pos->sj_strategy= SJ_OPT_NONE;
+  
+  pos->prefix_dups_producing_tables= join->cur_dups_producing_tables;
+  /* Initialize the state or copy it from prev. tables */
+  if (idx == join->const_tables)
+  {
+    pos->invalidate_firstmatch_prefix();
+    pos->first_loosescan_table= MAX_TABLES; 
+    pos->dupsweedout_tables= 0;
+    pos->sjm_scan_need_tables= 0;
+    LINT_INIT(pos->sjm_scan_last_inner);
+  }
+  else
+  {
+    // FirstMatch
+    pos->first_firstmatch_table=
+      (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH) ?
+      MAX_TABLES : pos[-1].first_firstmatch_table;
+    pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl;
+    pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables;
+
+    // LooseScan
+    pos->first_loosescan_table=
+      (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ?
+      MAX_TABLES : pos[-1].first_loosescan_table;
+    pos->loosescan_need_tables= pos[-1].loosescan_need_tables;
+
+    // SJ-Materialization Scan
+    pos->sjm_scan_need_tables=
+      (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ?
+      0 : pos[-1].sjm_scan_need_tables;
+    pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner;
+
+    // Duplicate Weedout
+    pos->dupsweedout_tables=      pos[-1].dupsweedout_tables;
+    pos->first_dupsweedout_table= pos[-1].first_dupsweedout_table;
+  }
+  
+  table_map handled_by_fm_or_ls= 0;
+  /* FirstMatch Strategy */
+  if (new_join_tab->emb_sj_nest &&
+      optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) &&
+      !join->outer_join)
+  {
+    const table_map outer_corr_tables=
+      new_join_tab->emb_sj_nest->nested_join->sj_corr_tables |
+      new_join_tab->emb_sj_nest->nested_join->sj_depends_on;
+    const table_map sj_inner_tables=
+      new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map;
+
+    /* 
+      Enter condition:
+       1. The next join tab belongs to semi-join nest
+          (verified for the encompassing code block above).
+       2. We're not in a duplicate producer range yet
+       3. All outer tables that
+           - the subquery is correlated with, or
+           - referred to from the outer_expr 
+          are in the join prefix
+       4. All inner tables are still part of remaining_tables.
+    */
+    if (!join->cur_sj_inner_tables &&              // (2)
+        !(remaining_tables & outer_corr_tables) && // (3)
+        (sj_inner_tables ==                        // (4)
+         ((remaining_tables | new_join_tab->table->map) & sj_inner_tables)))
+    {
+      /* Start tracking potential FirstMatch range */
+      pos->first_firstmatch_table= idx;
+      pos->firstmatch_need_tables= sj_inner_tables;
+      pos->first_firstmatch_rtbl= remaining_tables;
+    }
+
+    if (pos->in_firstmatch_prefix())
+    {
+      if (outer_corr_tables & pos->first_firstmatch_rtbl)
+      {
+        /*
+          Trying to add an sj-inner table whose sj-nest has an outer correlated 
+          table that was not in the prefix. This means FirstMatch can't be used.
+        */
+        pos->invalidate_firstmatch_prefix();
+      }
+      else
+      {
+        /* Record that we need all of this semi-join's inner tables, too */
+        pos->firstmatch_need_tables|= sj_inner_tables;
+      }
+    
+      if (pos->in_firstmatch_prefix() && 
+          !(pos->firstmatch_need_tables & remaining_tables))
+      {
+        /*
+          Got a complete FirstMatch range.
+            Calculate correct costs and fanout
+        */
+        optimize_wo_join_buffering(join, pos->first_firstmatch_table, idx,
+                                   remaining_tables, FALSE, idx,
+                                   current_record_count, 
+                                   current_read_time);
+        /*
+          We don't yet know what are the other strategies, so pick the
+          FirstMatch.
+
+          We ought to save the alternate POSITIONs produced by
+          optimize_wo_join_buffering but the problem is that providing save
+          space uses too much space. Instead, we will re-calculate the
+          alternate POSITIONs after we've picked the best QEP.
+        */
+        pos->sj_strategy= SJ_OPT_FIRST_MATCH;
+        handled_by_fm_or_ls=  pos->firstmatch_need_tables;
+      }
+    }
+  }
+
+  /* LooseScan Strategy */
+  {
+    POSITION *first=join->positions+pos->first_loosescan_table; 
+    /* 
+      LooseScan strategy can't handle interleaving between tables from the 
+      semi-join that LooseScan is handling and any other tables.
+
+      If we were considering LooseScan for the join prefix (1)
+         and the table we're adding creates an interleaving (2)
+      then 
+         stop considering loose scan
+    */
+    if ((pos->first_loosescan_table != MAX_TABLES) &&   // (1)
+        (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
+        new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
+    {
+      pos->first_loosescan_table= MAX_TABLES;
+    }
+
+    /*
+      If we got an option to use LooseScan for the current table, start
+      considering using LooseScan strategy
+    */
+    if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join)
+    {
+      pos->first_loosescan_table= idx;
+      pos->loosescan_need_tables=
+        new_join_tab->emb_sj_nest->sj_inner_tables | 
+        new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+        new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
+    }
+    
+    if ((pos->first_loosescan_table != MAX_TABLES) && 
+        !(remaining_tables & pos->loosescan_need_tables))
+    {
+      /* 
+        Ok we have LooseScan plan and also have all LooseScan sj-nest's
+        inner tables and outer correlated tables into the prefix.
+      */
+
+      first=join->positions + pos->first_loosescan_table; 
+      uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables);
+      /* Got a complete LooseScan range. Calculate its cost */
+      /*
+        The same problem as with FirstMatch - we need to save POSITIONs
+        somewhere but reserving space for all cases would require too
+        much space. We will re-calculate POSITION structures later on. 
+      */
+      optimize_wo_join_buffering(join, pos->first_loosescan_table, idx,
+                                 remaining_tables, 
+                                 TRUE,  //first_alt
+                                 disable_jbuf ? join->table_count :
+                                   pos->first_loosescan_table + n_tables,
+                                 current_record_count,
+                                 current_read_time);
+      /*
+        We don't yet have any other strategies that could handle this
+        semi-join nest (the other options are Duplicate Elimination or
+        Materialization, which need at least the same set of tables in 
+        the join prefix to be considered) so unconditionally pick the 
+        LooseScan.
+      */
+      pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
+      handled_by_fm_or_ls= first->table->emb_sj_nest->sj_inner_tables;
+    }
+  }
+
+  /* 
+    Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
+    LooseScan detector in best_access_path)
+  */
+  if ((emb_sj_nest= new_join_tab->emb_sj_nest))
+  {
+    join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
+    join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables;
+
+    /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
+    if (!(remaining_tables &
+          emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
+      join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
+  }
+  join->cur_dups_producing_tables &= ~handled_by_fm_or_ls;
+
+  /* 4. SJ-Materialization and SJ-Materialization-scan strategy handler */
+  bool sjm_scan;
+  SJ_MATERIALIZATION_INFO *mat_info;
+  if ((mat_info= at_sjmat_pos(join, remaining_tables,
+                              new_join_tab, idx, &sjm_scan)))
+  {
+    if (sjm_scan)
+    {
+      /*
+        We can't evaluate this option yet. This is because we can't
+        account for fanout of sj-inner tables yet:
+
+          ntX  SJM-SCAN(it1 ... itN) | ot1 ... otN  |
+                                     ^(1)           ^(2)
+
+        we're now at position (1). SJM temptable in general has multiple
+        records, so at point (1) we'll get the fanout from sj-inner tables (ie
+        there will be multiple record combinations).
+
+        The final join result will not contain any semi-join produced
+        fanout, i.e. tables within SJM-SCAN(...) will not contribute to
+        the cardinality of the join output.  Extra fanout produced by 
+        SJM-SCAN(...) will be 'absorbed' into fanout produced by ot1 ...  otN.
+
+        The simple way to model this is to remove SJM-SCAN(...) fanout once
+        we reach the point #2.
+      */
+      pos->sjm_scan_need_tables=
+        new_join_tab->emb_sj_nest->sj_inner_tables | 
+        new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+        new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
+      pos->sjm_scan_last_inner= idx;
+    }
+    else
+    {
+      /* This is SJ-Materialization with lookups */
+      COST_VECT prefix_cost; 
+      signed int first_tab= (int)idx - mat_info->tables;
+      double prefix_rec_count;
+      if (first_tab < (int)join->const_tables)
+      {
+        prefix_cost.zero();
+        prefix_rec_count= 1.0;
+      }
+      else
+      {
+        prefix_cost= join->positions[first_tab].prefix_cost;
+        prefix_rec_count= join->positions[first_tab].prefix_record_count;
+      }
+
+      double mat_read_time= prefix_cost.total_cost();
+      mat_read_time += mat_info->materialization_cost.total_cost() +
+                       prefix_rec_count * mat_info->lookup_cost.total_cost();
+
+      if (mat_read_time < *current_read_time || join->cur_dups_producing_tables)
+      {
+        /*
+          NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
+          elements to join->positions as that makes it hard to return things
+          back when making one step back in join optimization. That's done 
+          after the QEP has been chosen.
+        */
+        pos->sj_strategy= SJ_OPT_MATERIALIZE;
+        *current_read_time=    mat_read_time;
+        *current_record_count= prefix_rec_count;
+        join->cur_dups_producing_tables&=
+          ~new_join_tab->emb_sj_nest->sj_inner_tables;
+      }
+    }
+  }
+  
+  /* 4.A SJM-Scan second phase check */
+  if (pos->sjm_scan_need_tables && /* Have SJM-Scan prefix */
+      !(pos->sjm_scan_need_tables & remaining_tables))
+  {
+    TABLE_LIST *mat_nest= 
+      join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest;
+    SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info;
+
+    double prefix_cost;
+    double prefix_rec_count;
+    int first_tab= pos->sjm_scan_last_inner + 1 - mat_info->tables;
+    /* Get the prefix cost */
+    if (first_tab == (int)join->const_tables)
+    {
+      prefix_rec_count= 1.0;
+      prefix_cost= 0.0;
+    }
+    else
+    {
+      prefix_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
+      prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
+    }
+
+    /* Add materialization cost */
+    prefix_cost += mat_info->materialization_cost.total_cost() +
+                   prefix_rec_count * mat_info->scan_cost.total_cost();
+    prefix_rec_count *= mat_info->rows;
+    
+    uint i;
+    table_map rem_tables= remaining_tables;
+    for (i= idx; i != (first_tab + mat_info->tables - 1); i--)
+      rem_tables |= join->positions[i].table->table->map;
+
+    POSITION curpos, dummy;
+    /* Need to re-run best_access_path as prefix_rec_count has changed */
+    for (i= first_tab + mat_info->tables; i <= idx; i++)
+    {
+      best_access_path(join, join->positions[i].table, rem_tables, i,
+                       disable_jbuf, prefix_rec_count, &curpos, &dummy);
+      prefix_rec_count *= curpos.records_read;
+      prefix_cost += curpos.read_time;
+    }
+
+    /*
+      Use the strategy if 
+       * it is cheaper than what we've had, or
+       * we haven't picked any other semi-join strategy yet
+      In the second case, we pick this strategy unconditionally because
+      comparing cost without semi-join duplicate removal with cost with
+      duplicate removal is not an apples-to-apples comparison.
+    */
+    if (prefix_cost < *current_read_time || join->cur_dups_producing_tables)
+    {
+      pos->sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
+      *current_read_time=    prefix_cost;
+      *current_record_count= prefix_rec_count;
+      join->cur_dups_producing_tables&= ~mat_nest->sj_inner_tables;
+
+    }
+  }
+
+  /* 5. Duplicate Weedout strategy handler */
+  {
+    /* 
+       Duplicate weedout can be applied after all ON-correlated and 
+       correlated 
+    */
+    TABLE_LIST *nest;
+    if ((nest= new_join_tab->emb_sj_nest))
+    {
+      if (!pos->dupsweedout_tables)
+        pos->first_dupsweedout_table= idx;
+
+      pos->dupsweedout_tables |= nest->sj_inner_tables |
+                                 nest->nested_join->sj_depends_on |
+                                 nest->nested_join->sj_corr_tables;
+    }
+    
+    if (pos->dupsweedout_tables)
+    {
+      /* we're in the process of constructing a DuplicateWeedout range */
+      TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding;
+      /* and we've entered an inner side of an outer join*/
+      if (emb && emb->on_expr)
+        pos->dupsweedout_tables |= emb->nested_join->used_tables;
+    }
+
+    if (pos->dupsweedout_tables && 
+        !(remaining_tables &
+          ~new_join_tab->table->map & pos->dupsweedout_tables))
+    {
+      /*
+        Ok, reached a state where we could put a dups weedout point.
+        Walk back and calculate
+          - the join cost (this is needed as the accumulated cost may assume 
+            some other duplicate elimination method)
+          - extra fanout that will be removed by duplicate elimination
+          - duplicate elimination cost
+        There are two cases:
+          1. We have other strategy/ies to remove all of the duplicates.
+          2. We don't.
+        
+        We need to calculate the cost in case #2 also because we need to make
+        choice between this join order and others.
+      */
+      uint first_tab= pos->first_dupsweedout_table;
+      double dups_cost;
+      double prefix_rec_count;
+      double sj_inner_fanout= 1.0;
+      double sj_outer_fanout= 1.0;
+      uint temptable_rec_size;
+      if (first_tab == join->const_tables)
+      {
+        prefix_rec_count= 1.0;
+        temptable_rec_size= 0;
+        dups_cost= 0.0;
+      }
+      else
+      {
+        dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
+        prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
+        temptable_rec_size= 8; /* This is not true but we'll make it so */
+      }
+      
+      table_map dups_removed_fanout= 0;
+      for (uint j= pos->first_dupsweedout_table; j <= idx; j++)
+      {
+        POSITION *p= join->positions + j;
+        dups_cost += p->read_time;
+        if (p->table->emb_sj_nest)
+        {
+          sj_inner_fanout *= p->records_read;
+          dups_removed_fanout |= p->table->table->map;
+        }
+        else
+        {
+          sj_outer_fanout *= p->records_read;
+          temptable_rec_size += p->table->table->file->ref_length;
+        }
+      }
+
+      /*
+        Add the cost of temptable use. The table will have sj_outer_fanout
+        records, and we will make 
+        - sj_outer_fanout table writes
+        - sj_inner_fanout*sj_outer_fanout  lookups.
+
+      */
+      double one_lookup_cost= get_tmp_table_lookup_cost(join->thd,
+                                                        sj_outer_fanout,
+                                                        temptable_rec_size);
+      double one_write_cost= get_tmp_table_write_cost(join->thd,
+                                                      sj_outer_fanout,
+                                                      temptable_rec_size);
+
+      double write_cost= join->positions[first_tab].prefix_record_count* 
+                         sj_outer_fanout * one_write_cost;
+      double full_lookup_cost= join->positions[first_tab].prefix_record_count* 
+                               sj_outer_fanout* sj_inner_fanout * 
+                               one_lookup_cost;
+      dups_cost += write_cost + full_lookup_cost;
+      
+      /*
+        Use the strategy if 
+         * it is cheaper than what we've had, or
+         * we haven't picked any other semi-join strategy yet
+        The second part is necessary because this strategy is the last one
+        to consider (it needs "the most" tables in the prefix) and we can't
+        leave duplicate-producing tables not handled by any strategy.
+      */
+      if (dups_cost < *current_read_time || join->cur_dups_producing_tables)
+      {
+        pos->sj_strategy= SJ_OPT_DUPS_WEEDOUT;
+        *current_read_time= dups_cost;
+        *current_record_count= prefix_rec_count * sj_outer_fanout;
+        join->cur_dups_producing_tables &= ~dups_removed_fanout;
+      }
+    }
+  }
+}
+
+
+/*
+  Roll back the semi-join optimizer state after the join tab at position
+  @idx has been taken off the end of the join prefix.
+
+  The caller is expected to pass remaining_tables without @tab's bit set.
+
+  - If every other inner table of @tab's semi-join nest is still in
+    remaining_tables (i.e. @tab was the only table of the nest in the
+    prefix), the nest's tables are cleared from join->cur_sj_inner_tables.
+  - join->cur_dups_producing_tables is reset to the value saved in
+    join->positions[idx].prefix_dups_producing_tables.
+*/
+
+void restore_prev_sj_state(const table_map remaining_tables, 
+                                  const JOIN_TAB *tab, uint idx)
+{
+  JOIN *const join= tab->join;
+  TABLE_LIST *const sj_nest= tab->emb_sj_nest;
+
+  if (sj_nest)
+  {
+    /* Inner tables of the nest other than the one being removed */
+    const table_map other_inner_tables=
+      sj_nest->sj_inner_tables & ~tab->table->map;
+
+    /* All of them are still unplaced => the nest leaves the prefix entirely */
+    if ((remaining_tables & sj_nest->sj_inner_tables) == other_inner_tables)
+      join->cur_sj_inner_tables&= ~sj_nest->sj_inner_tables;
+  }
+
+  join->cur_dups_producing_tables=
+    join->positions[idx].prefix_dups_producing_tables;
+}
+
+
+/*
+  Given a semi-join nest, find out which of the IN-equalities are bound
+
+  SYNOPSIS
+    get_bound_sj_equalities()
+      sj_nest           Semi-join nest
+      remaining_tables  Tables that are not yet bound
+
+  DESCRIPTION
+    Given a semi-join nest, find out which of the IN-equalities have their
+    left part expression bound (i.e. the said expression doesn't refer to
+    any of remaining_tables and can be evaluated).
+
+    The left-part expressions are taken from
+    sj_nest->nested_join->sj_outer_expr_list. Bit N of the result corresponds
+    to the N-th element of that list (counting from 0).
+
+    NOTE(review): the result is a 64-bit map, so this assumes the list has
+    at most 64 elements - confirm against the limit enforced by the callers.
+
+  RETURN
+    Bitmap of bound IN-equalities.
+*/
+
+ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, 
+                                  table_map remaining_tables)
+{
+  List_iterator<Item> li(sj_nest->nested_join->sj_outer_expr_list);
+  Item *item;
+  ulonglong res= 0;
+  /*
+    'i' is the ordinal of the current IN-equality. It must advance on every
+    iteration (bound or not), otherwise all bound equalities would be
+    reported in bit 0.
+  */
+  for (uint i= 0; (item= li++); i++)
+  {
+    /*
+      Q: should this take into account equality propagation and how?
+      A: If e->outer_side is an Item_field, walk over the equality
+         class and see if there is an element that is bound?
+      (this is an optional feature)
+    */
+    if (!(item->used_tables() & remaining_tables))
+      res |= 1ULL << i;
+  }
+  return res;
+}
+
+
+/*
+  Check if the last tables of the partial join order allow to use
+  sj-materialization strategy for them
+
+  SYNOPSIS
+    at_sjmat_pos()
+      join              The join being optimized (join->positions is read)
+      remaining_tables  Tables not yet in the join prefix; @tab's own bit is
+                        masked out here, so it may or may not be set
+      tab               The last table's join tab
+      idx               Last table's index in join->positions
+      loose_scan   OUT  TRUE <=> some of the nest's inner tables or of the
+                        tables in sj_depends_on are still outside the prefix.
+                        The caller in this file uses the value as the
+                        "SJ-Materialization-Scan" flag.
+                        Written only when all the preceding checks pass; it
+                        may be set to TRUE even though NULL is returned.
+
+  DESCRIPTION
+    The strategy is applicable when
+     1. @tab belongs to a semi-join nest that has sj_mat_info,
+     2. none of the nest's inner tables is left outside the prefix,
+     3. the tables immediately preceding @tab in join->positions (as many
+        as the nest has other inner tables) all belong to the same nest,
+     4. if the scan variant would be required, the subquery predicate
+        allows it (sjm_scan_allowed).
+
+  RETURN
+    The nest's SJ_MATERIALIZATION_INFO  Yes, can apply sj-materialization
+    NULL                                No, some requirement is not met
+*/
+
+static SJ_MATERIALIZATION_INFO *
+at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
+             uint idx, bool *loose_scan)
+{
+  TABLE_LIST *const sj_nest= tab->emb_sj_nest;
+  /* Tables that will come after @tab in the join order */
+  const table_map tables_after_tab= remaining_tables & ~tab->table->map;
+
+  /* (1) Must be inside a semi-join nest that has a materialization plan */
+  if (!sj_nest || !sj_nest->sj_mat_info)
+    return NULL;
+
+  /* (2) Every inner table of the nest must already be in the prefix */
+  if (tables_after_tab & sj_nest->sj_inner_tables)
+    return NULL;
+
+  /* (3) The nest's tables must form one uninterrupted run ending at idx */
+  const uint nest_size= my_count_bits(sj_nest->sj_inner_tables);
+  uint steps_back= 1;
+  while (steps_back < nest_size)
+  {
+    if (join->positions[idx - steps_back].table->emb_sj_nest != sj_nest)
+      return NULL;
+    steps_back++;
+  }
+
+  *loose_scan= test(tables_after_tab &
+                    (sj_nest->sj_inner_tables |
+                     sj_nest->nested_join->sj_depends_on));
+
+  /* (4) The scan variant is needed but not permitted for this predicate */
+  if (*loose_scan && !sj_nest->sj_subq_pred->sjm_scan_allowed)
+    return NULL;
+
+  return sj_nest->sj_mat_info;
+}
+
+
+
+/*
+  Fix semi-join strategies for the picked join order
+
+  SYNOPSIS
+    fix_semijoin_strategies_for_picked_join_order()
+      join  The join with the picked join order
+
+  DESCRIPTION
+    Fix semi-join strategies for the picked join order. This is a step that
+    needs to be done right after we have fixed the join order. What we do
+    here is switch join's semi-join strategy description from backward-based
+    to forwards based.
+    
+    When join optimization is in progress, we re-consider semi-join
+    strategies after we've added another table. Here's an illustration.
+    Suppose the join optimization is underway:
+
+    1) ot1  it1  it2 
+                 sjX  -- looking at (ot1, it1, it2) join prefix, we decide
+                         to use semi-join strategy sjX.
+
+    2) ot1  it1  it2  ot2 
+                 sjX  sjY -- Having added table ot2, we now may consider
+                             another semi-join strategy and decide to use a 
+                             different strategy sjY. Note that the record
+                             of sjX has remained under it2. That is
+                             necessary because we need to be able to get
+                             back to (ot1, it1, it2) join prefix.
+      what makes things even worse is that there are cases where the choice
+      of sjY changes the way we should access it2. 
+
+    3) [ot1  it1  it2  ot2  ot3]
+                  sjX  sjY  -- This means that after join optimization is
+                               finished, semi-join info should be read
+                               right-to-left (while nearly all plan refinement
+                               functions, EXPLAIN, etc proceed from left to 
+                               right)
+
+    This function does the needed reversal, making it possible to read the
+    join and semi-join order from left to right.
+
+    For every strategy range [first, tablenr] found this way the function
+     - records the strategy and the number of covered tables (n_sj_tables)
+       at best_positions[first] and at join_tab[first],
+     - resets sj_strategy to SJ_OPT_NONE at the other positions of the range,
+     - re-runs best_access_path() where the strategy changes how the tables
+       have to be accessed (SJM-Scan, FirstMatch, LooseScan).
+*/    
+
+void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
+{
+  uint table_count=join->table_count;
+  uint tablenr;
+  table_map remaining_tables= 0;
+  table_map handled_tabs= 0;
+  /*
+    Walk the non-const part of the join order from right to left. The loop
+    counter is unsigned: when const_tables is 0 both "const_tables - 1" and
+    the final "tablenr--" wrap around to the same value, which ends the loop.
+  */
+  for (tablenr= table_count - 1 ; tablenr != join->const_tables - 1; tablenr--)
+  {
+    POSITION *pos= join->best_positions + tablenr;
+    JOIN_TAB *s= pos->table;
+    uint first;
+    LINT_INIT(first); // Set by every branch except SJ_OPT_NONE which doesn't use it
+
+    /* Already covered by a strategy range found earlier, or no strategy */
+    if ((handled_tabs & s->table->map) || pos->sj_strategy == SJ_OPT_NONE)
+    {
+      remaining_tables |= s->table->map;
+      continue;
+    }
+    
+    if (pos->sj_strategy == SJ_OPT_MATERIALIZE)
+    {
+      SJ_MATERIALIZATION_INFO *sjm= s->emb_sj_nest->sj_mat_info;
+      sjm->is_used= TRUE;
+      sjm->is_sj_scan= FALSE;
+      /* The nest occupies the sjm->tables positions ending at tablenr */
+      memcpy(pos - sjm->tables + 1, sjm->positions, 
+             sizeof(POSITION) * sjm->tables);
+      first= tablenr - sjm->tables + 1;
+      join->best_positions[first].n_sj_tables= sjm->tables;
+      join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE;
+    }
+    else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
+    {
+      POSITION *first_inner= join->best_positions + pos->sjm_scan_last_inner;
+      SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info;
+      sjm->is_used= TRUE;
+      sjm->is_sj_scan= TRUE;
+      first= pos->sjm_scan_last_inner - sjm->tables + 1;
+      memcpy(join->best_positions + first, 
+             sjm->positions, sizeof(POSITION) * sjm->tables);
+      join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
+      join->best_positions[first].n_sj_tables= sjm->tables;
+      /* 
+        Do what advance_sj_state did: re-run best_access_path for every table
+        in the [last_inner_table + 1; tablenr] range
+      */
+      double prefix_rec_count;
+      /* Get the prefix record count */
+      if (first == join->const_tables)
+        prefix_rec_count= 1.0;
+      else
+        prefix_rec_count= join->best_positions[first-1].prefix_record_count;
+      
+      /* Add materialization record count*/
+      prefix_rec_count *= sjm->rows;
+      
+      uint i;
+      table_map rem_tables= remaining_tables;
+      for (i= tablenr; i != (first + sjm->tables - 1); i--)
+        rem_tables |= join->best_positions[i].table->table->map;
+
+      POSITION dummy;
+      join->cur_sj_inner_tables= 0;
+      for (i= first + sjm->tables; i <= tablenr; i++)
+      {
+        best_access_path(join, join->best_positions[i].table, rem_tables, i, 
+                         FALSE, prefix_rec_count,
+                         join->best_positions + i, &dummy);
+        prefix_rec_count *= join->best_positions[i].records_read;
+        rem_tables &= ~join->best_positions[i].table->table->map;
+      }
+    }
+ 
+    if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)
+    {
+      first= pos->first_firstmatch_table;
+      join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH;
+      join->best_positions[first].n_sj_tables= tablenr - first + 1;
+      POSITION dummy; // For loose scan paths
+      /*
+        Record count of the join prefix that precedes the FirstMatch range.
+        It is taken from the position right before 'first' because the loop
+        below multiplies it by records_read of every table in
+        [first, tablenr].
+      */
+      double record_count= (first== join->const_tables)? 1.0: 
+                           join->best_positions[first - 1].prefix_record_count;
+      
+      table_map rem_tables= remaining_tables;
+      uint idx;
+      for (idx= first; idx <= tablenr; idx++)
+      {
+        rem_tables |= join->best_positions[idx].table->table->map;
+      }
+      /*
+        Re-run best_access_path to produce best access methods that do not use
+        join buffering
+      */ 
+      join->cur_sj_inner_tables= 0;
+      for (idx= first; idx <= tablenr; idx++)
+      {
+        if (join->best_positions[idx].use_join_buffer)
+        {
+           best_access_path(join, join->best_positions[idx].table, 
+                            rem_tables, idx, TRUE /* no jbuf */,
+                            record_count, join->best_positions + idx, &dummy);
+        }
+        record_count *= join->best_positions[idx].records_read;
+        rem_tables &= ~join->best_positions[idx].table->table->map;
+      }
+    }
+
+    if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN) 
+    {
+      first= pos->first_loosescan_table;
+      POSITION *first_pos= join->best_positions + first;
+      POSITION loose_scan_pos; // For loose scan paths
+      /*
+        Record count of the join prefix that precedes the LooseScan range
+        (see the FirstMatch branch above for why 'first - 1' is used).
+      */
+      double record_count= (first== join->const_tables)? 1.0: 
+                           join->best_positions[first - 1].prefix_record_count;
+      
+      table_map rem_tables= remaining_tables;
+      uint idx;
+      for (idx= first; idx <= tablenr; idx++)
+        rem_tables |= join->best_positions[idx].table->table->map;
+      /*
+        Re-run best_access_path to produce best access methods that do not use
+        join buffering
+      */ 
+      join->cur_sj_inner_tables= 0;
+      for (idx= first; idx <= tablenr; idx++)
+      {
+        if (join->best_positions[idx].use_join_buffer || (idx == first))
+        {
+           best_access_path(join, join->best_positions[idx].table,
+                            rem_tables, idx, TRUE /* no jbuf */,
+                            record_count, join->best_positions + idx,
+                            &loose_scan_pos);
+           /* The first table of the range must use the LooseScan access */
+           if (idx==first)
+             join->best_positions[idx]= loose_scan_pos;
+        }
+        rem_tables &= ~join->best_positions[idx].table->table->map;
+        record_count *= join->best_positions[idx].records_read;
+      }
+      first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
+      first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables);
+    }
+
+    if (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT)
+    {
+      /* 
+        Duplicate Weedout starting at pos->first_dupsweedout_table, ending at
+        this table.
+      */
+      first= pos->first_dupsweedout_table;
+      join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT;
+      join->best_positions[first].n_sj_tables= tablenr - first + 1;
+    }
+    
+    /*
+      Only the first position of the range keeps the strategy mark; all
+      tables of the range are remembered as handled so that they are skipped
+      when the outer loop reaches them.
+    */
+    uint i_end= first + join->best_positions[first].n_sj_tables;
+    for (uint i= first; i < i_end; i++)
+    {
+      if (i != first)
+        join->best_positions[i].sj_strategy= SJ_OPT_NONE;
+      handled_tabs |= join->best_positions[i].table->table->map;
+    }
+
+    if (tablenr != first)
+      pos->sj_strategy= SJ_OPT_NONE;
+    remaining_tables |= s->table->map;
+    join->join_tab[first].sj_strategy= join->best_positions[first].sj_strategy;
+    join->join_tab[first].n_sj_tables= join->best_positions[first].n_sj_tables;
+  }
+}
+
+
+/*
+  Setup semi-join materialization strategy for one semi-join nest, step 1:
+  create the materialization temporary table
+  
+  SYNOPSIS
+
+  setup_sj_materialization_part1()
+    sjm_tab  The join tab that stands for the materialized nest. The nest's
+             own join tabs are reached through sjm_tab->bush_children.
+
+  DESCRIPTION
+    - Collect the subquery's select list items (taken from the first
+      select's ref_pointer_array) into sjm->sjm_table_cols
+    - Create the materialization temporary table with these columns, with
+      duplicate elimination ("distinct") enabled
+    - Register the table in join->sj_tmp_tables and the materialization
+      descriptor in join->sjm_info_list
+    - Make sjm_tab->table point to the new temporary table
+
+    Structures for reading the temporary table (index lookup or full scan)
+    are set up separately, in setup_sj_materialization_part2().
+
+  RETURN
+    FALSE  Ok
+    TRUE   Error
+*/
+
+bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab)
+{
+  DBUG_ENTER("setup_sj_materialization_part1");
+  JOIN_TAB *tab= sjm_tab->bush_children->start;
+  TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding;
+  SJ_MATERIALIZATION_INFO *sjm= emb_sj_nest->sj_mat_info;
+  THD *thd= tab->join->thd;
+  
+  DBUG_ASSERT(sjm->is_used);
+  /* 
+    Set up the table to write to, do as select_union::create_result_table does
+  */
+  sjm->sjm_table_param.init();
+  sjm->sjm_table_param.bit_fields_as_long= TRUE;
+  SELECT_LEX *subq_select= emb_sj_nest->sj_subq_pred->unit->first_select();
+  /* One temporary table column per element of the subquery's select list */
+  Item **p_item= subq_select->ref_pointer_array;
+  Item **p_end= p_item + subq_select->item_list.elements;
+  for(;p_item != p_end; p_item++)
+    sjm->sjm_table_cols.push_back(*p_item);
+
+  sjm->sjm_table_param.field_count= subq_select->item_list.elements;
+
+  if (!(sjm->table= create_tmp_table(thd, &sjm->sjm_table_param, 
+                                     sjm->sjm_table_cols, (ORDER*) 0, 
+                                     TRUE /* distinct */, 
+                                     1, /*save_sum_fields*/
+                                     thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS, 
+                                     HA_POS_ERROR /*rows_limit */, 
+                                     (char*)"sj-materialize")))
+    DBUG_RETURN(TRUE); /* purecov: inspected */
+  sjm->table->file->extra(HA_EXTRA_WRITE_CACHE);
+  sjm->table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+
+  tab->join->sj_tmp_tables.push_back(sjm->table);
+  tab->join->sjm_info_list.push_back(sjm);
+  
+  /* The table exists but has not been filled yet */
+  sjm->materialized= FALSE;
+  sjm_tab->table= sjm->table;
+  sjm->table->pos_in_table_list= emb_sj_nest;
+ 
+  DBUG_RETURN(FALSE);
+}
+
+
+bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab)
+{
+  DBUG_ENTER("setup_sj_materialization_part2");
+  JOIN_TAB *tab= sjm_tab->bush_children->start;
+  TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding;
+  SJ_MATERIALIZATION_INFO *sjm= emb_sj_nest->sj_mat_info;
+  THD *thd= tab->join->thd;
+  uint i;
+  //List<Item> &item_list= emb_sj_nest->sj_subq_pred->unit->first_select()->item_list;
+  //List_iterator<Item> it(item_list);
+
+  if (!sjm->is_sj_scan)
+  {
+    KEY           *tmp_key; /* The only index on the temporary table. */
+    uint          tmp_key_parts; /* Number of keyparts in tmp_key. */
+    tmp_key= sjm->table->key_info;
+    tmp_key_parts= tmp_key->key_parts;
+    
+    /*
+      Create/initialize everything we will need to index lookups into the
+      temptable.
+    */
+    TABLE_REF *tab_ref;
+    tab_ref= &sjm_tab->ref;
+    tab_ref->key= 0; /* The only temp table index. */
+    tab_ref->key_length= tmp_key->key_length;
+    if (!(tab_ref->key_buff=
+          (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
+        !(tab_ref->key_copy=
+          (store_key**) thd->alloc((sizeof(store_key*) *
+                                    (tmp_key_parts + 1)))) ||
+        !(tab_ref->items=
+          (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts)))
+      DBUG_RETURN(TRUE); /* purecov: inspected */
+
+    tab_ref->key_buff2=tab_ref->key_buff+ALIGN_SIZE(tmp_key->key_length);
+    tab_ref->key_err=1;
+    tab_ref->null_rejecting= 1;
+    tab_ref->disable_cache= FALSE;
+
+    KEY_PART_INFO *cur_key_part= tmp_key->key_part;
+    store_key **ref_key= tab_ref->key_copy;
+    uchar *cur_ref_buff= tab_ref->key_buff;
+    
+    for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
+    {
+      tab_ref->items[i]= emb_sj_nest->sj_subq_pred->left_expr->element_index(i);
+      int null_count= test(cur_key_part->field->real_maybe_null());
+      *ref_key= new store_key_item(thd, cur_key_part->field,
+                                   /* TODO:
+                                      the NULL byte is taken into account in
+                                      cur_key_part->store_length, so instead of
+                                      cur_ref_buff + test(maybe_null), we could
+                                      use that information instead.
+                                   */
+                                   cur_ref_buff + null_count,
+                                   null_count ? cur_ref_buff : 0,
+                                   cur_key_part->length, tab_ref->items[i],
+                                   FALSE);
+      cur_ref_buff+= cur_key_part->store_length;
+    }
+    *ref_key= NULL; /* End marker. */
+      
+    /*
+      We don't ever have guarded conditions for SJM tables, but code at SQL
+      layer depends on cond_guards array being alloced.
+    */
+    if (!(tab_ref->cond_guards= (bool**) thd->calloc(sizeof(uint*)*tmp_key_parts)))
+    {
+      DBUG_RETURN(TRUE);
+    }
+
+    tab_ref->key_err= 1;
+    tab_ref->key_parts= tmp_key_parts;
+    sjm->tab_ref= tab_ref;
+
+    /*
+      Remove the injected semi-join IN-equalities from join_tab conds. This
+      needs to be done because the IN-equalities refer to columns of
+      sj-inner tables which are not available after the materialization
+      has been finished.
+    */
+    for (i= 0; i < sjm->tables; i++)
+    {
+      remove_sj_conds(&tab[i].select_cond);
+      if (tab[i].select)
+        remove_sj_conds(&tab[i].select->cond);
+    }
+    if (!(sjm->in_equality= create_subq_in_equalities(thd, sjm,
+                                                      emb_sj_nest->sj_subq_pred)))
+      DBUG_RETURN(TRUE); /* purecov: inspected */
+    sjm_tab->type= JT_EQ_REF;
+    sjm_tab->select_cond= sjm->in_equality;
+  }
+  else
+  {
+    /*
+      We'll be doing full scan of the temptable.  
+      Setup copying of temptable columns back to the record buffers
+      for their source tables. We need this because IN-equalities
+      refer to the original tables.
+
+      EXAMPLE
+
+      Consider the query:
+        SELECT * FROM ot WHERE ot.col1 IN (SELECT it.col2 FROM it)
+      
+      Suppose it's executed with SJ-Materialization-scan. We choose to do scan
+      if we can't do the lookup, i.e. the join order is (it, ot). The plan
+      would look as follows:
+
+        table    access method      condition
+         it      materialize+scan    -
+         ot      (whatever)          ot.col1=it.col2 (C2)
+
+      The condition C2 refers to current row of table it. The problem is
+      that by the time we evaluate C2, we would have finished with scanning
+      it itself and will be scanning the temptable. 
+
+      At the moment, our solution is to copy back: when we get the next
+      temptable record, we copy its columns to their corresponding columns
+      in the record buffers for the source tables. 
+    */
+    sjm->copy_field= new Copy_field[sjm->sjm_table_cols.elements];
+    //it.rewind();
+    Item **p_item= emb_sj_nest->sj_subq_pred->unit->first_select()->ref_pointer_array;
+    for (uint i=0; i < sjm->sjm_table_cols.elements; i++)
+    {
+      bool dummy;
+      Item_equal *item_eq;
+      //Item *item= (it++)->real_item();
+      Item *item= (*(p_item++))->real_item();
+      DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
+      Field *copy_to= ((Item_field*)item)->field;
+      /*
+        Tricks with Item_equal are due to the following: suppose we have a
+        query:
+        
+        ... WHERE cond(ot.col) AND ot.col IN (SELECT it2.col FROM it1,it2
+                                               WHERE it1.col= it2.col)
+         then equality propagation will create an 
+         
+           Item_equal(it1.col, it2.col, ot.col) 
+         
+         then substitute_for_best_equal_field() will change the conditions
+         according to the join order:
+
+         table | attached condition
+         ------+--------------------
+          it1  |
+          it2  | it1.col=it2.col
+          ot   | cond(it1.col)
+
+         although we've originally had "SELECT it2.col", conditions attached 
+         to subsequent outer tables will refer to it1.col, so SJM-Scan will
+         need to unpack data to there. 
+         That is, if an element from subquery's select list participates in 
+         equality propagation, then we need to unpack it to the first
+         element equality propagation member that refers to table that is
+         within the subquery.
+      */
+      item_eq= find_item_equal(tab->join->cond_equal, copy_to, &dummy);
+
+      if (item_eq)
+      {
+        List_iterator<Item> it(item_eq->equal_items);
+        /* We're interested in field items only */
+        if (item_eq->get_const())
+          it++;
+        Item *item;
+        while ((item= it++))
+        {
+          if (!(item->used_tables() & ~emb_sj_nest->sj_inner_tables))
+          {
+            DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM);
+            copy_to= ((Item_field *) (item->real_item()))->field;
+            break;
+          }
+        }
+      }
+      sjm->copy_field[i].set(copy_to, sjm->table->field[i], FALSE);
+      /* The write_set for source tables must be set up to allow the copying */
+      bitmap_set_bit(copy_to->table->write_set, copy_to->field_index);
+    }
+    sjm_tab->type= JT_ALL;
+
+    /* Initialize full scan */
+    sjm_tab->read_first_record= join_read_record_no_init;
+    sjm_tab->read_record.copy_field= sjm->copy_field;
+    sjm_tab->read_record.copy_field_end= sjm->copy_field +
+                                         sjm->sjm_table_cols.elements;
+    sjm_tab->read_record.read_record= rr_sequential_and_unpack;
+  }
+
+  sjm_tab->bush_children->end[-1].next_select= end_sj_materialize;
+
+  DBUG_RETURN(FALSE);
+}
+
+
+
+/*
+  Create subquery IN-equalities assuming use of materialization strategy
+
+  SYNOPSIS
+    create_subq_in_equalities()
+      thd        Thread handle
+      sjm        Semi-join materialization structure
+      subq_pred  The subquery predicate
+
+  DESCRIPTION
+    Create subquery IN-equality predicates. That is, for a subquery
+
+      (oe1, oe2, ...) IN (SELECT ie1, ie2, ... FROM ...)
+
+    create "oe1=ie1 AND oe2=ie2 AND ..." expression, such that ie1, ie2, ..
+    refer to the columns of the table that's used to materialize the
+    subquery (sjm->table).
+
+    A single-column left expression is compared as a whole; a multi-column
+    one is compared element by element and the equalities are AND-ed
+    together with and_items().
+
+  RETURN
+    Created condition (fix_fields() has been called on it)
+    NULL on error
+*/
+
+static Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm, 
+                                Item_in_subselect *subq_pred)
+{
+  Item *in_cond= NULL;
+  Item *outer_expr= subq_pred->left_expr;
+
+  if (outer_expr->cols() != 1)
+  {
+    /* Row comparison: one equality per column, chained with AND */
+    for (uint col= 0; col < outer_expr->cols(); col++)
+    {
+      Item *col_eq= new Item_func_eq(outer_expr->element_index(col),
+                                     new Item_field(sjm->table->field[col]));
+      if (!col_eq)
+        return NULL; /* purecov: inspected */
+      in_cond= and_items(in_cond, col_eq);
+      if (!in_cond)
+        return NULL; /* purecov: inspected */
+    }
+  }
+  else
+  {
+    /* Scalar comparison against the only column of the temptable */
+    in_cond= new Item_func_eq(outer_expr,
+                              new Item_field(sjm->table->field[0]));
+    if (!in_cond)
+      return NULL; /* purecov: inspected */
+  }
+
+  /* fix_fields() may substitute the item, hence the address of in_cond */
+  if (in_cond->fix_fields(thd, &in_cond))
+    return NULL; /* purecov: inspected */
+  return in_cond;
+}
+
+
+
+
+/*
+  Remove injected semi-join IN-equalities from a condition.
+
+  If *tree itself is an injected equality, *tree is set to NULL. If *tree is
+  an Item_cond (AND/OR), each direct argument that is an injected equality
+  is replaced with the constant 1. Deeper nesting levels are not visited.
+*/
+static void remove_sj_conds(Item **tree)
+{
+  Item *root= *tree;
+  if (!root)
+    return;
+
+  if (is_cond_sj_in_equality(root))
+  {
+    *tree= NULL;
+    return;
+  }
+
+  if (root->type() != Item::COND_ITEM)
+    return;
+
+  List_iterator<Item> arg_it(*(((Item_cond*) root)->argument_list()));
+  for (Item *arg= arg_it++; arg; arg= arg_it++)
+  {
+    if (is_cond_sj_in_equality(arg))
+      arg_it.replace(new Item_int(1));
+  }
+}
+
+/*
+  Check if given Item was injected by semi-join equality: it must be an
+  equality function whose in_equality_no differs from UINT_MAX.
+*/
+static bool is_cond_sj_in_equality(Item *item)
+{
+  if (item->type() != Item::FUNC_ITEM)
+    return FALSE;
+  if (((Item_func*) item)->functype() != Item_func::EQ_FUNC)
+    return FALSE;
+
+  return test(((Item_func_eq*) item)->in_equality_no != UINT_MAX);
+}
+
+
+/*
+  Create a temporary table to weed out duplicate rowid combinations
+
+  SYNOPSIS
+
+    create_duplicate_weedout_tmp_table()
+      thd                    Thread handle
+      uniq_tuple_length_arg  Length of the table's column
+      sjtbl                  Update sjtbl->[start_]recinfo values which 
+                             will be needed if we'll need to convert the 
+                             created temptable from HEAP to MyISAM/Maria.
+
+  DESCRIPTION
+    Create a temporary table to weed out duplicate rowid combinations. The
+    table has a single column that is a concatenation of all rowids in the
+    combination. 
+
+    Depending on the needed length, there are two cases:
+
+    1. When the length of the column < max_key_length:
+
+      CREATE TABLE tmp (col VARBINARY(n) NOT NULL, UNIQUE KEY(col));
+
+    2. Otherwise (not a valid SQL syntax but internally supported):
+
+      CREATE TABLE tmp (col VARBINARY NOT NULL, UNIQUE CONSTRAINT(col));
+
+    The code in this function was produced by extraction of relevant parts
+    from create_tmp_table().
+
+    While the table is being built, thd->mem_root is temporarily switched to
+    the table's own memory root. Every exit taken after the switch restores
+    the saved root, either on the success path or through the "err" label.
+
+  RETURN
+    created table
+    NULL on error
+*/
+
+TABLE *create_duplicate_weedout_tmp_table(THD *thd, 
+                                          uint uniq_tuple_length_arg,
+                                          SJ_TMP_TABLE *sjtbl)
+{
+  MEM_ROOT *mem_root_save, own_root;
+  TABLE *table;
+  TABLE_SHARE *share;
+  uint  temp_pool_slot=MY_BIT_NONE;
+  char  *tmpname,path[FN_REFLEN];
+  Field **reg_field;
+  KEY_PART_INFO *key_part_info;
+  KEY *keyinfo;
+  uchar *group_buff;
+  uchar *bitmaps;
+  uint *blob_field;
+  ENGINE_COLUMNDEF *recinfo, *start_recinfo;
+  bool using_unique_constraint=FALSE;
+  bool use_packed_rows= FALSE;
+  Field *field, *key_field;
+  uint blob_count, null_pack_length, null_count;
+  /*
+    Declared without an initializer at the top of the function so that no
+    "goto err" below jumps over an initialized declaration.
+  */
+  uint reclength;
+  uchar *null_flags;
+  uchar *pos;
+  DBUG_ENTER("create_duplicate_weedout_tmp_table");
+  DBUG_ASSERT(!sjtbl->is_degenerate);
+  /*
+    STEP 1: Get temporary table name
+  */
+  statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status);
+  if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
+    temp_pool_slot = bitmap_lock_set_next(&temp_pool);
+
+  if (temp_pool_slot != MY_BIT_NONE) // we got a slot
+    sprintf(path, "%s_%lx_%i", tmp_file_prefix,
+            current_pid, temp_pool_slot);
+  else
+  {
+    /* if we run out of slots or we are not using tempool */
+    sprintf(path,"%s%lx_%lx_%x", tmp_file_prefix,current_pid,
+            thd->thread_id, thd->tmp_table++);
+  }
+  fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME);
+
+  /* STEP 2: Figure if we'll be using a key or blob+constraint */
+  if (uniq_tuple_length_arg >= CONVERT_IF_BIGGER_TO_BLOB)
+    using_unique_constraint= TRUE;
+
+  /* STEP 3: Allocate memory for temptable description */
+  init_sql_alloc(&own_root, TABLE_ALLOC_BLOCK_SIZE, 0);
+  if (!multi_alloc_root(&own_root,
+                        &table, sizeof(*table),
+                        &share, sizeof(*share),
+                        &reg_field, sizeof(Field*) * (1+1),
+                        &blob_field, sizeof(uint)*2,
+                        &keyinfo, sizeof(*keyinfo),
+                        &key_part_info, sizeof(*key_part_info) * 2,
+                        &start_recinfo,
+                        sizeof(*recinfo)*(1*2+4),
+                        &tmpname, (uint) strlen(path)+1,
+                        &group_buff, (!using_unique_constraint ?
+                                      uniq_tuple_length_arg : 0),
+                        &bitmaps, bitmap_buffer_size(1)*3,
+                        NullS))
+  {
+    /* thd->mem_root has not been switched yet, only the slot is released */
+    if (temp_pool_slot != MY_BIT_NONE)
+      bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
+    DBUG_RETURN(NULL);
+  }
+  strmov(tmpname,path);
+  
+
+  /* STEP 4: Create TABLE description */
+  bzero((char*) table,sizeof(*table));
+  bzero((char*) reg_field,sizeof(Field*)*2);
+
+  /*
+    From here on allocations made through thd->mem_root go to the table's
+    own root; the saved root must be restored on every exit.
+  */
+  table->mem_root= own_root;
+  mem_root_save= thd->mem_root;
+  thd->mem_root= &table->mem_root;
+
+  table->field=reg_field;
+  table->alias.set("weedout-tmp", sizeof("weedout-tmp")-1,
+                   table_alias_charset);
+  table->reginfo.lock_type=TL_WRITE;	/* Will be updated */
+  table->db_stat=HA_OPEN_KEYFILE+HA_OPEN_RNDFILE;
+  table->map=1;
+  table->temp_pool_slot = temp_pool_slot;
+  table->copy_blobs= 1;
+  table->in_use= thd;
+  table->quick_keys.init();
+  table->covering_keys.init();
+  table->keys_in_use_for_query.init();
+
+  table->s= share;
+  init_tmp_table_share(thd, share, "", 0, tmpname, tmpname);
+  share->blob_field= blob_field;
+  share->blob_ptr_size= portable_sizeof_char_ptr;
+  share->table_charset= NULL;
+  share->primary_key= MAX_KEY;               // Indicate no primary key
+  share->keys_for_keyread.init();
+  share->keys_in_use.init();
+
+  blob_count= 0;
+
+  /* Create the field */
+  {
+    /*
+      For the sake of uniformity, always use Field_varstring (although we
+      could use Field_string for shorter keys)
+    */
+    field= new Field_varstring(uniq_tuple_length_arg, FALSE, "rowids", share,
+                               &my_charset_bin);
+    /*
+      Go through the common error path: a plain return here would leave
+      thd->mem_root pointing at the table's root and would leak the root
+      and the temp pool slot.
+    */
+    if (!field)
+      goto err;
+    field->table= table;
+    field->key_start.init(0);
+    field->part_of_key.init(0);
+    field->part_of_sortkey.init(0);
+    field->unireg_check= Field::NONE;
+    field->flags= (NOT_NULL_FLAG | BINARY_FLAG | NO_DEFAULT_VALUE_FLAG);
+    field->reset_fields();
+    field->init(table);
+    field->orig_table= NULL;
+     
+    field->field_index= 0;
+    
+    /* reg_field is a NULL-terminated array with a single element */
+    *(reg_field++)= field;
+    *blob_field= 0;
+    *reg_field= 0;
+
+    share->fields= 1;
+    share->blob_fields= 0;
+  }
+
+  reclength= field->pack_length();
+  if (using_unique_constraint)
+  { 
+    share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON);
+    table->file= get_new_handler(share, &table->mem_root,
+                                 share->db_type());
+    DBUG_ASSERT(uniq_tuple_length_arg <= table->file->max_key_length());
+  }
+  else
+  {
+    share->db_plugin= ha_lock_engine(0, heap_hton);
+    table->file= get_new_handler(share, &table->mem_root,
+                                 share->db_type());
+  }
+  if (!table->file)
+    goto err;
+
+  null_count=1;
+  
+  /* The record starts with one byte of null flags */
+  null_pack_length= 1;
+  reclength += null_pack_length;
+
+  share->reclength= reclength;
+  {
+    /* One allocation holds record[0], record[1] and the default values */
+    uint alloc_length=ALIGN_SIZE(share->reclength + MI_UNIQUE_HASH_LENGTH+1);
+    share->rec_buff_length= alloc_length;
+    if (!(table->record[0]= (uchar*)
+                            alloc_root(&table->mem_root, alloc_length*3)))
+      goto err;
+    table->record[1]= table->record[0]+alloc_length;
+    share->default_values= table->record[1]+alloc_length;
+  }
+  setup_tmp_table_column_bitmaps(table, bitmaps);
+
+  recinfo= start_recinfo;
+  null_flags=(uchar*) table->record[0];
+  pos=table->record[0]+ null_pack_length;
+  if (null_pack_length)
+  {
+    /* First column definition describes the null-flags byte(s) */
+    bzero((uchar*) recinfo,sizeof(*recinfo));
+    recinfo->type=FIELD_NORMAL;
+    recinfo->length=null_pack_length;
+    recinfo++;
+    bfill(null_flags,null_pack_length,255);	// Set null fields
+
+    table->null_flags= (uchar*) table->record[0];
+    share->null_fields= null_count;
+    share->null_bytes= null_pack_length;
+  }
+  null_count=1;
+
+  {
+    uint length;
+    bzero((uchar*) recinfo,sizeof(*recinfo));
+    /* Place the field right after the null flags; it has no null bit */
+    field->move_field(pos,(uchar*) 0,0);
+
+    field->reset();
+    // Initialize the table field:
+    bzero(field->ptr, field->pack_length());
+
+    length=field->pack_length();
+    pos+= length;
+
+    /* Make entry for create table */
+    recinfo->length=length;
+    if (field->flags & BLOB_FLAG)
+      recinfo->type= FIELD_BLOB;
+    else if (use_packed_rows &&
+             field->real_type() == MYSQL_TYPE_STRING &&
+             length >= MIN_STRING_LENGTH_TO_PACK_ROWS)
+      recinfo->type=FIELD_SKIP_ENDSPACE;
+    else
+      recinfo->type=FIELD_NORMAL;
+
+    field->set_table_name(&table->alias);
+  }
+
+  if (thd->variables.tmp_table_size == ~ (ulonglong) 0)		// No limit
+    share->max_rows= ~(ha_rows) 0;
+  else
+    share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ?
+                                 min(thd->variables.tmp_table_size,
+                                     thd->variables.max_heap_table_size) :
+                                 thd->variables.tmp_table_size) /
+                                 share->reclength);
+  set_if_bigger(share->max_rows,1);		// For dummy start options
+
+
+  /* Describe the single unique key (or unique constraint) over the field */
+  if (TRUE)
+  {
+    DBUG_PRINT("info",("Creating group key in temporary table"));
+    share->keys=1;
+    share->uniques= test(using_unique_constraint);
+    table->key_info=keyinfo;
+    keyinfo->key_part=key_part_info;
+    keyinfo->flags=HA_NOSAME;
+    keyinfo->usable_key_parts= keyinfo->key_parts= 1;
+    keyinfo->key_length=0;
+    keyinfo->rec_per_key=0;
+    keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+    keyinfo->name= (char*) "weedout_key";
+    {
+      key_part_info->null_bit=0;
+      key_part_info->field=  field;
+      key_part_info->offset= field->offset(table->record[0]);
+      key_part_info->length= (uint16) field->key_length();
+      key_part_info->type=   (uint8) field->key_type();
+      key_part_info->key_type = FIELDFLAG_BINARY;
+      if (!using_unique_constraint)
+      {
+        if (!(key_field= field->new_key_field(thd->mem_root, table,
+                                              group_buff,
+                                              field->null_ptr,
+                                              field->null_bit)))
+          goto err;
+        key_part_info->key_part_flag|= HA_END_SPACE_ARE_EQUAL; //todo need this?
+      }
+      keyinfo->key_length+=  key_part_info->length;
+    }
+  }
+
+  if (thd->is_fatal_error)			// If end of memory
+    goto err;
+  share->db_record_offset= 1;
+  table->no_rows= 1;              		// We don't need the data
+
+  // recinfo must point after last field
+  recinfo++;
+  if (share->db_type() == TMP_ENGINE_HTON)
+  {
+    if (create_internal_tmp_table(table, keyinfo, start_recinfo, &recinfo, 0, 0))
+      goto err;
+  }
+  /* Saved for a possible later conversion of the table to another engine */
+  sjtbl->start_recinfo= start_recinfo;
+  sjtbl->recinfo=       recinfo;
+  if (open_tmp_table(table))
+    goto err;
+
+  thd->mem_root= mem_root_save;
+  DBUG_RETURN(table);
+
+err:
+  thd->mem_root= mem_root_save;
+  free_tmp_table(thd,table);                    /* purecov: inspected */
+  if (temp_pool_slot != MY_BIT_NONE)
+    bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
+  DBUG_RETURN(NULL);				/* purecov: inspected */
+}
+
+
+/*
+  SemiJoinDuplicateElimination: Reset the temporary table
+
+  When a weedout temptable exists, all its rows are deleted and the
+  handler's result is returned. Otherwise (no temptable) only the
+  "have seen a row" flag is cleared and 0 is returned.
+*/
+
+int do_sj_reset(SJ_TMP_TABLE *sj_tbl)
+{
+  DBUG_ENTER("do_sj_reset");
+  if (!sj_tbl->tmp_table)
+  {
+    sj_tbl->have_degenerate_row= FALSE;
+    DBUG_RETURN(0);
+  }
+
+  int delete_res= sj_tbl->tmp_table->file->ha_delete_all_rows();
+  DBUG_RETURN(delete_res);
+}
+
+/*
+  SemiJoinDuplicateElimination: Weed out duplicate row combinations
+
+  SYNOPSIS
+    do_sj_dups_weedout()
+      thd    Thread handle
+      sjtbl  Duplicate weedout table
+
+  DESCRIPTION
+    Try storing current record combination of outer tables (i.e. their
+    rowids) in the temporary table. This records the fact that we've seen 
+    this record combination and also tells us if we've seen it before.
+
+    The tuple written to the temptable is laid out as
+      [length prefix (1 or 2 bytes)] [null bytes] [rowid of each table]
+    and is placed after the first byte of record[0].
+
+  RETURN
+    -1  Error
+    1   The row combination is a duplicate (discard it)
+    0   The row combination is not a duplicate (continue)
+*/
+
+int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl) 
+{
+  DBUG_ENTER("do_sj_dups_weedout");
+
+  if (sjtbl->is_degenerate)
+  {
+    /* No rowids to store: only the very first row is not a duplicate */
+    if (sjtbl->have_degenerate_row) 
+      DBUG_RETURN(1);
+
+    sjtbl->have_degenerate_row= TRUE;
+    DBUG_RETURN(0);
+  }
+
+  TABLE *weedout_tbl= sjtbl->tmp_table;
+  uchar *write_pos= weedout_tbl->record[0] + 1;
+
+  /* Put the rowids tuple into table->record[0]: */
+
+  // 1. Store the length 
+  if (((Field_varstring*)(weedout_tbl->field[0]))->length_bytes != 1)
+  {
+    int2store(write_pos, sjtbl->rowid_len + sjtbl->null_bytes);
+    write_pos += 2;
+  }
+  else
+  {
+    *write_pos= (uchar)(sjtbl->rowid_len + sjtbl->null_bytes);
+    write_pos++;
+  }
+
+  // 2. Zero the null bytes 
+  uchar *null_flags_pos= write_pos;
+  if (sjtbl->null_bytes)
+  {
+    bzero(write_pos, sjtbl->null_bytes);
+    write_pos += sjtbl->null_bytes; 
+  }
+
+  // 3. Put the rowids
+  SJ_TMP_TABLE::TAB *last= sjtbl->tabs_end;
+  for (SJ_TMP_TABLE::TAB *cur= sjtbl->tabs; cur != last; cur++)
+  {
+    TABLE *src_table= cur->join_tab->table;
+    handler *src_file= src_table->file;
+    uchar *rowid_pos= write_pos + cur->rowid_offset;
+
+    if (!(src_table->maybe_null && src_table->null_row))
+    {
+      /* Copy the rowid value */
+      memcpy(rowid_pos, src_file->ref, src_file->ref_length);
+      continue;
+    }
+
+    /* It's a NULL-complemented row */
+    *(null_flags_pos + cur->null_byte) |= cur->null_bit;
+    bzero(rowid_pos, src_file->ref_length);
+  }
+
+  int error= weedout_tbl->file->ha_write_tmp_row(weedout_tbl->record[0]);
+  if (!error)
+    DBUG_RETURN(0);
+
+  /* create_internal_tmp_table_from_heap will generate error if needed */
+  if (!sjtbl->tmp_table->file->is_fatal_error(error, HA_CHECK_DUP))
+    DBUG_RETURN(1); /* Duplicate */
+
+  if (create_internal_tmp_table_from_heap(thd, sjtbl->tmp_table,
+                                          sjtbl->start_recinfo,
+                                          &sjtbl->recinfo, error, 1))
+    DBUG_RETURN(-1);
+
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Setup the strategies to eliminate semi-join duplicates.
+  
+  SYNOPSIS
+    setup_semijoin_dups_elimination()
+      join           Join to process
+      options        Join options (needed to see if join buffering will be 
+                     used or not)
+      no_jbuf_after  Another bit of information re where join buffering will
+                     be used.
+
+  DESCRIPTION
+    Setup the strategies to eliminate semi-join duplicates. ATM there are 4
+    strategies:
+
+    1. DuplicateWeedout (use of temptable to remove duplicates based on rowids
+                         of row combinations)
+    2. FirstMatch (pick only the 1st matching row combination of inner tables)
+    3. LooseScan (scanning the sj-inner table in a way that groups duplicates
+                  together and picking the 1st one)
+    4. SJ-Materialization.
+    
+    The join order has "duplicate-generating ranges", and every range is
+    served by one strategy or a combination of FirstMatch with some
+    other strategy.
+    
+    "Duplicate-generating range" is defined as a range within the join order
+    that contains all of the inner tables of a semi-join. All ranges must be
+    disjoint, if tables of several semi-joins are interleaved, then the ranges
+    are joined together, which is equivalent to converting
+      SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 )
+    to
+      SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...)
+    .
+
+    Applicability conditions are as follows:
+
+    DuplicateWeedout strategy
+    ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+      (ot|nt)*  [ it ((it|ot|nt)* (it|ot))]  (nt)*
+      +------+  +=========================+  +---+
+        (1)                 (2)               (3)
+
+       (1) - Prefix of OuterTables (those that participate in 
+             IN-equality and/or are correlated with subquery) and outer 
+             Non-correlated tables.
+       (2) - The handled range. The range starts with the first sj-inner
+             table, and covers all sj-inner and outer tables 
+             Within the range,  Inner, Outer, outer non-correlated tables
+             may follow in any order.
+       (3) - The suffix of outer non-correlated tables.
+    
+    FirstMatch strategy
+    ~~~~~~~~~~~~~~~~~~~
+
+      (ot|nt)*  [ it ((it|nt)* it) ]  (nt)*
+      +------+  +==================+  +---+
+        (1)             (2)          (3)
+
+      (1) - Prefix of outer and non-correlated tables
+      (2) - The handled range, which may contain only inner and
+            non-correlated tables.
+      (3) - The suffix of outer non-correlated tables.
+
+    LooseScan strategy 
+    ~~~~~~~~~~~~~~~~~~
+
+     (ot|ct|nt) [ loosescan_tbl (ot|nt|it)* it ]  (ot|nt)*
+     +--------+   +===========+ +=============+   +------+
+        (1)           (2)          (3)              (4)
+     
+      (1) - Prefix that may contain any outer tables. The prefix must contain
+            all the non-trivially correlated outer tables. (non-trivially means
+            that the correlation is not just through the IN-equality).
+      
+      (2) - Inner table for which the LooseScan scan is performed.
+
+      (3) - The remainder of the duplicate-generating range. It is served by 
+            application of FirstMatch strategy, with the exception that
+            outer IN-correlated tables are considered to be non-correlated.
+
+      (4) - The suffix of outer and outer non-correlated tables.
+
+  
+  The choice between the strategies is made by the join optimizer (see
+  advance_sj_state() and fix_semijoin_strategies_for_picked_join_order()).
+  This function sets up all fields/structures/etc needed for execution except
+  for setup/initialization of semi-join materialization which is done in 
+  setup_sj_materialization() (todo: can't we move that to here also?)
+
+  RETURN
+    FALSE  OK 
+    TRUE   Error: out of memory, or the duplicate weedout temporary table
+           could not be created
+*/
+
+int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, 
+                                    uint no_jbuf_after)
+{
+  uint i;
+  THD *thd= join->thd;
+  DBUG_ENTER("setup_semijoin_dups_elimination");
+  
+  /*
+    'i' indexes join->join_tab while 'pos' walks join->best_positions. The
+    two advance differently for SJ-Materialization ranges (see below).
+  */
+  POSITION *pos= join->best_positions + join->const_tables;
+  for (i= join->const_tables ; i < join->top_join_tab_count; )
+  {
+    JOIN_TAB *tab=join->join_tab + i;
+    uint keylen, keyno;
+    switch (pos->sj_strategy) {
+      case SJ_OPT_MATERIALIZE:
+      case SJ_OPT_MATERIALIZE_SCAN:
+        /* Do nothing */
+        i+= 1;// It used to be pos->n_sj_tables, but now they are embedded in a nest
+        pos += pos->n_sj_tables;
+        break;
+      case SJ_OPT_LOOSE_SCAN:
+      {
+        /* We jump from the last table to the first one */
+        tab->loosescan_match_tab= tab + pos->n_sj_tables - 1;
+
+        /* Calculate key length */
+        keylen= 0;
+        keyno= pos->loosescan_key;
+        for (uint kp=0; kp < pos->loosescan_parts; kp++)
+          keylen += tab->table->key_info[keyno].key_part[kp].store_length;
+
+        tab->loosescan_key_len= keylen;
+        if (pos->n_sj_tables > 1) 
+          tab[pos->n_sj_tables - 1].do_firstmatch= tab;
+        i+= pos->n_sj_tables;
+        pos+= pos->n_sj_tables;
+        break;
+      }
+      case SJ_OPT_DUPS_WEEDOUT:
+      {
+        /*
+          Check for join buffering. If there is one, move the first table
+          forwards, but do not destroy other duplicate elimination methods.
+        */
+        uint first_table= i;
+        uint join_cache_level= join->thd->variables.join_cache_level;
+        for (uint j= i; j < i + pos->n_sj_tables; j++)
+        {
+          /*
+            When we'll properly take join buffering into account during
+            join optimization, the below check should be changed to 
+            "if (join->best_positions[j].use_join_buffer && 
+                 j <= no_jbuf_after)".
+            For now, use a rough criteria:
+          */
+          JOIN_TAB *js_tab=join->join_tab + j; 
+          /*
+            The access type must be the one of the table being examined
+            (js_tab), not of the first table of the range.
+          */
+          if (j != join->const_tables && js_tab->use_quick != 2 &&
+              j <= no_jbuf_after &&
+              ((js_tab->type == JT_ALL && join_cache_level != 0) ||
+               (join_cache_level > 4 && (js_tab->type == JT_REF || 
+                                         js_tab->type == JT_EQ_REF))))
+          {
+            /* Looks like we'll be using join buffer */
+            first_table= join->const_tables;
+            break;
+          }
+        }
+
+        SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES];
+        SJ_TMP_TABLE::TAB *last_tab= sjtabs;
+        uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes)
+        uint jt_null_bits= 0;    // # null bits in tuple bytes
+        /*
+          Walk through the range and remember
+           - tables that need their rowids to be put into temptable
+           - the last outer table
+        */
+        for (JOIN_TAB *j=join->join_tab + first_table; 
+             j < join->join_tab + i + pos->n_sj_tables; j++)
+        {
+          if (sj_table_is_included(join, j))
+          {
+            last_tab->join_tab= j;
+            last_tab->rowid_offset= jt_rowid_offset;
+            jt_rowid_offset += j->table->file->ref_length;
+            if (j->table->maybe_null)
+            {
+              /*
+                NOTE(review): null_bit receives a bit index here, while
+                do_sj_dups_weedout() ORs it into the null byte as if it were
+                a mask - confirm which one is intended.
+              */
+              last_tab->null_byte= jt_null_bits / 8;
+              last_tab->null_bit= jt_null_bits++;
+            }
+            last_tab++;
+            j->table->prepare_for_position();
+            j->keep_current_rowid= TRUE;
+          }
+        }
+
+        SJ_TMP_TABLE *sjtbl;
+        if (jt_rowid_offset) /* Temptable has at least one rowid */
+        {
+          size_t tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB);
+          if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))) ||
+              !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size)))
+            DBUG_RETURN(TRUE); /* purecov: inspected */
+          memcpy(sjtbl->tabs, sjtabs, tabs_size);
+          sjtbl->is_degenerate= FALSE;
+          sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs);
+          sjtbl->rowid_len= jt_rowid_offset;
+          sjtbl->null_bits= jt_null_bits;
+          sjtbl->null_bytes= (jt_null_bits + 7)/8;
+          sjtbl->tmp_table= 
+            create_duplicate_weedout_tmp_table(thd, 
+                                               sjtbl->rowid_len + 
+                                               sjtbl->null_bytes,
+                                               sjtbl);
+          /*
+            Do not register a table that failed to be created: the
+            execution code dereferences tmp_table unconditionally when
+            is_degenerate is FALSE.
+          */
+          if (!sjtbl->tmp_table)
+            DBUG_RETURN(TRUE); /* purecov: inspected */
+          join->sj_tmp_tables.push_back(sjtbl->tmp_table);
+        }
+        else
+        {
+          /* 
+            This is a special case where the entire subquery predicate does 
+            not depend on anything at all, ie this is 
+              WHERE const IN (uncorrelated select)
+          */
+          if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))))
+            DBUG_RETURN(TRUE); /* purecov: inspected */
+          sjtbl->tmp_table= NULL;
+          sjtbl->is_degenerate= TRUE;
+          sjtbl->have_degenerate_row= FALSE;
+        }
+        /* First table of the range flushes, the last one checks */
+        join->join_tab[first_table].flush_weedout_table= sjtbl;
+        join->join_tab[i + pos->n_sj_tables - 1].check_weed_out_table= sjtbl;
+
+        i+= pos->n_sj_tables;
+        pos+= pos->n_sj_tables;
+        break;
+      }
+      case SJ_OPT_FIRST_MATCH:
+      {
+        JOIN_TAB *j, *jump_to= tab-1;
+        for (j= tab; j != tab + pos->n_sj_tables; j++)
+        {
+          /*
+            NOTE: this loop probably doesn't do the right thing for the case 
+            where FirstMatch's duplicate-generating range is interleaved with
+            "unrelated" tables (as specified in WL#3750, section 2.2).
+          */
+          if (!j->emb_sj_nest)
+            jump_to= tab;
+          else
+          {
+            j->first_sj_inner_tab= tab;
+            j->last_sj_inner_tab= tab + pos->n_sj_tables - 1;
+          }
+        }
+        /* j points past the range here, so j[-1] is its last table */
+        j[-1].do_firstmatch= jump_to;
+        i+= pos->n_sj_tables;
+        pos+= pos->n_sj_tables;
+        break;
+      }
+      case SJ_OPT_NONE:
+        i++;
+        pos++;
+        break;
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Destroy all temporary tables created by NL-semijoin runtime
+
+  Every table in join->sj_tmp_tables is freed; afterwards both
+  join->sj_tmp_tables and join->sjm_info_list are emptied.
+*/
+
+void destroy_sj_tmp_tables(JOIN *join)
+{
+  List_iterator<TABLE> tmp_it(join->sj_tmp_tables);
+  for (TABLE *tmp_tbl= tmp_it++; tmp_tbl; tmp_tbl= tmp_it++)
+  {
+    /* 
+      The list mixes tables in different states: SJ-Materialization tables
+      are initialized for either sequential reading or index lookup, while
+      DuplicateWeedout tables are only written to and are not initialized
+      for read. ha_index_or_rnd_end() is the call used for all of them.
+    */
+    tmp_tbl->file->ha_index_or_rnd_end();
+    free_tmp_table(join->thd, tmp_tbl);
+  }
+
+  join->sj_tmp_tables.empty();
+  join->sjm_info_list.empty();
+}
+
+
+/*
+  Remove all records from all temp tables used by NL-semijoin runtime
+
+  SYNOPSIS
+    clear_sj_tmp_tables()
+      join  The join to remove tables for
+
+  DESCRIPTION
+    Remove all records from all temp tables used by NL-semijoin runtime. This 
+    must be done before every join re-execution. The "materialized" flag of
+    every SJ-Materialization structure is cleared as well.
+
+  RETURN
+    0      OK
+    other  Error code of the first ha_delete_all_rows() call that failed;
+           the remaining tables and the materialized flags are left as is
+*/
+
+int clear_sj_tmp_tables(JOIN *join)
+{
+  List_iterator<TABLE> tbl_it(join->sj_tmp_tables);
+  for (TABLE *tmp_tbl= tbl_it++; tmp_tbl; tmp_tbl= tbl_it++)
+  {
+    int delete_res= tmp_tbl->file->ha_delete_all_rows();
+    if (delete_res)
+      return delete_res; /* purecov: inspected */
+  }
+
+  List_iterator<SJ_MATERIALIZATION_INFO> sjm_it(join->sjm_info_list);
+  for (SJ_MATERIALIZATION_INFO *sjm_info= sjm_it++; sjm_info;
+       sjm_info= sjm_it++)
+    sjm_info->materialized= FALSE;
+
+  return 0;
+}
+
+
+/*
+  Check if the table's rowid is included in the temptable
+
+  SYNOPSIS
+    sj_table_is_included()
+      join      The join
+      join_tab  The table to be checked
+
+  DESCRIPTION
+    SemiJoinDuplicateElimination: check whether the table's rowid should be
+    included in the temptable. This is so if
+
+    1. The table is not embedded within some semi-join nest (e.g. it has
+       been pulled out of one), and
+
+    2. The table is not functionally dependent (through eq_ref access) on
+       tables that are within the same outer join nest
+    [3. This is also true for constant tables that can't be
+        NULL-complemented but this function is not called for such tables]
+
+  RETURN
+    TRUE  - Include table's rowid
+    FALSE - Don't
+*/
+
+static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab)
+{
+  /* A table that is inside a semi-join nest does not get its rowid stored. */
+  if (join_tab->emb_sj_nest)
+    return FALSE;
+
+  TABLE_LIST *own_nest= join_tab->table->pos_in_table_list->embedding;
+
+  /* Only eq_ref access is treated as a functional dependency here. */
+  if (join_tab->type != JT_EQ_REF)
+    return TRUE;
+
+  /* Collect all tables that the eq_ref lookup key is computed from. */
+  table_map key_source_tables= 0;
+  for (uint part= 0; part < join_tab->ref.key_parts; part++)
+    key_source_tables|= join_tab->ref.items[part]->used_tables();
+
+  /*
+    The table counts as functionally dependent only if all of those tables
+    have the same embedding nest as this one; otherwise include its rowid.
+  */
+  Table_map_iterator tables_it(key_source_tables & ~PSEUDO_TABLE_BITS);
+  uint table_no;
+  while ((table_no= tables_it.next_bit()) != Table_map_iterator::BITMAP_END)
+  {
+    if (own_nest != join->map2table[table_no]->table->pos_in_table_list->embedding)
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Index lookup-based subquery: save some flags for EXPLAIN output
+
+  SYNOPSIS
+    save_index_subquery_explain_info()
+      join_tab  Subquery's join tab (there is only one as index lookup is
+                only used for subqueries that are single-table SELECTs)
+      where     Subquery's WHERE clause
+
+  DESCRIPTION
+    For index lookup-based subquery (i.e. one executed with
+    subselect_uniquesubquery_engine or subselect_indexsubquery_engine),
+    check whether its EXPLAIN output row should contain
+      "Using index" (TAB_INFO_USING_INDEX)
+      "Using Where" (TAB_INFO_USING_WHERE)
+      "Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL)
+    and set appropriate flags in join_tab->packed_info.
+*/
+
+static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
+{
+  /* Reset the flags to TAB_INFO_HAVE_VALUE, then add one bit per remark. */
+  join_tab->packed_info= TAB_INFO_HAVE_VALUE;
+  if (join_tab->table->covering_keys.is_set(join_tab->ref.key))
+    join_tab->packed_info|= TAB_INFO_USING_INDEX;
+  if (where)
+    join_tab->packed_info|= TAB_INFO_USING_WHERE;
+
+  /* A single guarded key part is enough for "Full scan on NULL key". */
+  uint part= 0;
+  while (part < join_tab->ref.key_parts && !join_tab->ref.cond_guards[part])
+    part++;
+  if (part < join_tab->ref.key_parts)
+    join_tab->packed_info|= TAB_INFO_FULL_SCAN_ON_NULL;
+}
+
+
+/*
+  Check if the join can be rewritten to [unique_]indexsubquery_engine
+
+  DESCRIPTION
+    Check if the join can be changed into [unique_]indexsubquery_engine.
+
+    The check is done after join optimization, the idea is that if the join
+    has only one table and uses a [eq_]ref access generated from subselect's
+    IN-equality then we replace it with a subselect_indexsubquery_engine or a
+    subselect_uniquesubquery_engine.
+
+  RETURN 
+    0 - Ok, rewrite done (stop join optimization and return)
+    1 - Fatal error (stop join optimization and return)
+   -1 - No rewrite performed, continue with join optimization
+*/
+
+int rewrite_to_index_subquery_engine(JOIN *join)
+{
+  THD *thd= join->thd;
+  JOIN_TAB* join_tab=join->join_tab;
+  SELECT_LEX_UNIT *unit= join->unit;
+  DBUG_ENTER("rewrite_to_index_subquery_engine");
+
+  /*
+    Is this a simple IN subquery (one table, WHERE, no GROUP/ORDER/UNION)?
+  */
+  /* TODO: In order to use these more efficient subquery engines in more cases,
+     the following problems need to be solved:
+     - the code that removes GROUP BY (group_list), also adds an ORDER BY
+       (order), thus GROUP BY queries (almost?) never pass through this branch.
+       Solution: remove the test below '!join->order', because we remove the
+       ORDER clause for subqueries anyway.
+     - in order to set a more efficient engine, the optimizer needs to both
+       decide to remove GROUP BY, *and* select one of the JT_[EQ_]REF[_OR_NULL]
+       access methods, *and* loose scan should be more expensive or
+       inapplicable. When is that possible?
+     - Consider expanding the applicability of this rewrite for loose scan
+       for group by queries.
+  */
+  if (!join->group_list && !join->order &&
+      join->unit->item && 
+      join->unit->item->substype() == Item_subselect::IN_SUBS &&
+      join->table_count == 1 && join->conds &&
+      !join->unit->is_union())
+  {
+    if (!join->having)
+    {
+      Item *where= join->conds;
+      if (join_tab[0].type == JT_EQ_REF &&
+	  join_tab[0].ref.items[0]->name == in_left_expr_name)
+      {
+        remove_subq_pushed_predicates(join, &where);
+        save_index_subquery_explain_info(join_tab, where);
+        join_tab[0].type= JT_UNIQUE_SUBQUERY;
+        join->error= 0;
+        DBUG_RETURN(unit->item->
+                    change_engine(new
+                                  subselect_uniquesubquery_engine(thd,
+                                                                  join_tab,
+                                                                  unit->item,
+                                                                  where)));
+      }
+      else if (join_tab[0].type == JT_REF &&
+	       join_tab[0].ref.items[0]->name == in_left_expr_name)
+      {
+	remove_subq_pushed_predicates(join, &where);
+        save_index_subquery_explain_info(join_tab, where);
+        join_tab[0].type= JT_INDEX_SUBQUERY;
+        join->error= 0;
+        DBUG_RETURN(unit->item->
+                    change_engine(new
+                                  subselect_indexsubquery_engine(thd,
+                                                                 join_tab,
+                                                                 unit->item,
+                                                                 where,
+                                                                 NULL,
+                                                                 0)));
+      }
+    } else if (join_tab[0].type == JT_REF_OR_NULL &&
+	       join_tab[0].ref.items[0]->name == in_left_expr_name &&
+               join->having->name == in_having_cond)
+    {
+      join_tab[0].type= JT_INDEX_SUBQUERY;
+      join->error= 0;
+      join->conds= remove_additional_cond(join->conds);
+      save_index_subquery_explain_info(join_tab, join->conds);
+      DBUG_RETURN(unit->item->
+		  change_engine(new subselect_indexsubquery_engine(thd,
+								   join_tab,
+								   unit->item,
+								   join->conds,
+                                                                   join->having,
+								   1)));
+    }
+  }
+
+  DBUG_RETURN(-1); /* Haven't done the rewrite */
+}
+
+
+/**
+  Remove additional condition inserted by IN/ALL/ANY transformation.
+
+  @param conds   condition for processing
+
+  @return
+    new conditions
+*/
+
+static Item *remove_additional_cond(Item* conds)
+{
+  /* The whole condition is the additional condition: nothing remains. */
+  if (conds->name == in_additional_cond)
+    return 0;
+  /* Only a COND_ITEM has an argument list that could contain it. */
+  if (conds->type() != Item::COND_ITEM)
+    return conds;
+  List<Item> *members= ((Item_cond*) conds)->argument_list();
+  List_iterator<Item> member_it(*members);
+  for (Item *member= member_it++; member; member= member_it++)
+  {
+    if (member->name != in_additional_cond)
+      continue;
+    /* Drop the first match only; unwrap a list left with a single member. */
+    member_it.remove();
+    if (members->elements == 1)
+      return members->head();
+    break;
+  }
+  return conds;
+}
+
+
+/*
+  Remove the predicates pushed down into the subquery
+
+  SYNOPSIS
+    remove_subq_pushed_predicates()
+      where   IN  The subquery's WHERE condition (callers pass join->conds)
+              OUT The remaining WHERE condition, or NULL
+
+  DESCRIPTION
+    Given that this join will be executed using (unique|index)_subquery,
+    without "checking NULL", remove the predicates that were pushed down
+    into the subquery.
+
+    If the subquery compares scalar values, we can remove the condition that
+    was wrapped into trig_cond (it will be checked when needed by the subquery
+    engine)
+
+    If the subquery compares row values, we need to keep the wrapped
+    equalities in the WHERE clause: when the left (outer) tuple has both NULL
+    and non-NULL values, we'll do a full table scan and will rely on the
+    equalities corresponding to non-NULL parts of left tuple to filter out
+    non-matching records.
+
+    TODO: We can remove the equalities that will be guaranteed to be true by the
+    fact that subquery engine will be using index lookup. This must be done only
+    for cases where there are no conversion errors of significance, e.g. 257
+    that is searched in a byte. But this requires homogenization of the return 
+    codes of all Field*::store() methods.
+*/
+
+static void remove_subq_pushed_predicates(JOIN *join, Item **where)
+{
+  Item *cond= join->conds;
+  if (cond->type() != Item::FUNC_ITEM ||
+      ((Item_func *) cond)->functype() != Item_func::EQ_FUNC)
+    return;                             /* not a single equality */
+  Item **eq_args= ((Item_func *) cond)->arguments();
+  if (eq_args[0]->type() != Item::REF_ITEM ||
+      eq_args[1]->type() != Item::FIELD_ITEM)
+    return;                             /* not of the form "ref = field" */
+  /* Clear the WHERE only when test_if_ref() accepts this equality. */
+  if (test_if_ref(cond, (Item_field *) eq_args[1], eq_args[0]))
+    *where= 0;
+}
+
+
+
+
+/**
+  Optimize all subqueries of a query that were not flattened into a semijoin.
+
+  @details
+  Optimize all immediate children subqueries of a query.
+
+  This phase must be called after substitute_for_best_equal_field() because
+  that function may replace items with other items from a multiple equality,
+  and we need to reference the correct items in the index access method of the
+  IN predicate.
+
+  @return Operation status
+  @retval FALSE     success.
+  @retval TRUE      error occurred.
+*/
+
+bool JOIN::optimize_unflattened_subqueries()
+{ /* Thin wrapper: forwards to the method of the same name on select_lex. */
+  return select_lex->optimize_unflattened_subqueries();
+}
+
+
+/*
+  Join tab execution startup function.
+
+  SYNOPSIS
+    join_tab_execution_startup()
+      tab  Join tab to perform startup actions for
+
+  DESCRIPTION
+    Join tab execution startup function. This is different from
+    tab->read_first_record in the regard that this has actions that are to be
+    done once per join execution.
+
+    Currently there are only two possible startup functions, so we have them
+    both here inside if (...) branches. In future we could switch to function
+    pointers.
+
+  TODO: consider moving this together with JOIN_TAB::preread_init
+  
+  RETURN 
+    NESTED_LOOP_OK - OK
+    NESTED_LOOP_ERROR| NESTED_LOOP_KILLED - Error, abort the join execution
+*/
+
+enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab)
+{
+  Item_in_subselect *in_subs;
+  DBUG_ENTER("join_tab_execution_startup");
+  
+  if (tab->table->pos_in_table_list && 
+      (in_subs= tab->table->pos_in_table_list->jtbm_subselect))
+  {
+    /* Non-merged SJM nest: materialize through the subquery's hash engine */
+    DBUG_ASSERT(in_subs->engine->engine_type() ==
+                subselect_engine::HASH_SJ_ENGINE);
+    subselect_hash_sj_engine *hash_sj_engine=
+      ((subselect_hash_sj_engine*)in_subs->engine);
+    if (!hash_sj_engine->is_materialized)
+    {
+      hash_sj_engine->materialize_join->exec();
+      hash_sj_engine->is_materialized= TRUE; /* NOTE: set before error check */
+
+      if (hash_sj_engine->materialize_join->error || tab->join->thd->is_fatal_error)
+        DBUG_RETURN(NESTED_LOOP_ERROR);
+    }
+  }
+  else if (tab->bush_children)
+  {
+    /* Merged SJM nest: fill it by running the join of its child tabs */
+    enum_nested_loop_state rc;
+    SJ_MATERIALIZATION_INFO *sjm= tab->bush_children->start->emb_sj_nest->sj_mat_info;
+
+    if (!sjm->materialized)
+    {
+      JOIN *join= tab->join;
+      JOIN_TAB *join_tab= tab->bush_children->start;
+      JOIN_TAB *save_return_tab= join->return_tab;
+      /*
+        Now run the join for the inner tables. The first call is to run the
+        join, the second one is to signal EOF (this is essential for some
+        join strategies, e.g. it will make join buffering flush the records).
+        join->return_tab is restored afterwards on both exit paths.
+      */
+      if ((rc= sub_select(join, join_tab, FALSE/* no EOF */)) < 0 ||
+          (rc= sub_select(join, join_tab, TRUE/* now EOF */)) < 0)
+      {
+        join->return_tab= save_return_tab;
+        DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/
+      }
+      join->return_tab= save_return_tab;
+      sjm->materialized= TRUE;
+    }
+  }
+
+  DBUG_RETURN(NESTED_LOOP_OK);
+}
+
+
+/**
+  Choose an optimal strategy to execute an IN/ALL/ANY subquery predicate
+  based on cost.
+
+  @param join_tables  the set of tables joined in the subquery
+
+  @notes
+  The method chooses between the materialization and IN=>EXISTS rewrite
+  strategies for the execution of a non-flattened subquery IN predicate.
+  The cost-based decision is made as follows:
+
+  1. compute materialize_strategy_cost based on the unmodified subquery
+  2. reoptimize the subquery taking into account the IN-EXISTS predicates
+  3. compute in_exists_strategy_cost based on the reoptimized plan
+  4. compare and set the cheaper strategy
+     if (materialize_strategy_cost >= in_exists_strategy_cost)
+       in_strategy = IN_TO_EXISTS
+     else
+       in_strategy = MATERIALIZATION
+  5. if in_strategy = MATERIALIZATION and it is not possible to initialize it
+       revert to IN_TO_EXISTS
+  6. if (in_strategy == MATERIALIZATION)
+       revert the subquery plan to the original one before reoptimizing
+     else
+       inject the IN=>EXISTS predicates into the new EXISTS subquery plan
+
+  The implementation itself is a bit more complicated because it takes into
+  account two more factors:
+  - whether the user allowed both strategies through an optimizer_switch, and
+  - if materialization was the cheaper strategy, whether it can be executed
+    or not.
+
+  @retval FALSE     success.
+  @retval TRUE      error occurred.
+*/
+
+bool JOIN::choose_subquery_plan(table_map join_tables)
+{
+  Join_plan_state save_qep; /* The original QEP of the subquery. */
+  enum_reopt_result reopt_result= REOPT_NONE;
+  Item_in_subselect *in_subs;
+
+  if (is_in_subquery())
+  {
+    in_subs= (Item_in_subselect*) unit->item;
+    if (in_subs->create_in_to_exists_cond(this))
+      return true;
+  }
+  else
+    return false;
+
+  DBUG_ASSERT(in_subs->in_strategy); /* A strategy must be chosen earlier. */
+  DBUG_ASSERT(in_to_exists_where || in_to_exists_having);
+  DBUG_ASSERT(!in_to_exists_where || in_to_exists_where->fixed);
+  DBUG_ASSERT(!in_to_exists_having || in_to_exists_having->fixed);
+
+  /*
+    Compute and compare the costs of materialization and in-exists if both
+    strategies are possible and allowed by the user (checked during the
+    prepare phase).
+  */
+  if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
+      in_subs->in_strategy & SUBS_IN_TO_EXISTS)
+  {
+    JOIN *outer_join;
+    JOIN *inner_join= this;
+    /* Number of unique value combinations filtered by the IN predicate. */
+    double outer_lookup_keys;
+    /* Cost and row count of the unmodified subquery. */
+    double inner_read_time_1, inner_record_count_1;
+    /* Cost of the subquery with injected IN-EXISTS predicates. */
+    double inner_read_time_2;
+    /* The cost to compute IN via materialization. */
+    double materialize_strategy_cost;
+    /* The cost of the IN->EXISTS strategy. */
+    double in_exists_strategy_cost;
+    double dummy;
+
+    /*
+      A. Estimate the number of rows of the outer table that will be filtered
+      by the IN predicate.
+    */
+    outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
+    if (outer_join && outer_join->table_count > 0)
+    {
+      /*
+        The index of the last JOIN_TAB in the outer JOIN where in_subs is
+        attached (pushed to).
+      */
+      uint max_outer_join_tab_idx;
+      /*
+        Make_cond_for_table is called for predicates only in the WHERE/ON
+        clauses. In all other cases, predicates are not pushed to any
+        JOIN_TAB, and their join_tab_idx remains MAX_TABLES. Such predicates
+        are evaluated for each complete row of the outer join.
+      */
+      max_outer_join_tab_idx= (in_subs->get_join_tab_idx() == MAX_TABLES) ?
+                               outer_join->table_count - 1:
+                               in_subs->get_join_tab_idx();
+      /*
+        TODO:
+        Currently outer_lookup_keys is computed as the number of rows in
+        the partial join including the JOIN_TAB where the IN predicate is
+        pushed to. In the general case this is a gross overestimate because
+        due to caching we are interested only in the number of unique keys.
+        The search key may be formed by columns from much fewer than all
+        tables in the partial join. Example:
+        select * from t1, t2 where t1.c1 = t2.key AND t2.c2 IN (select ...);
+        If the join order: t1, t2, the number of unique lookup keys is ~ to
+        the number of unique values t2.c2 in the partial join t1 join t2.
+      */
+      outer_join->get_partial_cost_and_fanout(max_outer_join_tab_idx,
+                                              table_map(-1),
+                                              &dummy,
+                                              &outer_lookup_keys);
+    }
+    else
+    {
+      /*
+        TODO: outer_join can be NULL for DELETE statements.
+        How to compute its cost?
+      */
+      outer_lookup_keys= 1;
+    }
+
+    /*
+      B. Estimate the cost and number of records of the subquery both
+      unmodified, and with injected IN->EXISTS predicates.
+    */
+    inner_read_time_1= inner_join->best_read;
+    inner_record_count_1= inner_join->record_count;
+
+    if (in_to_exists_where && const_tables != table_count)
+    {
+      /*
+        Re-optimize and cost the subquery taking into account the IN-EXISTS
+        conditions.
+      */
+      reopt_result= reoptimize(in_to_exists_where, join_tables, &save_qep);
+      if (reopt_result == REOPT_ERROR)
+        return TRUE;
+
+      /* Get the cost of the modified IN-EXISTS plan. */
+      inner_read_time_2= inner_join->best_read;
+
+    }
+    else
+    {
+      /* Reoptimization would not produce any better plan. */
+      inner_read_time_2= inner_read_time_1;
+    }
+
+    /*
+      C. Compute execution costs.
+    */
+    /* C.1 Compute the cost of the materialization strategy. */
+    /* Record length of the temporary table that would hold the result. */
+    uint rowlen= get_tmp_table_rec_length(ref_pointer_array, 
+                                          select_lex->item_list.elements);
+    /* The cost of writing one row into the temporary table. */
+    double write_cost= get_tmp_table_write_cost(thd, inner_record_count_1,
+                                                rowlen);
+    /* The cost of a lookup into the unique index of the materialized table. */
+    double lookup_cost= get_tmp_table_lookup_cost(thd, inner_record_count_1,
+                                                  rowlen);
+    /*
+      The cost of executing the subquery and storing its result in an indexed
+      temporary table.
+    */
+    double materialization_cost= inner_read_time_1 +
+                                 write_cost * inner_record_count_1;
+
+    materialize_strategy_cost= materialization_cost +
+                               outer_lookup_keys * lookup_cost;
+
+    /* C.2 Compute the cost of the IN=>EXISTS strategy. */
+    in_exists_strategy_cost= outer_lookup_keys * inner_read_time_2;
+
+    /* C.3 Compare the costs, keep the cheaper strategy (ties: IN->EXISTS). */
+    if (materialize_strategy_cost >= in_exists_strategy_cost)
+      in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
+    else
+      in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS;
+
+    DBUG_PRINT("info",
+               ("mat_strategy_cost: %.2f, mat_cost: %.2f, write_cost: %.2f, lookup_cost: %.2f",
+                materialize_strategy_cost, materialization_cost, write_cost, lookup_cost));
+    DBUG_PRINT("info",
+               ("inx_strategy_cost: %.2f, inner_read_time_2: %.2f",
+                in_exists_strategy_cost, inner_read_time_2));
+    DBUG_PRINT("info",("outer_lookup_keys: %.2f", outer_lookup_keys));
+  }
+
+  /*
+    If materialization is still a candidate strategy here, i.e. it was
+    (1) allowed by the semantic analysis during the prepare phase, and
+    (2) not discarded by the cost comparison above (if one was made),
+    then try to set up its execution engine. If
+    (3) setup_mat_engine() reports that this is not possible,
+      fall back to IN->EXISTS.
+    otherwise
+      use materialization.
+  */
+  if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
+      in_subs->setup_mat_engine())
+  {
+    /*
+      If materialization was the cheaper or the only user-selected strategy,
+      but it is not possible to execute it due to limitations in the
+      implementation, fall back to IN-TO-EXISTS.
+    */
+    in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
+    in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
+  }
+
+  if (in_subs->in_strategy & SUBS_MATERIALIZATION)
+  {
+    /* Restore the original query plan used for materialization. */
+    if (reopt_result == REOPT_NEW_PLAN)
+      restore_query_plan(&save_qep);
+
+    in_subs->unit->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
+    select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
+
+    /*
+      Reset the "LIMIT 1" set in Item_exists_subselect::fix_length_and_dec.
+      TODO:
+      Currently we set the subquery LIMIT to infinity, and this is correct
+      because we forbid at parse time LIMIT inside IN subqueries (see
+      Item_in_subselect::test_limit). However, once we allow this, here
+      we should set the correct limit if given in the query.
+    */
+    in_subs->unit->global_parameters->select_limit= NULL;
+    in_subs->unit->set_limit(unit->global_parameters);
+    /*
+      Set the limit of this JOIN object as well, because normally it is
+      set in the beginning of JOIN::optimize, which was already done.
+    */
+    select_limit= in_subs->unit->select_limit_cnt;
+  }
+  else if (in_subs->in_strategy & SUBS_IN_TO_EXISTS)
+  {
+    if (reopt_result == REOPT_NONE && in_to_exists_where &&
+        const_tables != table_count)
+    {
+      /*
+        The subquery was not reoptimized either because the user allowed only
+        the IN-EXISTS strategy, or because materialization was not possible
+        based on semantic analysis. Cleanup the original plan and reoptimize.
+      */
+      for (uint i= 0; i < table_count; i++)
+      {
+        join_tab[i].keyuse= NULL;
+        join_tab[i].checked_keys.clear_all();
+      }
+      if ((reopt_result= reoptimize(in_to_exists_where, join_tables, NULL)) ==
+          REOPT_ERROR)
+        return TRUE;
+    }
+
+    if (in_subs->inject_in_to_exists_cond(this))
+      return TRUE;
+    /*
+      It is IN->EXISTS transformation so we should mark subquery as
+      dependent
+    */
+    in_subs->unit->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
+    select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
+    select_limit= 1;
+  }
+  else
+    DBUG_ASSERT(FALSE);
+
+  return FALSE;
+}
+
+
+/**
+  Choose a query plan for a table-less subquery.
+
+  @notes
+
+  @retval FALSE     success.
+  @retval TRUE      error occurred.
+*/
+
+bool JOIN::choose_tableless_subquery_plan()
+{
+  DBUG_ASSERT(!tables_list || !table_count);
+
+  /* Nothing to do unless this JOIN belongs to a subquery item. */
+  Item_subselect *subq_item= unit->item;
+  if (!subq_item)
+    return FALSE;
+  DBUG_ASSERT(subq_item->type() == Item::SUBSELECT_ITEM);
+
+  /*
+    If the optimizer determined that this query has an empty result,
+    in most cases the subquery predicate is a known constant value -
+    either FALSE or NULL. The implementation of Item_subselect::reset()
+    determines which one.
+
+    Both group by queries and non-group by queries without aggregate
+    functions produce empty subquery result, so for them the predicate
+    is made constant here.
+
+    TODO:
+    A further optimization is possible when a non-group query with
+    MIN/MAX/COUNT is optimized by opt_sum_query. Then, if there are
+    only MIN/MAX functions over an empty result set, the subquery
+    result is a NULL value/row, thus the value of the subquery
+    predicate is NULL.
+  */
+  if (zero_result_cause && !implicit_grouping)
+  {
+    subq_item->reset();
+    subq_item->make_const();
+    return FALSE;
+  }
+
+  /* Only IN-type predicates need any further preparation. */
+  if (!subq_item->is_in_predicate())
+    return FALSE;
+
+  /* IN->EXISTS is the strategy assigned to a table-less IN subquery. */
+  Item_in_subselect *in_pred= (Item_in_subselect*) subq_item;
+  in_pred->in_strategy= SUBS_IN_TO_EXISTS;
+  if (in_pred->create_in_to_exists_cond(this))
+    return TRUE;
+  if (in_pred->inject_in_to_exists_cond(this))
+    return TRUE;
+  /* Re-read HAVING: presumably changed by the injection above. */
+  tmp_having= having;
+  return FALSE;
+}
diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h
new file mode 100644
index 00000000000..5a7416fe929
--- /dev/null
+++ b/sql/opt_subselect.h
@@ -0,0 +1,379 @@
+/*
+  Semi-join subquery optimization code definitions
+*/
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface			/* gcc class implementation */
+#endif
+
+int check_and_do_in_subquery_rewrites(JOIN *join);
+bool convert_join_subqueries_to_semijoins(JOIN *join);
+int pull_out_semijoin_tables(JOIN *join);
+bool optimize_semijoin_nests(JOIN *join, table_map all_table_map);
+
+/* Used by Loose_scan_opt::init() to compute its bound_sj_equalities map */
+ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, 
+                                  table_map remaining_tables);
+
+/*
+  This is a class for considering possible loose index scan optimizations.
+  Its usage pattern is as follows:
+    best_access_path()
+    {
+       Loose_scan_opt opt;
+
+       opt.init()
+       for each index we can do ref access with
+       {
+         opt.next_ref_key();
+         for each keyuse 
+           opt.add_keyuse();
+         opt.check_ref_access_part1(); opt.check_ref_access_part2();
+       }
+
+       if (some criteria for range scans)
+         opt.check_range_access();
+       
+       opt.get_best_option();
+    }
+*/
+
+class Loose_scan_opt
+{
+public:
+  /* All methods must check this before doing anything else */
+  bool try_loosescan;
+
+  /*
+    If we consider (oe1, .. oeN) IN (SELECT ie1, .. ieN) then ieK=oeK is
+    called sj-equality. If oeK depends only on preceding tables then such
+    equality is called 'bound'.
+  */
+  ulonglong bound_sj_equalities;
+ 
+  /* Accumulated properties of ref access we're now considering: */
+  ulonglong handled_sj_equalities;
+  key_part_map loose_scan_keyparts;
+  uint max_loose_keypart;
+  bool part1_conds_met;
+
+  /*
+    Use of quick select is a special case. Some of its properties:
+  */
+  uint quick_uses_applicable_index;
+  uint quick_max_loose_keypart;
+  
+  /* Best loose scan method so far */
+  uint   best_loose_scan_key;
+  double best_loose_scan_cost;
+  double best_loose_scan_records;
+  KEYUSE *best_loose_scan_start_key;
+
+  uint best_max_loose_keypart;
+
+  Loose_scan_opt():
+    try_loosescan(FALSE),
+    bound_sj_equalities(0),
+    quick_uses_applicable_index(FALSE)
+  {
+    UNINIT_VAR(quick_max_loose_keypart); /* Protected by quick_uses_applicable_index */
+    /* The following are protected by best_loose_scan_cost!= DBL_MAX */
+    UNINIT_VAR(best_loose_scan_key);
+    UNINIT_VAR(best_loose_scan_records);
+    UNINIT_VAR(best_max_loose_keypart);
+    UNINIT_VAR(best_loose_scan_start_key);
+  }
+  
+  void init(JOIN *join, JOIN_TAB *s, table_map remaining_tables)
+  {
+    /*
+      Discover the bound equalities. We need to do this if
+        1. The next table is an SJ-inner table, and
+        2. It is the first table from that semijoin, and
+        3. We're not within a semi-join range (i.e. all semi-joins either have
+           all or none of their tables in join_table_map), except
+           s->emb_sj_nest (which we've just entered, see #2).
+        4. All non-IN-equality correlation references from this sj-nest are 
+           bound
+        5. But some of the IN-equalities aren't (so this can't be handled by 
+           FirstMatch strategy)
+    */
+    best_loose_scan_cost= DBL_MAX;
+    if (!join->emb_sjm_nest && s->emb_sj_nest &&                        // (1)
+        s->emb_sj_nest->sj_in_exprs < 64 && 
+        ((remaining_tables & s->emb_sj_nest->sj_inner_tables) ==        // (2)
+         s->emb_sj_nest->sj_inner_tables) &&                            // (2)
+        join->cur_sj_inner_tables == 0 &&                                  // (3)
+        !(remaining_tables & 
+          s->emb_sj_nest->nested_join->sj_corr_tables) &&               // (4)
+        remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on &&// (5)
+        optimizer_flag(join->thd, OPTIMIZER_SWITCH_LOOSE_SCAN))
+    {
+      /* This table is an LooseScan scan candidate */
+      bound_sj_equalities= get_bound_sj_equalities(s->emb_sj_nest, 
+                                                   remaining_tables);
+      try_loosescan= TRUE;
+      DBUG_PRINT("info", ("Will try LooseScan scan, bound_map=%llx",
+                          (longlong)bound_sj_equalities));
+    }
+  }
+
+  void next_ref_key()
+  {
+    handled_sj_equalities=0;
+    loose_scan_keyparts= 0;
+    max_loose_keypart= 0;
+    part1_conds_met= FALSE;
+  }
+  
+  void add_keyuse(table_map remaining_tables, KEYUSE *keyuse)
+  {
+    if (try_loosescan && keyuse->sj_pred_no != UINT_MAX)
+    {
+      if (!(remaining_tables & keyuse->used_tables))
+      {
+        /* 
+          This allows to use equality propagation to infer that some 
+          sj-equalities are bound.
+        */
+        bound_sj_equalities |= 1ULL << keyuse->sj_pred_no;
+      }
+      else
+      {
+        handled_sj_equalities |= 1ULL << keyuse->sj_pred_no;
+        loose_scan_keyparts |= ((key_part_map)1) << keyuse->keypart;
+        set_if_bigger(max_loose_keypart, keyuse->keypart);
+      }
+    }
+  }
+
+  bool have_a_case() { return test(handled_sj_equalities); }
+
+  void check_ref_access_part1(JOIN_TAB *s, uint key, KEYUSE *start_key, 
+                              table_map found_part)
+  {
+    /*
+      Check if we can use LooseScan semi-join strategy. We can if
+      1. This is the right table at right location
+      2. All IN-equalities are either
+         - "bound", ie. the outer_expr part refers to the preceding tables
+         - "handled", ie. covered by the index we're considering
+      3. Index order allows to enumerate subquery's duplicate groups in
+         order. This happens when the index definition matches this
+         pattern:
+
+           (handled_col|bound_col)* (other_col|bound_col)
+
+    */
+    if (try_loosescan &&                                       // (1)
+        (handled_sj_equalities | bound_sj_equalities) ==       // (2)
+        PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) &&   // (2)
+        (PREV_BITS(key_part_map, max_loose_keypart+1) &        // (3)
+         (found_part | loose_scan_keyparts)) ==                // (3)
+         (found_part | loose_scan_keyparts) &&                 // (3)
+        !key_uses_partial_cols(s->table, key))
+    {
+      /* Ok, can use the strategy */
+      part1_conds_met= TRUE;
+      if (s->quick && s->quick->index == key && 
+          s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
+      {
+        quick_uses_applicable_index= TRUE;
+        quick_max_loose_keypart= max_loose_keypart;
+      }
+      DBUG_PRINT("info", ("Can use LooseScan scan"));
+
+      /* 
+        Check if this is a special case where there are no usable bound
+        IN-equalities, i.e. we have
+
+          outer_expr IN (SELECT innertbl.key FROM ...) 
+        
+        and outer_expr cannot be evaluated yet, so it's actually full
+        index scan and not a ref access
+      */
+      if (!(found_part & 1 ) && /* no usable ref access for 1st key part */
+          s->table->covering_keys.is_set(key))
+      {
+        DBUG_PRINT("info", ("Can use full index scan for LooseScan"));
+        
+        /* Calculate the cost of complete loose index scan.  */
+        double records= rows2double(s->table->file->stats.records);
+
+        /* The cost is the entire index scan cost (see the /2 TODO below) */
+        double read_time= s->table->file->keyread_time(key, 1,
+                                                       (ha_rows) records);
+
+        /*
+          Now find out how many different keys we will get (for now we
+          ignore the fact that we have "keypart_i=const" restriction for
+          some key components, that may make us think that loose
+          scan will produce more distinct records than it actually will)
+        */
+        ulong rpc;
+        if ((rpc= s->table->key_info[key].rec_per_key[max_loose_keypart]))
+          records= records / rpc;
+
+        // TODO: previous version also did /2
+        if (read_time < best_loose_scan_cost)
+        {
+          best_loose_scan_key= key;
+          best_loose_scan_cost= read_time;
+          best_loose_scan_records= records;
+          best_max_loose_keypart= max_loose_keypart;
+          best_loose_scan_start_key= start_key;
+        }
+      }
+    }
+  }
+  
+  void check_ref_access_part2(uint key, KEYUSE *start_key, double records, 
+                              double read_time)
+  { /* Record ref access via 'key' as the best LooseScan plan if it wins */
+    if (part1_conds_met && read_time < best_loose_scan_cost)
+    { /* part1 conditions were met and this access is cheaper than the best */
+      /* TODO: use rec-per-key-based fanout calculations */
+      best_loose_scan_key= key;
+      best_loose_scan_cost= read_time;
+      best_loose_scan_records= records;
+      best_max_loose_keypart= max_loose_keypart;
+      best_loose_scan_start_key= start_key;
+    }
+  }
+
+  void check_range_access(JOIN *join, uint idx, QUICK_SELECT_I *quick)
+  { /* Consider the range access 'quick' as a LooseScan candidate */
+    /* TODO: this the right part restriction: */
+    if (quick_uses_applicable_index && idx == join->const_tables && 
+        quick->read_time < best_loose_scan_cost)
+    {
+      best_loose_scan_key= quick->index;
+      best_loose_scan_cost= quick->read_time;
+      /* this is ok because idx == join->const_tables */
+      best_loose_scan_records= rows2double(quick->records);
+      best_max_loose_keypart= quick_max_loose_keypart;
+      best_loose_scan_start_key= NULL; /* NULL start key <=> not a ref access */
+    }
+  }
+
+  void save_to_position(JOIN_TAB *tab, POSITION *pos)
+  { /* Store the best LooseScan plan found so far (if any) into *pos */
+    pos->read_time=       best_loose_scan_cost; /* set even when no plan exists */
+    if (best_loose_scan_cost != DBL_MAX) /* presumably DBL_MAX <=> no plan; confirm */
+    {
+      pos->records_read=    best_loose_scan_records;
+      pos->key=             best_loose_scan_start_key;
+      pos->loosescan_key=   best_loose_scan_key;
+      pos->loosescan_parts= best_max_loose_keypart + 1; /* keypart index -> count */
+      pos->use_join_buffer= FALSE;
+      pos->table=           tab;
+      // TODO: is ref_depend_map needed here?
+      DBUG_PRINT("info", ("Produced a LooseScan plan, key %s, %s",
+                          tab->table->key_info[best_loose_scan_key].name,
+                          best_loose_scan_start_key? "(ref access)":
+                                                     "(range/index access)"));
+    }
+  }
+};
+
+
+void advance_sj_state(JOIN *join, const table_map remaining_tables, 
+                      const JOIN_TAB *new_join_tab, uint idx, 
+                      double *current_record_count, double *current_read_time,
+                      POSITION *loose_scan_pos);
+void restore_prev_sj_state(const table_map remaining_tables, 
+                                  const JOIN_TAB *tab, uint idx);
+
+void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+
+bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab);
+bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab);
+
+TABLE *create_duplicate_weedout_tmp_table(THD *thd, uint uniq_tuple_length_arg,
+                                          SJ_TMP_TABLE *sjtbl);
+int do_sj_reset(SJ_TMP_TABLE *sj_tbl);
+int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl);
+
+/*
+  Temporary table used by semi-join DuplicateElimination strategy
+
+  This consists of the temptable itself and data needed to put records
+  into it. The table's DDL is as follows:
+
+    CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col));
+
+  where the primary key can be replaced with unique constraint if n exceeds
+  the limit (as it is always done for query execution-time temptables).
+
+  The record value is a concatenation of rowids of tables from the join we're
+  executing. If a join table is on the inner side of the outer join, we
+  assume that its rowid can be NULL and provide means to store this rowid in
+  the tuple.
+*/
+
+class SJ_TMP_TABLE : public Sql_alloc
+{
+public:
+  /*
+    Array of descriptors of the tables whose rowids compose the temporary
+    table record (one TAB element per table).
+  */
+  class TAB
+  {
+  public:
+    JOIN_TAB *join_tab;
+    uint rowid_offset; /* presumably offset of this table's rowid in the record */
+    ushort null_byte; /* presumably where the "rowid is NULL" flag lives; confirm */
+    uchar null_bit;
+  };
+  TAB *tabs; /* Start of the TAB array */
+  TAB *tabs_end; /* presumably one past the last element; confirm against users */
+  
+  /* 
+    is_degenerate==TRUE means this is a special case where the temptable record
+    has zero length (and presence of a unique key means that the temptable can
+    have either 0 or 1 records). 
+    In this case we don't create the physical temptable but instead record
+    its state in SJ_TMP_TABLE::have_degenerate_row.
+  */
+  bool is_degenerate;
+
+  /* 
+    When is_degenerate==TRUE: the contents of the table (whether it has the
+    record or not).
+  */
+  bool have_degenerate_row;
+  
+  /* table record parameters */
+  uint null_bits;
+  uint null_bytes;
+  uint rowid_len;
+
+  /* The temporary table itself (NULL means not created yet) */
+  TABLE *tmp_table;
+  
+  /*
+    These are the members we got from temptable creation code. We'll need
+    them if we need to convert the table from HEAP to MyISAM/Maria.
+  */
+  ENGINE_COLUMNDEF *start_recinfo;
+  ENGINE_COLUMNDEF *recinfo;
+
+  /* Next table. NOTE(review): start_idx/end_idx are not declared in this class */
+  SJ_TMP_TABLE *next; 
+};
+
+int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, 
+                                    uint no_jbuf_after);
+void destroy_sj_tmp_tables(JOIN *join);
+int clear_sj_tmp_tables(JOIN *join);
+int rewrite_to_index_subquery_engine(JOIN *join);
+
+
+void get_delayed_table_estimates(TABLE *table,
+                                 ha_rows *out_rows, 
+                                 double *scan_time,
+                                 double *startup_cost);
+
+enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab);
+
diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc
index e187cf19917..bcc8a42efc6 100644
--- a/sql/opt_sum.cc
+++ b/sql/opt_sum.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011 Oracle and/or its affiliates.
+   Copyright (c) 2010, 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -75,10 +76,12 @@ static int maxmin_in_range(bool max_fl, Field* field, COND *cond);
     #			Multiplication of number of rows in all tables
 */
 
-static ulonglong get_exact_record_count(TABLE_LIST *tables)
+static ulonglong get_exact_record_count(List<TABLE_LIST> &tables)
 {
   ulonglong count= 1;
-  for (TABLE_LIST *tl= tables; tl; tl= tl->next_leaf)
+  TABLE_LIST *tl;
+  List_iterator<TABLE_LIST> ti(tables);
+  while ((tl= ti++))
   {
     ha_rows tmp= tl->table->file->records();
     if ((tmp == HA_POS_ERROR))
@@ -110,7 +113,7 @@ static int get_index_min_value(TABLE *table, TABLE_REF *ref,
   int error;
   
   if (!ref->key_length)
-    error= table->file->index_first(table->record[0]);
+    error= table->file->ha_index_first(table->record[0]);
   else 
   {
     /*
@@ -132,10 +135,10 @@ static int get_index_min_value(TABLE *table, TABLE_REF *ref,
          Closed interval: Either The MIN argument is non-nullable, or
          we have a >= predicate for the MIN argument.
       */
-      error= table->file->index_read_map(table->record[0],
-                                         ref->key_buff,
-                                         make_prev_keypart_map(ref->key_parts),
-                                         HA_READ_KEY_OR_NEXT);
+      error= table->file->ha_index_read_map(table->record[0],
+                                            ref->key_buff,
+                                            make_prev_keypart_map(ref->key_parts),
+                                            HA_READ_KEY_OR_NEXT);
     else
     {
       /*
@@ -147,10 +150,10 @@ static int get_index_min_value(TABLE *table, TABLE_REF *ref,
         and it would not work.
       */
       DBUG_ASSERT(prefix_len < ref->key_length);
-      error= table->file->index_read_map(table->record[0],
-                                         ref->key_buff,
-                                         make_prev_keypart_map(ref->key_parts),
-                                         HA_READ_AFTER_KEY);
+      error= table->file->ha_index_read_map(table->record[0],
+                                            ref->key_buff,
+                                            make_prev_keypart_map(ref->key_parts),
+                                            HA_READ_AFTER_KEY);
       /* 
          If the found record is outside the group formed by the search
          prefix, or there is no such record at all, check if all
@@ -173,10 +176,10 @@ static int get_index_min_value(TABLE *table, TABLE_REF *ref,
            key_cmp_if_same(table, ref->key_buff, ref->key, prefix_len)))
       {
         DBUG_ASSERT(item_field->field->real_maybe_null());
-        error= table->file->index_read_map(table->record[0],
-                                           ref->key_buff,
-                                           make_prev_keypart_map(ref->key_parts),
-                                           HA_READ_KEY_EXACT);
+        error= table->file->ha_index_read_map(table->record[0],
+                                              ref->key_buff,
+                                              make_prev_keypart_map(ref->key_parts),
+                                              HA_READ_KEY_EXACT);
       }
     }
   }
@@ -199,12 +202,12 @@ static int get_index_min_value(TABLE *table, TABLE_REF *ref,
 static int get_index_max_value(TABLE *table, TABLE_REF *ref, uint range_fl)
 {
   return (ref->key_length ?
-          table->file->index_read_map(table->record[0], ref->key_buff,
-                                      make_prev_keypart_map(ref->key_parts),
-                                      range_fl & NEAR_MAX ?
-                                      HA_READ_BEFORE_KEY : 
-                                      HA_READ_PREFIX_LAST_OR_PREV) :
-          table->file->index_last(table->record[0]));
+          table->file->ha_index_read_map(table->record[0], ref->key_buff,
+                                         make_prev_keypart_map(ref->key_parts),
+                                         range_fl & NEAR_MAX ?
+                                         HA_READ_BEFORE_KEY : 
+                                         HA_READ_PREFIX_LAST_OR_PREV) :
+          table->file->ha_index_last(table->record[0]));
 }
 
 
@@ -235,9 +238,11 @@ static int get_index_max_value(TABLE *table, TABLE_REF *ref, uint range_fl)
 */
 
 int opt_sum_query(THD *thd,
-                  TABLE_LIST *tables, List<Item> &all_fields, COND *conds)
+                  List<TABLE_LIST> &tables, List<Item> &all_fields, COND *conds)
 {
   List_iterator_fast<Item> it(all_fields);
+  List_iterator<TABLE_LIST> ti(tables);
+  TABLE_LIST *tl;
   int const_result= 1;
   bool recalc_const_item= 0;
   ulonglong count= 1;
@@ -245,8 +250,7 @@ int opt_sum_query(THD *thd,
   table_map removed_tables= 0, outer_tables= 0, used_tables= 0;
   table_map where_tables= 0;
   Item *item;
-  int error;
-
+  int error= 0;
   DBUG_ENTER("opt_sum_query");
 
   if (conds)
@@ -256,7 +260,7 @@ int opt_sum_query(THD *thd,
     Analyze outer join dependencies, and, if possible, compute the number
     of returned rows.
   */
-  for (TABLE_LIST *tl= tables; tl; tl= tl->next_leaf)
+  while ((tl= ti++))
   {
     TABLE_LIST *embedded;
     for (embedded= tl ; embedded; embedded= embedded->embedding)
@@ -297,6 +301,14 @@ int opt_sum_query(THD *thd,
       is_exact_count= FALSE;
       count= 1;                                 // ensure count != 0
     }
+    else if (tl->is_materialized_derived())
+    {
+      /*
+        Can't remove a derived table as its number of rows is just an
+        estimate.
+      */
+      DBUG_RETURN(0);
+    }
     else
     {
       error= tl->table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
@@ -380,18 +392,17 @@ int opt_sum_query(THD *thd,
             const_result= 0;
             break;
           }
-          table->file->ha_index_init((uint) ref.key, 1);
-
-          error= is_max ? 
-                 get_index_max_value(table, &ref, range_fl) :
-                 get_index_min_value(table, &ref, item_field, range_fl,
-                                     prefix_len);
+          if (!(error= table->file->ha_index_init((uint) ref.key, 1)))
+            error= (is_max ? 
+                    get_index_max_value(table, &ref, range_fl) :
+                    get_index_min_value(table, &ref, item_field, range_fl,
+                                        prefix_len));
 
           /* Verify that the read tuple indeed matches the search key */
 	  if (!error && reckey_in_range(is_max, &ref, item_field->field, 
 			                conds, range_fl, prefix_len))
 	    error= HA_ERR_KEY_NOT_FOUND;
-          table->set_keyread(FALSE);
+          table->disable_keyread();
           table->file->ha_index_end();
           if (error)
 	  {
@@ -489,28 +500,30 @@ bool simple_pred(Item_func *func_item, Item **args, bool *inv_order)
     /* MULT_EQUAL_FUNC */
     {
       Item_equal *item_equal= (Item_equal *) func_item;
-      Item_equal_iterator it(*item_equal);
-      args[0]= it++;
-      if (it++)
-        return 0;
       if (!(args[1]= item_equal->get_const()))
         return 0;
+      Item_equal_fields_iterator it(*item_equal);
+      if (!(item= it++))
+        return 0;
+      args[0]= item->real_item();
+      if (it++)
+        return 0;
     }
     break;
   case 1:
     /* field IS NULL */
-    item= func_item->arguments()[0];
+    item= func_item->arguments()[0]->real_item();
     if (item->type() != Item::FIELD_ITEM)
       return 0;
     args[0]= item;
     break;
   case 2:
     /* 'field op const' or 'const op field' */
-    item= func_item->arguments()[0];
+    item= func_item->arguments()[0]->real_item();
     if (item->type() == Item::FIELD_ITEM)
     {
       args[0]= item;
-      item= func_item->arguments()[1];
+      item= func_item->arguments()[1]->real_item();
       if (!item->const_item())
         return 0;
       args[1]= item;
@@ -518,7 +531,7 @@ bool simple_pred(Item_func *func_item, Item **args, bool *inv_order)
     else if (item->const_item())
     {
       args[1]= item;
-      item= func_item->arguments()[1];
+      item= func_item->arguments()[1]->real_item();
       if (item->type() != Item::FIELD_ITEM)
         return 0;
       args[0]= item;
@@ -529,13 +542,13 @@ bool simple_pred(Item_func *func_item, Item **args, bool *inv_order)
     break;
   case 3:
     /* field BETWEEN const AND const */
-    item= func_item->arguments()[0];
+    item= func_item->arguments()[0]->real_item();
     if (item->type() == Item::FIELD_ITEM)
     {
       args[0]= item;
       for (int i= 1 ; i <= 2; i++)
       {
-        item= func_item->arguments()[i];
+        item= func_item->arguments()[i]->real_item();
         if (!item->const_item())
           return 0;
         args[i]= item;
@@ -616,7 +629,7 @@ static bool matching_cond(bool max_fl, TABLE_REF *ref, KEY *keyinfo,
   if (!(cond->used_tables() & field->table->map))
   {
     /* Condition doesn't restrict the used table */
-    DBUG_RETURN(TRUE);
+    DBUG_RETURN(!cond->const_item());
   }
   if (cond->type() == Item::COND_ITEM)
   {
@@ -752,7 +765,7 @@ static bool matching_cond(bool max_fl, TABLE_REF *ref, KEY *keyinfo,
         since set_null will be ignored, and we will compare uninitialized data.
       */
       if (!part->field->real_maybe_null())
-        DBUG_RETURN(false);
+        DBUG_RETURN(FALSE);
       part->field->set_null();
       *key_ptr= (uchar) 1;
     }
@@ -840,7 +853,7 @@ static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref,
                                 uint *range_fl, uint *prefix_len)
 {
   if (!(field->flags & PART_KEY_FLAG))
-    return false;                               // Not key field
+    return FALSE;                               // Not key field
 
   DBUG_ENTER("find_key_for_maxmin");
 
@@ -867,7 +880,7 @@ static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref,
          part++, jdx++, key_part_to_use= (key_part_to_use << 1) | 1)
     {
       if (!(table->file->index_flags(idx, jdx, 0) & HA_READ_ORDER))
-        DBUG_RETURN(false);
+        DBUG_RETURN(FALSE);
 
       /* Check whether the index component is partial */
       Field *part_field= table->field[part->fieldnr-1];
@@ -915,13 +928,13 @@ static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref,
             converted (for example to upper case)
           */
           if (field->part_of_key.is_set(idx))
-            table->set_keyread(TRUE);
-          DBUG_RETURN(true);
+            table->enable_keyread();
+          DBUG_RETURN(TRUE);
         }
       }
     }
   }
-  DBUG_RETURN(false);
+  DBUG_RETURN(FALSE);
 }
 
 
@@ -1003,15 +1016,11 @@ static int maxmin_in_range(bool max_fl, Field* field, COND *cond)
       SELECT MAX(b) FROM t1 WHERE a=const AND b<const
     */
     if (max_fl != less_fl)
-      return cond->val_int() == 0;                // Return 1 if WHERE is false
+      return cond->val_int() == 0;               // Return 1 if WHERE is false
     return 0;
   }
-  case Item_func::EQ_FUNC:
-  case Item_func::EQUAL_FUNC:
-    break;
-  default:                                        // Keep compiler happy
-    DBUG_ASSERT(1);                               // Impossible
-    break;
+  default:
+    break;                                      // Ignore
   }
   return 0;
 }
diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc
new file mode 100644
index 00000000000..9ab6e0e84d2
--- /dev/null
+++ b/sql/opt_table_elimination.cc
@@ -0,0 +1,1867 @@
+/**
+  @file
+
+  @brief
+    Table Elimination Module
+
+  @defgroup Table_Elimination Table Elimination Module
+  @{
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#include "my_bit.h"
+#include "sql_select.h"
+
+/*
+  OVERVIEW
+  ========
+
+  This file contains table elimination module. The idea behind table
+  elimination is as follows: suppose we have a left join
+ 
+    SELECT * FROM t1 LEFT JOIN 
+      (t2 JOIN t3) ON t2.primary_key=t1.col AND 
+                      t3.primary_key=t2.col
+    WHERE ...
+
+  such that
+  * columns of the inner tables are not used anywhere outside the outer join
+    (not in WHERE, not in GROUP/ORDER BY clause, not in select list etc etc),
+  * inner side of the outer join is guaranteed to produce at most one matching
+    record combination for each record combination of outer tables.
+  
+  then the inner side of the outer join can be removed from the query, as it 
+  will always produce only one record combination (either real or 
+  null-complemented one) and we don't care about what that record combination 
+  is.
+
+
+  MODULE INTERFACE
+  ================
+
+  The module has one entry point - the eliminate_tables() function, which one
+  needs to call (once) at some point before join optimization.
+  eliminate_tables() operates over the JOIN structures. Logically, it
+  removes the inner tables of an outer join operation together with the
+  operation itself. Physically, it changes the following members:
+
+  * Eliminated tables are marked as constant and moved to the front of the
+    join order.
+
+  * In addition to this, they are recorded in JOIN::eliminated_tables bitmap.
+
+  * Items that became disused because they were in the ON expression of an 
+    eliminated outer join are notified by means of the Item tree walk which 
+    calls Item::mark_as_eliminated_processor for every item
+    - At the moment the only Item that cares whether it was eliminated is 
+      Item_subselect with its Item_subselect::eliminated flag which is used
+      by EXPLAIN code to check if the subquery should be shown in EXPLAIN.
+
+  Table elimination is redone on every PS re-execution.
+
+
+  TABLE ELIMINATION ALGORITHM FOR ONE OUTER JOIN
+  ==============================================
+
+  As described above, we can remove inner side of an outer join if it is 
+
+    1. not referred to from any other parts of the query
+    2. always produces one matching record combination.
+
+  We check #1 by doing a recursive descent down the join->join_list while
+  maintaining a union of used_tables() attribute of all Item expressions in
+  other parts of the query. When we encounter an outer join, we check if the
+  bitmap of tables on its inner side has intersection with tables that are used
+  elsewhere. No intersection means that inner side of the outer join could 
+  potentially be eliminated.
+
+  In order to check #2, one needs to prove that inner side of an outer join 
+  is functionally dependent on the outside. The proof is constructed from
+  functional dependencies of intermediate objects:
+
+  - Inner side of outer join is functionally dependent when each of its tables
+    are functionally dependent. (We assume a table is functionally dependent 
+    when its dependencies allow to uniquely identify one table record, or no
+    records).
+
+  - Table is functionally dependent when it has got a unique key whose columns
+    are functionally dependent.
+
+  - A column is functionally dependent when we could locate an AND-part of a
+    certain ON clause in form 
+      
+      tblX.columnY= expr 
+    
+    where expr is functionally dependent. expr is functionally dependent when 
+    all columns that it refers to are functionally dependent.
+
+  These relationships are modeled as a bipartite directed graph that has
+  dependencies as edges and two kinds of nodes:
+
+  Value nodes:
+   - Table column values (each is a value of tblX.columnY)
+   - Table values (each node represents a table inside the join nest we're
+     trying to eliminate).
+  A value has one attribute, it is either bound (i.e. functionally dependent) 
+  or not.
+
+  Module nodes:
+   - Modules representing tblX.colY=expr equalities. Equality module has 
+      = incoming edges from columns used in expr 
+      = outgoing edge to tblX.colY column.
+   - Nodes representing unique keys. Unique key has
+      = incoming edges from key component value modules
+      = outgoing edge to key's table module
+   - Inner side of outer join module. Outer join module has
+      = incoming edges from table value modules
+      = No outgoing edges. Once we reach it, we know we can eliminate the 
+        outer join.
+  A module may depend on multiple values, and hence its primary attribute is
+  the number of its arguments that are not bound. 
+
+  The algorithm starts with equality nodes that don't have any incoming edges
+  (their expressions are either constant or depend only on tables that are
+  outside of the outer join in question) and performs a breadth-first
+  traversal. If we reach the outer join nest node, it means outer join is
+  functionally dependent and can be eliminated. Otherwise it cannot be
+  eliminated.
+ 
+  HANDLING MULTIPLE NESTED OUTER JOINS
+  ====================================
+
+  Outer joins that are not nested one within another are eliminated
+  independently. For nested outer joins we have the following considerations:
+  
+  1. ON expressions from children outer joins must be taken into account 
+   
+  Consider this example:
+
+    SELECT t0.* 
+    FROM 
+      t0  
+    LEFT JOIN 
+      (t1 LEFT JOIN t2 ON t2.primary_key=t1.col1)
+    ON 
+      t1.primary_key=t0.col AND t2.col1=t1.col2
+
+  Here we cannot eliminate the "... LEFT JOIN t2 ON ..." part alone because the
+  ON clause of top level outer join has references to table t2. 
+  We can eliminate the entire  "... LEFT JOIN (t1 LEFT JOIN t2) ON .." part,
+  but in order to do that, we must look at both ON expressions.
+  
+  2. ON expressions of parent outer joins are useless.
+  Consider an example:
+
+    SELECT t0.* 
+    FROM
+      t0 
+    LEFT JOIN 
+      (t1 LEFT JOIN t2 ON some_expr)
+    ON
+      t2.primary_key=t1.col  -- (*)
+  
+  Here the uppermost ON expression has a clause that gives us functional
+  dependency of table t2 on t1 and hence could be used to eliminate the
+  "... LEFT JOIN t2 ON..." part.
+  However, we would not actually encounter this situation, because before the
+  table elimination we run simplify_joins(), which, among other things, upon
+  seeing a functional dependency condition like (*) will convert the outer join
+  of
+    
+    "... LEFT JOIN t2 ON ..."
+  
+  into inner join and thus make table elimination not to consider eliminating
+  table t2.
+*/
+
+class Dep_value;
+  class Dep_value_field;
+  class Dep_value_table;
+ 
+
+class Dep_module;
+  class Dep_module_expr;
+  class Dep_module_goal;
+  class Dep_module_key;
+
+class Dep_analysis_context;
+
+
+/*
+  A value, something that can be bound or not bound. One can also iterate over 
+  unbound modules that depend on this value
+*/
+
+class Dep_value : public Sql_alloc
+{
+public:
+  Dep_value(): bound(FALSE) {} /* Every value starts out unbound */
+  virtual ~Dep_value(){} /* purecov: inspected */ /* stop compiler warnings */
+  
+  bool is_bound() { return bound; }
+  void make_bound() { bound= TRUE; }
+
+  /* Iteration over unbound modules that depend on this value */
+  typedef char *Iterator; /* Iterator state; init_*_iter() is handed a buffer */
+  virtual Iterator init_unbound_modules_iter(char *buf)=0;
+  virtual Dep_module* get_next_unbound_module(Dep_analysis_context *dac,
+                                              Iterator iter) = 0;
+  static const size_t iterator_size; /* max of the subclasses' iterator sizes */
+protected:
+  bool bound; /* TRUE <=> the value is bound, i.e. functionally dependent */
+};
+
+
+/*
+  A table field value. There is exactly one such object for any tblX.fieldY
+  - the field depends on its table and equalities
+  - expressions that use the field are its dependencies
+*/
+
+class Dep_value_field : public Dep_value
+{
+public:
+  Dep_value_field(Dep_value_table *table_arg, Field *field_arg) :
+    table(table_arg), field(field_arg)
+  {} /* NOTE(review): next_table_field and bitmap_offset are not set here */
+
+  Dep_value_table *table; /* Table this field is from */
+  Field *field; /* Field this object is representing */
+  
+  /* Iteration over unbound modules that are our dependencies */
+  Iterator init_unbound_modules_iter(char *buf);
+  Dep_module* get_next_unbound_module(Dep_analysis_context *dac, 
+                                      Iterator iter);
+  
+  void make_unbound_modules_iter_skip_keys(Iterator iter);
+  
+  static const size_t iterator_size; /* Aligned size of Module_iter, see below */
+private:
+  /* 
+    Dep_value_field objects that belong to one table form a linked list,
+    ordered by field_index 
+  */
+  Dep_value_field *next_table_field;
+
+  /*
+    Offset to bits in Dep_analysis_context::expr_deps (see comment to that 
+    member for semantics of the bits).
+  */
+  uint bitmap_offset;
+
+  class Module_iter
+  {
+  public:
+    /* If not NULL, return this and advance */
+    Dep_module_key *key_dep;
+    /* Otherwise, return this and advance */
+    uint equality_no;
+  };
+  friend class Dep_analysis_context;
+  friend class Field_dependency_recorder; 
+  friend class Dep_value_table;
+};
+
+const size_t Dep_value_field::iterator_size=
+  ALIGN_SIZE(sizeof(Dep_value_field::Module_iter)); /* room for one Module_iter */
+
+
+/*
+  A table value. There is one Dep_value_table object for every table that can
+  potentially be eliminated.
+
+  Table becomes bound as soon as one of its unique keys becomes bound
+  Once the table is bound:
+   - all of its fields are bound
+   - its embedding outer join has one less unknown argument
+*/
+
+class Dep_value_table : public Dep_value
+{
+public:
+  Dep_value_table(TABLE *table_arg) : 
+    table(table_arg), fields(NULL), keys(NULL)
+  {} /* Field and key lists start out empty */
+  TABLE *table;  /* Table this object is representing */
+  /* Ordered list of fields that belong to this table */
+  Dep_value_field *fields;
+  Dep_module_key *keys; /* Ordered list of Unique keys in this table */
+
+  /* Iteration over unbound modules that are our dependencies */
+  Iterator init_unbound_modules_iter(char *buf);
+  Dep_module* get_next_unbound_module(Dep_analysis_context *dac, 
+                                      Iterator iter);
+  static const size_t iterator_size; /* Aligned size of Module_iter, see below */
+private:
+  class Module_iter
+  {
+  public:
+    /* Space for field iterator */
+    char buf[Dep_value_field::iterator_size];
+    /* !NULL <=> iterating over dependent modules of this field */
+    Dep_value_field *field_dep; 
+    bool returned_goal; /* presumably TRUE once the goal module was returned */
+  };
+};
+
+
+const size_t Dep_value_table::iterator_size=
+  ALIGN_SIZE(sizeof(Dep_value_table::Module_iter));
+
+const size_t Dep_value::iterator_size= /* large enough for either subclass */
+  max(Dep_value_table::iterator_size, Dep_value_field::iterator_size);
+
+
+/*
+  A 'module'. A module has unsatisfied dependencies; their number is stored in
+  unbound_args. Modules also can be linked together in a list.
+*/
+
+class Dep_module : public Sql_alloc
+{
+public:
+  virtual ~Dep_module(){}  /* purecov: inspected */ /* stop compiler warnings */
+  
+  /* Mark as bound. Currently is non-virtual and does nothing */
+  void make_bound() {};
+
+  /* 
+    The final module will return TRUE here. When we see that TRUE was returned,
+    that will mean that functional dependency check succeeded.
+  */
+  virtual bool is_final () { return FALSE; }
+
+  /* 
+    Note that one more argument got bound (this decrements unbound_args). This
+    is expected to change is_applicable() from false to true after a sufficient
+    set of arguments is bound.
+  */
+  void touch() { unbound_args--; }
+  bool is_applicable() { return !test(unbound_args); }
+  
+  /* Iteration over this module's unbound values */
+  typedef char *Iterator;
+  virtual Iterator init_unbound_values_iter(char *buf)=0;
+  virtual Dep_value* get_next_unbound_value(Dep_analysis_context *dac,
+                                            Iterator iter)=0;
+  static const size_t iterator_size; /* max of the subclasses' iterator sizes */
+protected:
+  uint unbound_args; /* Number of arguments that are not bound yet */
+  
+  Dep_module() : unbound_args(0) {}
+  /* to bump unbound_args when constructing dependencies */
+  friend class Field_dependency_recorder; 
+  friend class Dep_analysis_context;
+};
+
+
+/*
+  This represents either
+   - "tbl.column= expr" equality dependency, i.e. tbl.column depends on fields
+     used in the expression, or
+   - tbl1.col1=tbl2.col2=... multi-equality.
+*/
+
+class Dep_module_expr : public Dep_module
+{
+public:
+  Dep_value_field *field; /* presumably the tbl.column side of the equality */
+  Item  *expr; /* presumably the expr side of "tbl.column= expr"; confirm */
+  
+  List<Dep_value_field> *mult_equal_fields; /* presumably multi-equality fields */
+  /* Used during condition analysis only, similar to KEYUSE::level */
+  uint level;
+
+  Iterator init_unbound_values_iter(char *buf);
+  Dep_value* get_next_unbound_value(Dep_analysis_context *dac, Iterator iter);
+  static const size_t iterator_size; /* Aligned size of Value_iter, see below */
+private:
+  class Value_iter
+  {
+  public:
+    Dep_value_field *field;
+    List_iterator<Dep_value_field> it;
+  };
+};
+
+const size_t Dep_module_expr::iterator_size=
+  ALIGN_SIZE(sizeof(Dep_module_expr::Value_iter)); /* room for one Value_iter */
+
+
+/*
+  A Unique key module
+   - Unique key has all of its components as arguments
+   - Once unique key is bound, its table value is known
+*/
+
+class Dep_module_key: public Dep_module
+{
+public:
+  Dep_module_key(Dep_value_table *table_arg, uint keyno_arg, uint n_parts_arg) :
+    table(table_arg), keyno(keyno_arg), next_table_key(NULL)
+  {
+    unbound_args= n_parts_arg; /* All n_parts_arg arguments start out unbound */
+  }
+  Dep_value_table *table; /* Table this key is from */
+  uint keyno;  /* The index we're representing */
+  /* Unique keys form a linked list, ordered by keyno */
+  Dep_module_key *next_table_key;
+  
+  Iterator init_unbound_values_iter(char *buf);
+  Dep_value* get_next_unbound_value(Dep_analysis_context *dac, Iterator iter);
+  static const size_t iterator_size; /* Aligned size of Value_iter, see below */
+private:
+  class Value_iter
+  {
+  public:
+    Dep_value_table *table;
+  };
+};
+
+const size_t Dep_module_key::iterator_size= 
+  ALIGN_SIZE(sizeof(Dep_module_key::Value_iter));
+
+const size_t Dep_module::iterator_size= /* large enough for expr and key modules */
+  max(Dep_module_expr::iterator_size, Dep_module_key::iterator_size);
+
+
+/*
+  A module that represents outer join that we're trying to eliminate. If we 
+  manage to declare this module to be bound, then outer join can be eliminated.
+*/
+
+class Dep_module_goal: public Dep_module
+{
+public:
+  Dep_module_goal(uint n_children)  
+  {
+    unbound_args= n_children; /* arguments that still have to become bound */
+  }
+  bool is_final() { return TRUE; } /* run_wave() stops on a final module */
+  /* 
+    This is the goal module, so the running wave algorithm should terminate
+    once it sees that this module is applicable and should never try to apply
+    it, hence no use for unbound value iterator implementation.
+  */
+  Iterator init_unbound_values_iter(char *buf)
+  { 
+    DBUG_ASSERT(0); /* must never be called, see the comment above */
+    return NULL;
+  }
+  Dep_value* get_next_unbound_value(Dep_analysis_context *dac, Iterator iter)
+  {
+    DBUG_ASSERT(0); /* must never be called, see the comment above */
+    return NULL;
+  }
+};
+
+
+/*
+  Functional dependency analyzer context
+*/
+class Dep_analysis_context
+{
+public:
+  bool setup_equality_modules_deps(List<Dep_module> *bound_modules);
+  bool run_wave(List<Dep_module> *new_bound_modules);
+
+  /* Tables that we're looking at eliminating */
+  table_map usable_tables;
+  
+  /* Array of equality dependencies */
+  Dep_module_expr *equality_mods;
+  uint n_equality_mods; /* Number of elements in the array */
+  uint n_equality_mods_alloced; /* Number of elements allocated */
+
+  /* tablenr -> Dep_value_table* mapping, hence MAX_TABLES (not MAX_KEY) */
+  Dep_value_table *table_deps[MAX_TABLES];
+  
+  /* Element for the outer join we're attempting to eliminate */
+  Dep_module_goal *outer_join_dep;
+
+  /* 
+    Bitmap of how expressions depend on fields. Given a Dep_value_field object,
+    one can check bitmap_is_set(expr_deps, field_val->bitmap_offset + expr_no)
+    to see if expression equality_mods[expr_no] depends on the given field.
+  */
+  MY_BITMAP expr_deps;
+  /* Value object accessors; callers check the result against NULL */
+  Dep_value_table *create_table_value(TABLE *table);
+  Dep_value_field *get_field_value(Field *field);
+
+#ifndef DBUG_OFF
+  void dbug_print_deps();
+#endif 
+};
+
+
+void eliminate_tables(JOIN *join);
+
+/* Forward declarations of the file-local (static) helper functions */
+static bool eliminate_tables_for_list(JOIN *join,
+                                      List<TABLE_LIST> *join_list,
+                                      table_map list_tables,
+                                      Item *on_expr,
+                                      table_map tables_used_elsewhere);
+
+static bool check_func_dependency(JOIN *join,
+                                  table_map dep_tables,
+                                  List_iterator<TABLE_LIST> *it,
+                                  TABLE_LIST *oj_tbl,
+                                  Item *cond);
+
+static void build_eq_mods_for_cond(Dep_analysis_context *ctx,
+                                   Dep_module_expr **eq_mod,
+                                   uint *and_level, Item *cond);
+static void check_equality(Dep_analysis_context *ctx,
+                           Dep_module_expr **eq_mod, uint and_level,
+                           Item_func *cond, Item *left, Item *right);
+
+static Dep_module_expr *merge_eq_mods(Dep_module_expr *start,
+                                      Dep_module_expr *new_fields,
+                                      Dep_module_expr *end, uint and_level);
+static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl);
+static void add_module_expr(Dep_analysis_context *dac,
+                            Dep_module_expr **eq_mod, uint and_level,
+                            Dep_value_field *field_val, Item *right,
+                            List<Dep_value_field> *mult_equal_fields);
+
+
+/*****************************************************************************/
+
+/*
+  Perform table elimination
+
+  SYNOPSIS
+    eliminate_tables()
+      join                   Join to work on
+
+  DESCRIPTION
+    This is the entry point for table elimination. Grep for MODULE INTERFACE
+    section in this file for calling convention.
+
+    The idea behind table elimination is that if we have an outer join:
+   
+      SELECT * FROM t1 LEFT JOIN 
+        (t2 JOIN t3) ON t2.primary_key=t1.col AND 
+                        t3.primary_key=t2.col
+    such that
+
+    1. columns of the inner tables are not used anywhere outside the outer
+       join (not in WHERE, not in GROUP/ORDER BY clause, not in select list 
+       etc etc), and
+    2. inner side of the outer join is guaranteed to produce at most one
+       record combination for each record combination of outer tables.
+    
+    then the inner side of the outer join can be removed from the query.
+    This is because it will always produce one matching record (either a
+    real match or a NULL-complemented record combination), and since there
+    are no references to columns of the inner tables anywhere, it doesn't
+    matter which record combination it was.
+
+    This function primarily handles checking #1. It collects a bitmap of
+    tables that are not used in select list/GROUP BY/ORDER BY/HAVING/etc and
+    thus can possibly be eliminated.
+
+    After this, if #1 is met, the function calls eliminate_tables_for_list()
+    that checks #2.
+  
+  SIDE EFFECTS
+    See the OVERVIEW section at the top of this file.
+
+*/
+
+void eliminate_tables(JOIN *join)
+{
+  THD *thd= join->thd;
+  Item *cur_item;
+  table_map referenced= 0;
+  DBUG_ENTER("eliminate_tables");
+  DBUG_ASSERT(join->eliminated_tables == 0);
+
+  /* Without outer joins there is nothing that could be eliminated */
+  if (!join->outer_join)
+    DBUG_VOID_RETURN;
+
+#ifndef DBUG_OFF
+  if (!optimizer_flag(thd, OPTIMIZER_SWITCH_TABLE_ELIMINATION))
+    DBUG_VOID_RETURN; /* purecov: inspected */
+#endif
+
+  /* Collect the tables that are referred to from WHERE and HAVING */
+  if (join->conds)
+    referenced|= join->conds->used_tables();
+  if (join->having)
+    referenced|= join->having->used_tables();
+
+  /* Add tables referred to from the select list */
+  List_iterator<Item> select_it(join->fields_list);
+  while ((cur_item= select_it++))
+    referenced|= cur_item->used_tables();
+
+  /* Add tables referred to from the ORDER BY and GROUP BY lists */
+  ORDER *const order_lists[]= { join->order, join->group_list };
+  for (uint n= 0; n < sizeof(order_lists)/sizeof(order_lists[0]); n++)
+    for (ORDER *ord= order_lists[n]; ord; ord= ord->next)
+      referenced|= (*ord->item)->used_tables();
+
+  if (join->select_lex == &thd->lex->select_lex)
+  {
+    switch (thd->lex->sql_command) {
+    case SQLCOM_UPDATE_MULTI:
+    {
+      /* Multi-table UPDATE: keep modified tables and those used in SET */
+      referenced|= thd->table_map_for_update;
+      List_iterator<Item> value_it(thd->lex->value_list);
+      while ((cur_item= value_it++))
+        referenced|= cur_item->used_tables();
+      break;
+    }
+    case SQLCOM_DELETE_MULTI:
+    {
+      /* Multi-table DELETE: don't eliminate the tables we modify */
+      TABLE_LIST *tbl= (TABLE_LIST*)thd->lex->auxiliary_table_list.first;
+      for (; tbl; tbl= tbl->next_local)
+        referenced|= tbl->table->map;
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
+  const table_map all_tables= join->all_tables_map();
+  if (all_tables & ~referenced)
+  {
+    /* Some tables are not referred to from anywhere: try eliminating them */
+    eliminate_tables_for_list(join, join->join_list, all_tables, NULL,
+                              referenced);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Perform table elimination in a given join list
+
+  SYNOPSIS
+    eliminate_tables_for_list()
+      join                    The join we're working on
+      join_list               Join list to eliminate tables from (and if
+                              on_expr !=NULL, then try eliminating join_list
+                              itself)
+      list_tables             Bitmap of tables embedded in the join_list.
+      on_expr                 ON expression, if the join list is the inner side
+                              of an outer join.
+                              NULL means it's not an outer join but rather a
+                              top-level join list.
+      tables_used_elsewhere   Bitmap of tables that are referred to from
+                              somewhere outside of the join list (e.g.
+                              select list, HAVING, other ON expressions, etc).
+
+  DESCRIPTION
+    Perform table elimination in a given join list:
+    - First, walk through join list members and try doing table elimination for
+      them.
+    - Then, if the join list itself is an inner side of outer join
+      (on_expr!=NULL), then try to eliminate the entire join list.
+
+    See "HANDLING MULTIPLE NESTED OUTER JOINS" section at the top of this file
+    for more detailed description and justification.
+    
+  RETURN
+    TRUE   The entire join list eliminated
+    FALSE  Join list wasn't eliminated (but some of its child outer joins 
+           possibly were)
+*/
+
+static bool
+eliminate_tables_for_list(JOIN *join, List<TABLE_LIST> *join_list,
+                          table_map list_tables, Item *on_expr,
+                          table_map tables_used_elsewhere)
+{
+  TABLE_LIST *tbl;
+  List_iterator<TABLE_LIST> it(*join_list);
+  table_map tables_used_on_left= 0; /* used by ON exprs of earlier members */
+  bool all_eliminated= TRUE; /* FALSE once a child outer join is kept */
+  /* Pass 1: try to eliminate every outer join that is a member of the list */
+  while ((tbl= it++))
+  {
+    if (tbl->on_expr)
+    {
+      table_map outside_used_tables= tables_used_elsewhere | 
+                                     tables_used_on_left;
+      if (tbl->nested_join)
+      {
+        /* This is  "... LEFT JOIN (join_nest) ON cond" */
+        if (eliminate_tables_for_list(join,
+                                      &tbl->nested_join->join_list, 
+                                      tbl->nested_join->used_tables, 
+                                      tbl->on_expr,
+                                      outside_used_tables))
+        {
+          mark_as_eliminated(join, tbl);
+        }
+        else
+          all_eliminated= FALSE;
+      }
+      else
+      {
+        /* This is  "... LEFT JOIN tbl ON cond" */
+        if (!(tbl->table->map & outside_used_tables) &&
+            check_func_dependency(join, tbl->table->map, NULL, tbl, 
+                                  tbl->on_expr))
+        {
+          mark_as_eliminated(join, tbl);
+        }
+        else
+          all_eliminated= FALSE;
+      }
+      tables_used_on_left |= tbl->on_expr->used_tables();
+    }
+    else
+    {
+      DBUG_ASSERT(!tbl->nested_join || tbl->sj_on_expr);
+      //psergey-todo: is the following really correct or we'll need to descend
+      //down all ON clauses: ? 
+      if (tbl->sj_on_expr)
+        tables_used_on_left |= tbl->sj_on_expr->used_tables();
+    }
+  }
+
+  /* Try eliminating the nest we're called for */
+  if (all_eliminated && on_expr && !(list_tables & tables_used_elsewhere))
+  {
+    it.rewind(); /* check_func_dependency() walks the list from the start */
+    return check_func_dependency(join, list_tables & ~join->eliminated_tables,
+                                 &it, NULL, on_expr);
+  }
+  return FALSE; /* not eliminated */
+}
+
+
+/*
+  Check if given condition makes given set of tables functionally dependent
+
+  SYNOPSIS
+    check_func_dependency()
+      join         Join we're processing
+      dep_tables   Tables that we check to be functionally dependent (on
+                   everything else)
+      it           Iterator that enumerates these tables, or NULL if we're 
+                   checking one single table and it is specified in oj_tbl
+                   parameter.
+      oj_tbl       NULL, or one single table that we're checking
+      cond         Condition to use to prove functional dependency
+
+  DESCRIPTION
+    Check if we can use given condition to infer that the set of given tables
+    is functionally dependent on everything else.
+
+  RETURN 
+    TRUE  - Yes, functionally dependent
+    FALSE - No, or error
+*/
+
+static
+bool check_func_dependency(JOIN *join,
+                           table_map dep_tables,
+                           List_iterator<TABLE_LIST> *it, 
+                           TABLE_LIST *oj_tbl,
+                           Item* cond)
+{
+  Dep_analysis_context dac; /* all state of this one dependency check */
+  
+  /* 
+    Pre-alloc some Dep_module_expr structures. We don't need this to be
+    guaranteed upper bound.
+  */
+  dac.n_equality_mods_alloced= 
+    join->thd->lex->current_select->max_equal_elems +
+    (join->thd->lex->current_select->cond_count+1)*2 +
+    join->thd->lex->current_select->between_count;
+  /* Start with an empty tablenr -> Dep_value_table mapping */
+  bzero(dac.table_deps, sizeof(dac.table_deps));
+  if (!(dac.equality_mods= new Dep_module_expr[dac.n_equality_mods_alloced]))
+    return FALSE; /* purecov: inspected */
+  /* Points one past the last Dep_module_expr produced so far */
+  Dep_module_expr* last_eq_mod= dac.equality_mods;
+  
+  /* Create Dep_value_table objects for all tables we're trying to eliminate */
+  if (oj_tbl)
+  {
+    if (!dac.create_table_value(oj_tbl->table))
+      return FALSE; /* purecov: inspected */
+  }
+  else
+  {
+    TABLE_LIST *tbl; 
+    while ((tbl= (*it)++))
+    {
+      if (tbl->table && (tbl->table->map & dep_tables))
+      {
+        if (!dac.create_table_value(tbl->table))
+          return FALSE; /* purecov: inspected */
+      }
+    }
+  }
+  dac.usable_tables= dep_tables;
+
+  /*
+    Analyze the ON expression and create Dep_module_expr objects and
+      Dep_value_field objects for the used fields.
+  */
+  uint and_level=0;
+  build_eq_mods_for_cond(&dac, &last_eq_mod, &and_level, cond);
+  if (!(dac.n_equality_mods= last_eq_mod - dac.equality_mods))
+    return FALSE;  /* No useful conditions */
+  /* Receives the modules that are bound from the start (the first wave) */
+  List<Dep_module> bound_modules;
+  /* The goal has one unbound argument per table in dep_tables */
+  if (!(dac.outer_join_dep= new Dep_module_goal(my_count_bits(dep_tables))) ||
+      dac.setup_equality_modules_deps(&bound_modules))
+  {
+    return FALSE; /* OOM, default to non-dependent */ /* purecov: inspected */
+  }
+  
+  DBUG_EXECUTE("test", dac.dbug_print_deps(); );
+  /* TRUE if the wave started from bound_modules reaches the goal module */
+  return dac.run_wave(&bound_modules);
+}
+
+
+/*
+  Running wave functional dependency check algorithm
+
+  SYNOPSIS
+   Dep_analysis_context::run_wave()
+     new_bound_modules  List of bound modules to start the running wave from. 
+                        The list is destroyed during execution
+  
+  DESCRIPTION
+    This function uses running wave algorithm to check if the join nest is
+    functionally-dependent. 
+    We start from provided list of bound modules, and then run the wave across 
+    dependency edges, trying to reach the Dep_module_goal module. If we manage
+    to reach it, then the join nest is functionally-dependent, otherwise it is
+    not.
+
+  RETURN 
+    TRUE   Yes, functionally dependent
+    FALSE  No.
+*/
+
+bool Dep_analysis_context::run_wave(List<Dep_module> *new_bound_modules)
+{
+  List<Dep_value> wave_values;
+
+  while (!new_bound_modules->is_empty())
+  {
+    /*
+      Step 1: the current wave is the list of bound modules. Bind every value
+      that can be reached from one of them and is not bound yet, and collect
+      these values in wave_values.
+    */
+    List_iterator<Dep_module> module_it(*new_bound_modules);
+    for (Dep_module *src= module_it++; src; src= module_it++)
+    {
+      char src_buf[Dep_module::iterator_size + ALIGN_MAX_UNIT];
+      Dep_module::Iterator src_iter= src->init_unbound_values_iter(src_buf);
+      Dep_value *reached_value;
+      while ((reached_value= src->get_next_unbound_value(this, src_iter)))
+      {
+        reached_value->make_bound();
+        wave_values.push_back(reached_value);
+      }
+    }
+    new_bound_modules->empty();
+
+    /*
+      Step 2: walk over the values that have just become bound and touch the
+      unbound modules that can be reached from them. Modules that became
+      applicable form the next wave, unless one of them is the final module.
+    */
+    List_iterator<Dep_value> value_it(wave_values);
+    for (Dep_value *val= value_it++; val; val= value_it++)
+    {
+      char dst_buf[Dep_value::iterator_size + ALIGN_MAX_UNIT];
+      Dep_value::Iterator dst_iter= val->init_unbound_modules_iter(dst_buf);
+      Dep_module *reached_module;
+      while ((reached_module= val->get_next_unbound_module(this, dst_iter)))
+      {
+        reached_module->touch();
+        if (reached_module->is_applicable())
+        {
+          if (reached_module->is_final())
+            return TRUE; /* Functionally dependent */
+          reached_module->make_bound();
+          new_bound_modules->push_back(reached_module);
+        }
+      }
+    }
+    wave_values.empty();
+  }
+
+  /* The wave died out without reaching the final module */
+  return FALSE;
+}
+
+
+/*
+  This is used to analyze expressions in "tbl.col=expr" dependencies so
+  that we can figure out which fields the expression depends on.
+*/
+
+class Field_dependency_recorder : public Field_enumerator
+{
+public:
+  Field_dependency_recorder(Dep_analysis_context *ctx_arg)
+    : ctx(ctx_arg), expr_offset(0), visited_other_tables(FALSE) {}
+  /* Record that expression equality_mods[expr_offset] uses the given field */
+  void visit_field(Item_field *item)
+  {
+    Field *field= item->field;
+    Dep_value_table *tbl_dep;
+    if ((tbl_dep= ctx->table_deps[field->table->tablenr]))
+    {
+      for (Dep_value_field *field_dep= tbl_dep->fields; field_dep; 
+           field_dep= field_dep->next_table_field)
+      {
+        if (field->field_index == field_dep->field->field_index)
+        {
+          uint offs= field_dep->bitmap_offset + expr_offset;
+          if (!bitmap_is_set(&ctx->expr_deps, offs)) /* count a field once */
+            ctx->equality_mods[expr_offset].unbound_args++;
+          bitmap_set_bit(&ctx->expr_deps, offs);
+          return;
+        }
+      }
+      /* 
+        We got here if we didn't find this field. It's not a part of 
+        a unique key, and/or there is no field=expr element for it.
+        Bump the dependency anyway, this will signal that this dependency
+        cannot be satisfied.
+      */
+      ctx->equality_mods[expr_offset].unbound_args++;
+    }
+    else
+      visited_other_tables= TRUE;
+  }
+
+  Dep_analysis_context *ctx;
+  /* Offset of the expression we're processing in the dependency bitmap */
+  uint expr_offset;
+  /* Set when a visited field's table has no entry in ctx->table_deps */
+  bool visited_other_tables;
+};
+
+
+
+
+/*
+  Setup inbound dependency relationships for tbl.col=expr equalities
+ 
+  SYNOPSIS
+    setup_equality_modules_deps()
+      bound_modules    Put here modules that were found not to depend on 
+                       any non-bound columns.
+
+  DESCRIPTION
+    Setup inbound dependency relationships for tbl.col=expr equalities:
+      - allocate a bitmap where we store such dependencies
+      - for each "tbl.col=expr" equality, analyze the expr part and find out
+        which fields it refers to and set appropriate dependencies.
+    
+  RETURN
+    FALSE  OK
+    TRUE   Out of memory
+*/
+
+bool Dep_analysis_context::setup_equality_modules_deps(List<Dep_module> 
+                                                       *bound_modules)
+{
+  DBUG_ENTER("setup_equality_modules_deps");
+ 
+  /*
+    Count Dep_value_field objects and assign each of them a unique 
+    bitmap_offset value.
+  */
+  uint offset= 0;
+  for (Dep_value_table **tbl_dep= table_deps; 
+       tbl_dep < table_deps + MAX_TABLES;
+       tbl_dep++)
+  {
+    if (*tbl_dep)
+    {
+      for (Dep_value_field *field_dep= (*tbl_dep)->fields;
+           field_dep;
+           field_dep= field_dep->next_table_field)
+      {
+        field_dep->bitmap_offset= offset;
+        offset += n_equality_mods; /* one bit per equality for each field */
+      }
+    }
+  }
+  /* 'offset' is now the total number of bits needed by expr_deps */
+  void *buf;
+  if (!(buf= current_thd->alloc(bitmap_buffer_size(offset))) ||
+      bitmap_init(&expr_deps, (my_bitmap_map*)buf, offset, FALSE))
+  {
+    DBUG_RETURN(TRUE); /* purecov: inspected */
+  }
+  bitmap_clear_all(&expr_deps);
+
+  /* 
+    Analyze all "field=expr" dependencies, and have expr_deps encode
+    dependencies of expressions from fields.
+
+    Also collect a linked list of equalities that are bound.
+  */
+  Field_dependency_recorder deps_recorder(this);
+  for (Dep_module_expr *eq_mod= equality_mods; 
+       eq_mod < equality_mods + n_equality_mods;
+       eq_mod++)
+  {
+    deps_recorder.expr_offset= eq_mod - equality_mods;
+    deps_recorder.visited_other_tables= FALSE;
+    eq_mod->unbound_args= 0; /* recounted below for this equality */
+    
+    if (eq_mod->field)
+    {
+      /* Regular tbl.col=expr(tblX1.col1, tblY1.col2, ...) */
+      eq_mod->expr->walk(&Item::enumerate_field_refs_processor, FALSE, 
+                               (uchar*)&deps_recorder);
+    }
+    else 
+    {
+      /* It's a multi-equality */
+      eq_mod->unbound_args= !test(eq_mod->expr); /* 0 if it has a bound item */
+      List_iterator<Dep_value_field> it(*eq_mod->mult_equal_fields);
+      Dep_value_field* field_val;
+      while ((field_val= it++))
+      {
+        uint offs= field_val->bitmap_offset + eq_mod - equality_mods;
+        bitmap_set_bit(&expr_deps, offs);
+      }
+    }
+    /* No unbound arguments: the module is bound from the start */
+    if (!eq_mod->unbound_args)
+      bound_modules->push_back(eq_mod);
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Ordering that we're using whenever we need to maintain a no-duplicates list
+  of field value objects.
+*/
+
+static int
+compare_field_values(Dep_value_field *a, Dep_value_field *b, void *unused)
+{
+  /* Ordering key: table number first, then field index within the table */
+  uint lhs_key= a->field->table->tablenr*MAX_FIELDS + a->field->field_index;
+  uint rhs_key= b->field->table->tablenr*MAX_FIELDS + b->field->field_index;
+  if (lhs_key == rhs_key)
+    return 0;
+  return (lhs_key < rhs_key) ? -1 : 1;
+}
+
+
+/*
+  Produce Dep_module_expr elements for given condition.
+
+  SYNOPSIS
+    build_eq_mods_for_cond()
+      ctx              Table elimination context
+      eq_mod    INOUT  Put produced equality conditions here
+      and_level INOUT  AND-level (like in add_key_fields)
+      cond             Condition to process
+
+  DESCRIPTION
+    Analyze the given condition and produce an array of Dep_module_expr 
+    dependencies from it. The idea of analysis is as follows:
+    There are useful equalities that have form 
+        
+        eliminable_tbl.field = expr      (denote as useful_equality)
+
+    The condition is composed of useful equalities and other conditions that
+    are combined together with AND and OR operators. We process the condition
+    in recursive fashion according to these basic rules:
+
+      useful_equality1 AND useful_equality2 -> make array of two 
+                                               Dep_module_expr objects
+
+      useful_equality AND other_cond -> discard other_cond
+      
+      useful_equality OR other_cond -> discard everything
+      
+      useful_equality1 OR useful_equality2 -> check if both sides of OR are the
+                                              same equality. If yes, that's the
+                                              result, otherwise discard 
+                                              everything.
+
+    The rules are used to map the condition into an array Dep_module_expr
+    elements. The array will specify functional dependencies that logically 
+    follow from the condition.
+
+  SEE ALSO
+    This function is modeled after add_key_fields()
+*/
+
+static 
+void build_eq_mods_for_cond(Dep_analysis_context *ctx, 
+                            Dep_module_expr **eq_mod,
+                            uint *and_level, Item *cond)
+{
+  if (cond->type() == Item_func::COND_ITEM)
+  {
+    List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
+    uint orig_offset= *eq_mod - ctx->equality_mods;
+    /* NOTE(review): kept as offset, presumably the array can move - confirm */
+    /* AND/OR */
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      Item *item;
+      while ((item=li++))
+        build_eq_mods_for_cond(ctx, eq_mod, and_level, item);
+      /* Everything produced under this AND gets the current level */
+      for (Dep_module_expr *mod_exp= ctx->equality_mods + orig_offset;
+           mod_exp != *eq_mod ; mod_exp++)
+      {
+        mod_exp->level= *and_level;
+      }
+    }
+    else
+    {
+      Item *item;
+      (*and_level)++;
+      build_eq_mods_for_cond(ctx, eq_mod, and_level, li++);
+      while ((item=li++))
+      {
+        Dep_module_expr *start_key_fields= *eq_mod; /* start of right OR-part */
+        (*and_level)++;
+        build_eq_mods_for_cond(ctx, eq_mod, and_level, item);
+        *eq_mod= merge_eq_mods(ctx->equality_mods + orig_offset, 
+                               start_key_fields, *eq_mod,
+                               ++(*and_level));
+      }
+    }
+    return;
+  }
+
+  if (cond->type() != Item::FUNC_ITEM)
+    return;
+
+  Item_func *cond_func= (Item_func*) cond;
+  Item **args= cond_func->arguments();
+
+  switch (cond_func->functype()) {
+  case Item_func::BETWEEN:
+  {
+    Item *fld; /* "fld BETWEEN x AND x" is handled like "fld = x" */
+    if (!((Item_func_between*)cond)->negated &&
+        (fld= args[0]->real_item())->type() == Item::FIELD_ITEM &&
+        args[1]->eq(args[2], ((Item_field*)fld)->field->binary()))
+    {
+      check_equality(ctx, eq_mod, *and_level, cond_func, args[0], args[1]);
+      check_equality(ctx, eq_mod, *and_level, cond_func, args[1], args[0]);
+    }
+    break;
+  }
+  case Item_func::EQ_FUNC:
+  case Item_func::EQUAL_FUNC:
+  {
+    check_equality(ctx, eq_mod, *and_level, cond_func, args[0], args[1]);
+    check_equality(ctx, eq_mod, *and_level, cond_func, args[1], args[0]);
+    break;
+  }
+  case Item_func::ISNULL_FUNC:
+  {
+    Item *tmp=new Item_null; /* "x IS NULL" is handled like "x = NULL" */
+    if (tmp)
+      check_equality(ctx, eq_mod, *and_level, cond_func, args[0], tmp);
+    break;
+  }
+  case Item_func::MULT_EQUAL_FUNC:
+  {
+    /*
+      The condition is a 
+
+        tbl1.field1 = tbl2.field2 = tbl3.field3 [= const_expr]
+
+      multiple-equality. Do two things:
+       - Collect List<Dep_value_field> of tblX.colY where tblX is one of the
+         tables we're trying to eliminate.
+       - remember if there was a bound value, either const_expr or tblY.colZ
+         where tblY is not a table that we're trying to eliminate.
+      Store all collected information in a Dep_module_expr object.
+    */
+    Item_equal *item_equal= (Item_equal*)cond;
+    List<Dep_value_field> *fvl;
+    if (!(fvl= new List<Dep_value_field>))
+      break; /* purecov: inspected */
+
+    Item_equal_fields_iterator it(*item_equal);
+    Item *item;
+    Item *bound_item= item_equal->get_const();
+    while ((item= it++))
+    {
+      Field *equal_field= it.get_curr_field();
+      if ((item->used_tables() & ctx->usable_tables))
+      {
+        Dep_value_field *field_val;
+        if ((field_val= ctx->get_field_value(equal_field)))
+          fvl->push_back(field_val);
+      }
+      else
+      {
+        if (!bound_item) /* the first bound item found is the one we keep */
+          bound_item= item;
+      }
+    }
+    /* 
+      Multiple equality is only useful if it includes at least one field from
+      the table that we could potentially eliminate:
+    */
+    if (fvl->elements)
+    {
+      /* merge_eq_mods() intersects these lists and needs them ordered */
+      bubble_sort<Dep_value_field>(fvl, compare_field_values, NULL);
+      add_module_expr(ctx, eq_mod, *and_level, NULL, bound_item, fvl);
+    }
+    break;
+  }
+  default:
+    break;
+  }
+}
+
+
+/*
+  Perform an OR operation on two (adjacent) Dep_module_expr arrays.
+
+  SYNOPSIS
+     merge_eq_mods()
+       start        Start of left OR-part
+       new_fields   Start of right OR-part
+       end          End of right OR-part
+       and_level    AND-level (like in add_key_fields)
+
+  DESCRIPTION
+  This function is invoked for two adjacent arrays of Dep_module_expr elements:
+
+                      $LEFT_PART             $RIGHT_PART
+             +-----------------------+-----------------------+
+            start                new_fields                 end
+         
+  The goal is to produce an array which would correspond to the combined 
+  
+    $LEFT_PART OR $RIGHT_PART
+  
+  condition. This is achieved as follows: First, we apply distributive law:
+  
+    (fdep_A_1 AND fdep_A_2 AND ...)  OR  (fdep_B_1 AND fdep_B_2 AND ...) =
+
+     = AND_ij (fdep_A_[i] OR fdep_B_[j])
+  
+  Then we walk over the obtained "fdep_A_[i] OR fdep_B_[j]" pairs, and 
+   - Discard those that have left and right part referring to different
+     columns. We can't infer anything useful from "col1=expr1 OR col2=expr2".
+   - When left and right parts refer to the same column,  we check if they are 
+     essentially the same. 
+     = If they are the same, we keep one copy 
+       "t.col=expr OR t.col=expr"  -> "t.col=expr"
+     = if they are different , then we discard both
+      "t.col=expr1 OR t.col=expr2" -> (nothing useful)
+
+  (no per-table or for-index FUNC_DEPS exist yet at this phase).
+
+  See also merge_key_fields().
+
+  RETURN 
+    End of the result array
+*/
+
+static 
+Dep_module_expr *merge_eq_mods(Dep_module_expr *start, 
+                               Dep_module_expr *new_fields,
+                               Dep_module_expr *end, uint and_level)
+{
+  if (start == new_fields)
+    return start;  /*  (nothing) OR (...) -> (nothing) */
+  if (new_fields == end)
+    return start;  /*  (...) OR (nothing) -> (nothing) */
+
+  Dep_module_expr *first_free= new_fields; /* end of the result built so far */
+
+  for (; new_fields != end ; new_fields++)
+  {
+    for (Dep_module_expr *old=start ; old != first_free ; old++)
+    {
+      if (old->field == new_fields->field)
+      {
+        if (!old->field)
+        {
+          /*
+            OR-ing two multiple equalities. We must compute an intersection of
+            used fields, and check the constants according to these rules:
+
+              a=b=c=d  OR a=c=e=f   ->  a=c  (compute intersection)
+              a=const1 OR a=b       ->  (nothing)
+              a=const1 OR a=const1  ->  a=const1 
+              a=const1 OR a=const2  ->  (nothing)
+            
+            If we're performing an OR operation over multiple equalities, e.g.
+
+              (a=b=c AND p=q) OR (a=b AND v=z)
+            
+            then we'll need to try combining each equality with each. ANDed
+            equalities are guaranteed to be disjoint, so we'll only get one
+            hit.
+          */
+          Field *eq_field= old->mult_equal_fields->head()->field;
+          if (old->expr && new_fields->expr &&
+              old->expr->eq_by_collation(new_fields->expr, eq_field->binary(),
+                                         eq_field->charset()))
+          {
+            /* Ok, keep */
+          }
+          else
+          {
+            /* no single constant/bound item. */
+            old->expr= NULL;
+          }
+           
+          List <Dep_value_field> *fv;
+          if (!(fv= new List<Dep_value_field>))
+            break; /* purecov: inspected */
+
+          List_iterator<Dep_value_field> it1(*old->mult_equal_fields);
+          List_iterator<Dep_value_field> it2(*new_fields->mult_equal_fields);
+          Dep_value_field *lfield= it1++;
+          Dep_value_field *rfield= it2++;
+          /* Intersect two ordered lists */
+          while (lfield && rfield)
+          {
+            if (lfield == rfield)
+            {
+              fv->push_back(lfield);
+              lfield=it1++;
+              rfield=it2++;
+            }
+            else
+            {
+              if (compare_field_values(lfield, rfield, NULL) < 0)
+                lfield= it1++;
+              else
+                rfield= it2++;
+            }
+          }
+          /* Keep only if at least two members (fields + bound item) remain */
+          if (fv->elements + test(old->expr) > 1)
+          {
+            old->mult_equal_fields= fv;
+            old->level= and_level;
+          }
+        }
+        else if (!new_fields->expr->const_item())
+        {
+          /*
+            If the value matches, we can use the key reference.
+            If not, we keep it until we have examined all new values
+          */
+          if (old->expr->eq(new_fields->expr, 
+                            old->field->field->binary()))
+          {
+            old->level= and_level;
+          }
+        }
+        else if (old->expr->eq_by_collation(new_fields->expr,
+                                            old->field->field->binary(),
+                                            old->field->field->charset()))
+        {
+          old->level= and_level;
+        }
+        else
+        {
+          /* The expressions are different. */
+          if (old == --first_free)                // If last item
+            break;
+          *old= *first_free;                        // Remove old value
+          old--;                                // Retry this value
+        }
+      }
+    }
+  }
+
+  /* 
+    Ok, the results are within the [start, first_free) range, and the useful
+    elements have level==and_level. Now, remove all unusable elements:
+  */
+  for (Dep_module_expr *old=start ; old != first_free ;)
+  {
+    if (old->level != and_level)
+    {                                                // Not used in all levels
+      if (old == --first_free)
+        break;
+      *old= *first_free;                        // Remove old value
+      continue;
+    }
+    old++;
+  }
+  return first_free;
+}
+
+
+/*
+  Add a Dep_module_expr element for a left=right condition
+
+  SYNOPSIS
+    check_equality()
+      ctx               Table elimination context
+      eq_mod     INOUT  Store created Dep_module_expr here and increment ptr if
+                        you do so
+      and_level         AND-level (like in add_key_fields)
+      cond              Condition we've inferred the left=right equality from.
+      left              Left expression
+      right             Right expression
+      (ctx->usable_tables)  Not a parameter: a Dep_module_expr is created only
+                        if 'left' uses a table from this set.
+
+  DESCRIPTION 
+    Check if the passed left=right equality is such that 
+     - 'left' is an Item_field referring to a field in a table we're checking
+       to be functionally dependent,
+     - the equality allows to conclude that 'left' expression is functionally 
+       dependent on the 'right',
+    and if so, create a Dep_module_expr object.
+*/
+
+static 
+void check_equality(Dep_analysis_context *ctx, Dep_module_expr **eq_mod,
+                    uint and_level, Item_func *cond, Item *left, Item *right)
+{
+  if ((left->used_tables() & ctx->usable_tables) &&
+      !(right->used_tables() & RAND_TABLE_BIT) &&
+      left->real_item()->type() == Item::FIELD_ITEM)
+  {
+    Field *field= ((Item_field*)left->real_item())->field;
+    if (field->result_type() == STRING_RESULT)
+    {
+      if (right->result_type() != STRING_RESULT)
+      {
+        if (field->cmp_type() != right->result_type())
+          return;                     /* field and 'right' compare differently */
+      }
+      else
+      {
+        /*
+          We can't assume there's a functional dependency if the effective
+          collation of the operation differs from the field collation.
+        */
+        if (field->cmp_type() == STRING_RESULT &&
+            ((Field_str*)field)->charset() != cond->compare_collation())
+          return;
+      }
+    }
+    Dep_value_field *field_val;
+    if ((field_val= ctx->get_field_value(field)))  /* NULL: nothing is added */
+      add_module_expr(ctx, eq_mod, and_level, field_val, right, NULL);
+  }
+}
+
+
+/* 
+  Add a Dep_module_expr object with the specified parameters. 
+  
+  DESCRIPTION
+    Store the parameters in **eq_mod and advance *eq_mod by one. If the
+    ctx->equality_mods array is full it is first replaced by one twice as
+    big; if that allocation fails, nothing is added and *eq_mod is unchanged.
+*/
+
+static 
+void add_module_expr(Dep_analysis_context *ctx, Dep_module_expr **eq_mod,
+                     uint and_level, Dep_value_field *field_val, 
+                     Item *right, List<Dep_value_field>* mult_equal_fields)
+{
+  if (*eq_mod == ctx->equality_mods + ctx->n_equality_mods_alloced)
+  {
+    /* 
+      We've filled the entire equality_mods array. Replace it with a bigger
+      one. We do it somewhat inefficiently but it doesn't matter.
+    */
+    /* purecov: begin inspected */
+    Dep_module_expr *new_arr;
+    if (!(new_arr= new Dep_module_expr[ctx->n_equality_mods_alloced *2]))
+      return;
+    ctx->n_equality_mods_alloced *= 2;
+    for (int i= 0; i < *eq_mod - ctx->equality_mods; i++)
+      new_arr[i]= ctx->equality_mods[i];
+    /* Rebase the cursor while ctx->equality_mods is still the OLD array */
+    *eq_mod= new_arr + (*eq_mod - ctx->equality_mods);
+    ctx->equality_mods= new_arr;
+    /* purecov: end */
+  }
+
+  (*eq_mod)->field= field_val;
+  (*eq_mod)->expr= right;
+  (*eq_mod)->level= and_level;
+  (*eq_mod)->mult_equal_fields= mult_equal_fields;
+  (*eq_mod)++;
+}
+
+
+/*
+  Create a Dep_value_table object for the given table
+
+  SYNOPSIS
+    Dep_analysis_context::create_table_value()
+      table  Table to create object for
+
+  DESCRIPTION
+    Create a Dep_value_table object for the given table. Also create
+    Dep_module_key objects for all unique keys in the table.
+
+  RETURN
+    Created table value object
+    NULL if out of memory
+*/
+
+Dep_value_table *Dep_analysis_context::create_table_value(TABLE *table)
+{
+  Dep_value_table *tbl_dep;
+  if (!(tbl_dep= new Dep_value_table(table)))
+    return NULL; /* purecov: inspected */
+
+  Dep_module_key **key_list= &(tbl_dep->keys);  /* tail of the key list */
+  /* Add dependencies for unique keys */
+  for (uint i=0; i < table->s->keys; i++)
+  {
+    KEY *key= table->key_info + i; 
+    if (key->flags & HA_NOSAME)
+    {
+      Dep_module_key *key_dep;
+      if (!(key_dep= new Dep_module_key(tbl_dep, i, key->key_parts)))
+        return NULL;          /* tbl_dep is not registered in table_deps[] */
+      *key_list= key_dep;                       /* append, keeping key order */
+      key_list= &(key_dep->next_table_key);
+    }
+  }
+  return table_deps[table->tablenr]= tbl_dep;   /* register and return */
+}
+
+
+/* 
+  Get a Dep_value_field object for the given field, creating it if necessary
+
+  SYNOPSIS
+   Dep_analysis_context::get_field_value()
+      field  Field to create object for
+        
+  DESCRIPTION
+    Get a Dep_value_field object for the given field. First, we search for it 
+    in the list of Dep_value_field objects we have already created. If we don't 
+    find it, we create a new Dep_value_field and put it into the list of field
+    objects we have for the table (the list is kept ordered by field_index).
+
+  RETURN
+    Found or created field value object
+    NULL if out of memory
+*/
+
+Dep_value_field *Dep_analysis_context::get_field_value(Field *field)
+{
+  TABLE *table= field->table;
+  Dep_value_table *tbl_dep= table_deps[table->tablenr];
+
+  /* Try finding the field in field list */
+  Dep_value_field **pfield= &(tbl_dep->fields);
+  while (*pfield && (*pfield)->field->field_index < field->field_index)
+  {
+    pfield= &((*pfield)->next_table_field);
+  }
+  if (*pfield && (*pfield)->field->field_index == field->field_index)
+    return *pfield;
+  /* Create the field and insert it in the list at the position found */
+  Dep_value_field *new_field= new Dep_value_field(tbl_dep, field);
+  if (!new_field)
+    return NULL; /* purecov: inspected */
+  new_field->next_table_field= *pfield;
+  *pfield= new_field;
+  return new_field;
+}
+
+
+/* 
+  Iteration over unbound modules that are our dependencies.
+  For a table those are:
+    - dependencies of our fields
+    - the outer join we're in 
+*/
+char *Dep_value_table::init_unbound_modules_iter(char *buf)
+{
+  Module_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Module_iter);
+  iter->field_dep= fields;                  /* start at the first field */
+  if (fields)
+  {
+    fields->init_unbound_modules_iter(iter->buf);   /* nested field iterator */
+    fields->make_unbound_modules_iter_skip_keys(iter->buf);
+  }
+  iter->returned_goal= FALSE;     /* dac->outer_join_dep not handed out yet */
+  return (char*)iter;
+}
+
+/* Return the next module of the table's fields, then outer_join_dep once */
+Dep_module* 
+Dep_value_table::get_next_unbound_module(Dep_analysis_context *dac,
+                                         char *iter)
+{
+  Module_iter *di= (Module_iter*)iter;
+  while (di->field_dep)
+  {
+    Dep_module *res;
+    if ((res= di->field_dep->get_next_unbound_module(dac, di->buf)))
+      return res;
+    if ((di->field_dep= di->field_dep->next_table_field))
+    {
+      /* Move on to the next field: restart the nested iterator, skip keys */
+      char *field_iter= ((Module_iter*)iter)->buf;
+      di->field_dep->init_unbound_modules_iter(field_iter);
+      di->field_dep->make_unbound_modules_iter_skip_keys(field_iter);
+    }
+  }
+  /* All fields are exhausted: return dac->outer_join_dep, but only once */
+  if (!di->returned_goal)
+  {
+    di->returned_goal= TRUE;
+    return dac->outer_join_dep;
+  }
+  return NULL;
+}
+
+/* Set up a Value_iter in 'buf': the single field, or mult_equal_fields */
+char *Dep_module_expr::init_unbound_values_iter(char *buf)
+{
+  Value_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Value_iter);
+  iter->field= field;
+  if (!field)
+  {
+    /* No single field: construct a list iterator in place inside 'buf' */
+    new (&iter->it) List_iterator<Dep_value_field>(*mult_equal_fields);
+  }
+  return (char*)iter;
+}
+
+/* Return the next value from the iterator that is not bound, or NULL */
+Dep_value* Dep_module_expr::get_next_unbound_value(Dep_analysis_context *dac,
+                                                   char *buf)
+{
+  Dep_value *res;
+  if (field)
+  {
+    res= ((Value_iter*)buf)->field;
+    ((Value_iter*)buf)->field= NULL;        /* the field is returned only once */
+    return (!res || res->is_bound())? NULL : res;
+  }
+  else
+  {
+    while ((res= ((Value_iter*)buf)->it++))  /* walk the multi-equality list */
+    {
+      if (!res->is_bound())
+        return res;
+    }
+    return NULL;
+  }
+}
+
+/* Set up a Value_iter in 'buf' that holds this key's table */
+char *Dep_module_key::init_unbound_values_iter(char *buf)
+{
+  Value_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Value_iter);
+  iter->table= table;
+  return (char*)iter;
+}
+
+/* Return the table stored in the iterator on the first call, NULL afterwards */
+Dep_value* Dep_module_key::get_next_unbound_value(Dep_analysis_context *dac,
+                                                  Dep_module::Iterator iter)
+{
+  Dep_value* res= ((Value_iter*)iter)->table;
+  ((Value_iter*)iter)->table= NULL;   /* note: is_bound() is not checked here */
+  return res;
+}
+
+/* Set up a Module_iter in 'buf': first key of the table, equality number 0 */
+Dep_value::Iterator Dep_value_field::init_unbound_modules_iter(char *buf)
+{
+  Module_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Module_iter);
+  iter->key_dep= table->keys;
+  iter->equality_no= 0;
+  return (char*)iter;
+}
+
+/* Clear the iterator's key position so that only equalities are enumerated */
+void 
+Dep_value_field::make_unbound_modules_iter_skip_keys(Dep_value::Iterator iter)
+{
+  ((Module_iter*)iter)->key_dep= NULL;
+}
+
+/* Return the next not-yet-applicable key or equality for this field, or NULL */
+Dep_module* Dep_value_field::get_next_unbound_module(Dep_analysis_context *dac,
+                                                     Dep_value::Iterator iter)
+{
+  Module_iter *di= (Module_iter*)iter;
+  Dep_module_key *key_dep= di->key_dep;
+  
+  /* 
+    First, enumerate all unique keys that are 
+    - not yet applicable
+    - have this field as a part of them
+  */
+  while (key_dep && (key_dep->is_applicable() ||
+         !field->part_of_key.is_set(key_dep->keyno)))
+  {
+    key_dep= key_dep->next_table_key;
+  }
+
+  if (key_dep)
+  {
+    di->key_dep= key_dep->next_table_key;       /* resume after this key */
+    return key_dep;
+  }
+  else 
+    di->key_dep= NULL;                          /* no more keys to return */
+  
+  /*
+    Then walk through [multi]equalities and find those that
+     - depend on this field
+     - and are not bound yet.
+  */
+  uint eq_no= di->equality_no;
+  while (eq_no < dac->n_equality_mods && 
+         (!bitmap_is_set(&dac->expr_deps, bitmap_offset + eq_no) ||
+         dac->equality_mods[eq_no].is_applicable()))
+  {
+    eq_no++;
+  }
+  
+  if (eq_no < dac->n_equality_mods)
+  {
+    di->equality_no= eq_no+1;                   /* resume after this equality */
+    return &dac->equality_mods[eq_no];
+  }
+  return NULL;
+}
+
+
+/* 
+  Mark one table or the whole join nest as eliminated. A table that is not
+  yet in join->const_table_map is turned into a JT_CONST const table.
+*/
+static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl)
+{
+  TABLE *table;
+  /*
+    NOTE: there are TABLE_LIST objects that have
+    tbl->table!= NULL && tbl->nested_join!=NULL and 
+    tbl->table == tbl->nested_join->join_list->element(..)->table
+  */
+  if (tbl->nested_join)
+  {
+    TABLE_LIST *child;
+    List_iterator<TABLE_LIST> it(tbl->nested_join->join_list);
+    while ((child= it++))
+      mark_as_eliminated(join, child);          /* recurse into the nest */
+  }
+  else if ((table= tbl->table))
+  {
+    JOIN_TAB *tab= tbl->table->reginfo.join_tab;
+    if (!(join->const_table_map & tab->table->map))
+    {
+      DBUG_PRINT("info", ("Eliminated table %s", table->alias.c_ptr()));
+      tab->type= JT_CONST;
+      join->eliminated_tables |= table->map;
+      join->const_table_map|= table->map;
+      set_position(join, join->const_tables++, tab, (KEYUSE*)0);
+    }
+  }
+  /* Run Item::mark_as_eliminated_processor over the ON expression, if any */
+  if (tbl->on_expr)
+    tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL);
+}
+
+
+#ifndef DBUG_OFF
+/* purecov: begin inspected */
+void Dep_analysis_context::dbug_print_deps()
+{
+  DBUG_ENTER("dbug_print_deps");
+  DBUG_LOCK_FILE;
+  /* Debug-only dump of equalities, tables and per-field equality bits */
+  fprintf(DBUG_FILE,"deps {\n");
+  /* Start with printing equalities */
+  for (Dep_module_expr *eq_mod= equality_mods; 
+       eq_mod != equality_mods + n_equality_mods; eq_mod++)
+  {
+    char buf[128];
+    String str(buf, sizeof(buf), &my_charset_bin);
+    str.length(0);
+    if (eq_mod->expr)           /* NULL for a multi-equality without a const */
+      eq_mod->expr->print(&str, QT_ORDINARY);
+    if (eq_mod->field)
+    {
+      fprintf(DBUG_FILE, "  equality%ld: %s -> %s.%s\n", 
+              (long)(eq_mod - equality_mods),
+              str.c_ptr(),
+              eq_mod->field->table->table->alias.c_ptr(),
+              eq_mod->field->field->field_name);
+    }
+    else
+    {
+      fprintf(DBUG_FILE, "  equality%ld: multi-equality\n", 
+              (long)(eq_mod - equality_mods));
+    }
+  }
+  fprintf(DBUG_FILE,"\n");
+
+  /* Then tables and their fields */
+  for (uint i=0; i < MAX_TABLES; i++)
+  {
+    Dep_value_table *table_dep;
+    if ((table_dep= table_deps[i]))
+    {
+      /* Print table */
+      fprintf(DBUG_FILE, "  table %s\n", table_dep->table->alias.c_ptr());
+      /* Print fields */
+      for (Dep_value_field *field_dep= table_dep->fields; field_dep; 
+           field_dep= field_dep->next_table_field)
+      {
+        fprintf(DBUG_FILE, "    field %s.%s ->",
+                table_dep->table->alias.c_ptr(),
+                field_dep->field->field_name);
+        uint ofs= field_dep->bitmap_offset;
+        for (uint bit= ofs; bit < ofs + n_equality_mods; bit++)
+        {
+          if (bitmap_is_set(&expr_deps, bit))
+            fprintf(DBUG_FILE, " equality%u ", bit - ofs);
+        }
+        fprintf(DBUG_FILE, "\n");
+      }
+    }
+  }
+  fprintf(DBUG_FILE,"\n}\n");
+  DBUG_UNLOCK_FILE;
+  DBUG_VOID_RETURN;
+}
+/* purecov: end */
+
+#endif 
+/**
+  @} (end of group Table_Elimination)
+*/
+
diff --git a/sql/parse_file.cc b/sql/parse_file.cc
index 99fa33fdabb..699aa7e2b95 100644
--- a/sql/parse_file.cc
+++ b/sql/parse_file.cc
@@ -219,7 +219,7 @@ sql_create_definition_file(const LEX_STRING *dir, const LEX_STRING *file_name,
   File_option *param;
   DBUG_ENTER("sql_create_definition_file");
   DBUG_PRINT("enter", ("Dir: %s, file: %s, base 0x%lx",
-		       dir ? dir->str : "(null)",
+		       dir ? dir->str : "",
                        file_name->str, (ulong) base));
 
   if (dir)
@@ -386,7 +386,7 @@ sql_parse_prepare(const LEX_STRING *file_name, MEM_ROOT *mem_root,
     DBUG_RETURN(0);
   }
 
-  if (!(parser->buff= (char*) alloc_root(mem_root, stat_info.st_size+1)))
+  if (!(parser->buff= (char*) alloc_root(mem_root, (size_t)(stat_info.st_size+1))))
   {
     DBUG_RETURN(0);
   }
diff --git a/sql/parse_file.h b/sql/parse_file.h
index 3ceb8458039..20e3051e671 100644
--- a/sql/parse_file.h
+++ b/sql/parse_file.h
@@ -97,12 +97,12 @@ class File_parser: public Sql_alloc
 {
   char *buff, *start, *end;
   LEX_STRING file_type;
-  my_bool content_ok;
+  bool content_ok;
 public:
   File_parser() :buff(0), start(0), end(0), content_ok(0)
     { file_type.str= 0; file_type.length= 0; }
 
-  my_bool ok() { return content_ok; }
+  bool ok() { return content_ok; }
   LEX_STRING *type() { return &file_type; }
   my_bool parse(uchar* base, MEM_ROOT *mem_root,
 		struct File_option *parameters, uint required,
diff --git a/sql/partition_element.h b/sql/partition_element.h
index d6ee44078f5..87f3d00e68c 100644
--- a/sql/partition_element.h
+++ b/sql/partition_element.h
@@ -103,6 +103,7 @@ public:
   char* data_file_name;
   char* index_file_name;
   handlerton *engine_type;
+  LEX_STRING connect_string;
   enum partition_state part_state;
   uint16 nodegroup_id;
   bool has_null_value;
@@ -119,6 +120,8 @@ public:
     nodegroup_id(UNDEF_NODEGROUP), has_null_value(FALSE),
     signed_flag(FALSE), max_value(FALSE)
   {
+    connect_string.str= 0;
+    connect_string.length= 0;
   }
   partition_element(partition_element *part_elem)
   : part_max_rows(part_elem->part_max_rows),
@@ -129,10 +132,13 @@ public:
     data_file_name(part_elem->data_file_name),
     index_file_name(part_elem->index_file_name),
     engine_type(part_elem->engine_type),
+    connect_string(part_elem->connect_string),
     part_state(part_elem->part_state),
     nodegroup_id(part_elem->nodegroup_id),
     has_null_value(FALSE)
   {
+    connect_string.str= 0;
+    connect_string.length= 0;
   }
   ~partition_element() {}
 };
diff --git a/sql/partition_info.cc b/sql/partition_info.cc
index cbf22d18c87..e62b895a839 100644
--- a/sql/partition_info.cc
+++ b/sql/partition_info.cc
@@ -1356,7 +1356,7 @@ void partition_info::print_no_partition_found(TABLE *table_arg)
       if (part_expr->null_value)
         buf_ptr= (char*)"NULL";
       else
-        longlong2str(err_value, buf,
+        longlong10_to_str(err_value, buf,
                      part_expr->unsigned_flag ? 10 : -10);
       dbug_tmp_restore_column_map(table_arg->read_set, old_map);
     }
diff --git a/sql/password.c b/sql/password.c
index ca194204360..a7e599d4777 100644
--- a/sql/password.c
+++ b/sql/password.c
@@ -71,41 +71,12 @@
 /*
   New (MySQL 3.21+) random generation structure initialization
   SYNOPSIS
-    randominit()
+    my_rnd_init()
     rand_st    OUT  Structure to initialize
     seed1      IN   First initialization parameter
     seed2      IN   Second initialization parameter
 */
 
-void randominit(struct rand_struct *rand_st, ulong seed1, ulong seed2)
-{                                               /* For mysql 3.21.# */
-#ifdef HAVE_purify
-  bzero((char*) rand_st,sizeof(*rand_st));      /* Avoid UMC varnings */
-#endif
-  rand_st->max_value= 0x3FFFFFFFL;
-  rand_st->max_value_dbl=(double) rand_st->max_value;
-  rand_st->seed1=seed1%rand_st->max_value ;
-  rand_st->seed2=seed2%rand_st->max_value;
-}
-
-
-/*
-    Generate random number.
-  SYNOPSIS
-    my_rnd()
-    rand_st    INOUT  Structure used for number generation
-  RETURN VALUE
-    generated pseudo random number
-*/
-
-double my_rnd(struct rand_struct *rand_st)
-{
-  rand_st->seed1=(rand_st->seed1*3+rand_st->seed2) % rand_st->max_value;
-  rand_st->seed2=(rand_st->seed1+rand_st->seed2+33) % rand_st->max_value;
-  return (((double) rand_st->seed1)/rand_st->max_value_dbl);
-}
-
-
 /*
     Generate binary hash from raw text string 
     Used for Pre-4.1 password handling
@@ -185,7 +156,7 @@ void make_scrambled_password_323(char *to, const char *password)
 
 void scramble_323(char *to, const char *message, const char *password)
 {
-  struct rand_struct rand_st;
+  struct my_rnd_struct rand_st;
   ulong hash_pass[2], hash_message[2];
 
   if (password && password[0])
@@ -194,7 +165,7 @@ void scramble_323(char *to, const char *message, const char *password)
     const char *message_end= message + SCRAMBLE_LENGTH_323;
     hash_password(hash_pass,password, (uint) strlen(password));
     hash_password(hash_message, message, SCRAMBLE_LENGTH_323);
-    randominit(&rand_st,hash_pass[0] ^ hash_message[0],
+    my_rnd_init(&rand_st,hash_pass[0] ^ hash_message[0],
                hash_pass[1] ^ hash_message[1]);
     for (; message < message_end; message++)
       *to++= (char) (floor(my_rnd(&rand_st)*31)+64);
@@ -222,7 +193,7 @@ my_bool
 check_scramble_323(const unsigned char *scrambled, const char *message,
                    ulong *hash_pass)
 {
-  struct rand_struct rand_st;
+  struct my_rnd_struct rand_st;
   ulong hash_message[2];
   /* Big enough for checks. */
   uchar buff[16], scrambled_buff[SCRAMBLE_LENGTH_323 + 1];
@@ -235,7 +206,7 @@ check_scramble_323(const unsigned char *scrambled, const char *message,
   scrambled= scrambled_buff;
 
   hash_password(hash_message, message, SCRAMBLE_LENGTH_323);
-  randominit(&rand_st,hash_pass[0] ^ hash_message[0],
+  my_rnd_init(&rand_st,hash_pass[0] ^ hash_message[0],
              hash_pass[1] ^ hash_message[1]);
   to=buff;
   DBUG_ASSERT(sizeof(buff) > SCRAMBLE_LENGTH_323);
@@ -316,7 +287,8 @@ void make_password_from_salt_323(char *to, const ulong *salt)
     rand_st  INOUT structure used for number generation
 */
 
-void create_random_string(char *to, uint length, struct rand_struct *rand_st)
+void create_random_string(char *to, uint length,
+                          struct my_rnd_struct *rand_st)
 {
   char *end= to + length;
   /* Use pointer arithmetics as it is faster way to do so. */
diff --git a/sql/procedure.h b/sql/procedure.h
index aa5a973c4cc..6870b97de57 100644
--- a/sql/procedure.h
+++ b/sql/procedure.h
@@ -16,7 +16,6 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
 
-
 /* When using sql procedures */
 
 #ifdef USE_PRAGMA_INTERFACE
@@ -53,7 +52,11 @@ public:
   {
     init_make_field(tmp_field,field_type());
   }
-  unsigned int size_of() { return sizeof(*this);}  
+  unsigned int size_of() { return sizeof(*this);}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("proc"); 
+  }
 };
 
 class Item_proc_real :public Item_proc
diff --git a/sql/protocol.cc b/sql/protocol.cc
index 53ab032517e..eb9c3e34dbf 100644
--- a/sql/protocol.cc
+++ b/sql/protocol.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -62,8 +63,6 @@ bool Protocol_binary::net_store_data(const uchar *from, size_t length)
 }
 
 
-
-
 /*
   net_store_data() - extended version with character set conversion.
   
@@ -83,6 +82,7 @@ bool Protocol::net_store_data(const uchar *from, size_t length,
   uint dummy_errors;
   /* Calculate maxumum possible result length */
   uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen;
+
   if (conv_length > 250)
   {
     /*
@@ -524,7 +524,6 @@ void Protocol::end_statement()
   DBUG_VOID_RETURN;
 }
 
-
 /**
   A default implementation of "OK" packet response to the client.
 
@@ -575,6 +574,56 @@ bool Protocol::send_error(uint sql_errno, const char *err_msg,
 }
 
 
+/**
+   Send a progress report to the client
+
+   What we send is:
+   header (255,255,255,1): command byte 255, progress_header, string count
+   stage, max_stage as one byte integers
+   percentage within the stage as percentage*1000
+   (that is, ratio*100000) as a 3 byte integer
+   proc_info as a string
+*/
+
+const uchar progress_header[2]= {(uchar) 255, (uchar) 255 };
+
+void net_send_progress_packet(THD *thd)
+{
+  uchar buff[200], *pos;
+  const char *proc_info= thd->proc_info ? thd->proc_info : "";
+  uint length= strlen(proc_info);
+  ulonglong progress;
+  DBUG_ENTER("net_send_progress_packet");
+
+  if (unlikely(!thd->net.vio))
+    DBUG_VOID_RETURN;                           // Socket is closed
+
+  pos= buff;
+  /*
+    Store number of strings first. This allows us to later expand the
+    progress indicator if needed.
+  */
+  *pos++= (uchar) 1;                            // Number of strings
+  *pos++= (uchar) thd->progress.stage + 1;      // Sent as stage + 1
+  /*
+    We have the max() here to avoid problems if max_stage is not set,
+    which may happen during automatic repair of table
+  */
+  *pos++= (uchar) max(thd->progress.max_stage, thd->progress.stage + 1);
+  progress= 0;                                  // Sent if max_counter is 0
+  if (thd->progress.max_counter)
+    progress= 100000ULL * thd->progress.counter / thd->progress.max_counter;
+  int3store(pos, progress);                          // Between 0 & 100000
+  pos+= 3;                                      // 6 bytes of buff used so far
+  pos= net_store_data(pos, (const uchar*) proc_info,
+                      min(length, sizeof(buff)-7)); // 7th: presumably length byte
+  net_write_command(&thd->net, (uchar) 255, progress_header,
+                    sizeof(progress_header), (uchar*) buff,
+                    (uint) (pos - buff));
+  DBUG_VOID_RETURN;
+}
+
+  
 /****************************************************************************
   Functions used by the protocol functions (like net_send_ok) to store
   strings and numbers in the header result packet.
@@ -630,7 +679,8 @@ void Protocol::init(THD *thd_arg)
 
 void Protocol::end_partial_result_set(THD *thd_arg)
 {
-  net_send_eof(thd_arg, thd_arg->server_status, 0 /* no warnings, we're inside SP */);
+  net_send_eof(thd_arg, thd_arg->server_status,
+               0 /* no warnings, we're inside SP */);
 }
 
 
@@ -676,7 +726,7 @@ bool Protocol::send_result_set_metadata(List<Item> *list, uint flags)
   Protocol_text prot(thd);
   String *local_packet= prot.storage_packet();
   CHARSET_INFO *thd_charset= thd->variables.character_set_results;
-  DBUG_ENTER("send_result_set_metadata");
+  DBUG_ENTER("Protocol::send_result_set_metadata");
 
   if (flags & SEND_NUM_ROWS)
   {				// Packet with number of elements
@@ -691,6 +741,9 @@ bool Protocol::send_result_set_metadata(List<Item> *list, uint flags)
   uint count= 0;
 #endif
 
+  /* We have to reallocate it here as a stored procedure may have reset it */
+  (void) local_packet->alloc(thd->variables.net_buffer_length);
+
   while ((item=it++))
   {
     char *pos;
@@ -1118,13 +1171,7 @@ bool Protocol_text::store(Field *field)
 }
 
 
-/**
-  @todo
-    Second_part format ("%06") needs to change when 
-    we support 0-6 decimals for time.
-*/
-
-bool Protocol_text::store(MYSQL_TIME *tm)
+bool Protocol_text::store(MYSQL_TIME *tm, int decimals)
 {
 #ifndef DBUG_OFF
   DBUG_ASSERT(field_types == 0 ||
@@ -1132,14 +1179,8 @@ bool Protocol_text::store(MYSQL_TIME *tm)
 	      field_types[field_pos] == MYSQL_TYPE_TIMESTAMP);
   field_pos++;
 #endif
-  char buff[40];
-  uint length;
-  length= sprintf(buff, "%04d-%02d-%02d %02d:%02d:%02d",
-                  (int) tm->year, (int) tm->month,
-                  (int) tm->day, (int) tm->hour,
-                  (int) tm->minute, (int) tm->second);
-  if (tm->second_part)
-    length+= sprintf(buff+length, ".%06d", (int) tm->second_part);
+  char buff[MAX_DATE_STRING_REP_LENGTH];
+  uint length= my_datetime_to_str(tm, buff, decimals);
   return net_store_data((uchar*) buff, length);
 }
 
@@ -1157,27 +1198,15 @@ bool Protocol_text::store_date(MYSQL_TIME *tm)
 }
 
 
-/**
-  @todo 
-    Second_part format ("%06") needs to change when 
-    we support 0-6 decimals for time.
-*/
-
-bool Protocol_text::store_time(MYSQL_TIME *tm)
+bool Protocol_text::store_time(MYSQL_TIME *tm, int decimals)
 {
 #ifndef DBUG_OFF
   DBUG_ASSERT(field_types == 0 ||
 	      field_types[field_pos] == MYSQL_TYPE_TIME);
   field_pos++;
 #endif
-  char buff[40];
-  uint length;
-  uint day= (tm->year || tm->month) ? 0 : tm->day;
-  length= sprintf(buff, "%s%02ld:%02d:%02d", tm->neg ? "-" : "",
-                  (long) day*24L+(long) tm->hour, (int) tm->minute,
-                  (int) tm->second);
-  if (tm->second_part)
-    length+= sprintf(buff+length, ".%06d", (int) tm->second_part);
+  char buff[MAX_DATE_STRING_REP_LENGTH];
+  uint length= my_time_to_str(tm, buff, decimals);
   return net_store_data((uchar*) buff, length);
 }
 
@@ -1380,7 +1409,7 @@ bool Protocol_binary::store(Field *field)
 }
 
 
-bool Protocol_binary::store(MYSQL_TIME *tm)
+bool Protocol_binary::store(MYSQL_TIME *tm, int decimals)
 {
   char buff[12],*pos;
   uint length;
@@ -1393,6 +1422,10 @@ bool Protocol_binary::store(MYSQL_TIME *tm)
   pos[4]= (uchar) tm->hour;
   pos[5]= (uchar) tm->minute;
   pos[6]= (uchar) tm->second;
+  DBUG_ASSERT(decimals == AUTO_SEC_PART_DIGITS ||
+              (decimals >= 0 && decimals <= TIME_SECOND_PART_DIGITS));
+  if (decimals != AUTO_SEC_PART_DIGITS)
+    tm->second_part= sec_part_truncate(tm->second_part, decimals);
   int4store(pos+7, tm->second_part);
   if (tm->second_part)
     length=11;
@@ -1410,11 +1443,11 @@ bool Protocol_binary::store_date(MYSQL_TIME *tm)
 {
   tm->hour= tm->minute= tm->second=0;
   tm->second_part= 0;
-  return Protocol_binary::store(tm);
+  return Protocol_binary::store(tm, 0);
 }
 
 
-bool Protocol_binary::store_time(MYSQL_TIME *tm)
+bool Protocol_binary::store_time(MYSQL_TIME *tm, int decimals)
 {
   char buff[13], *pos;
   uint length;
@@ -1423,7 +1456,6 @@ bool Protocol_binary::store_time(MYSQL_TIME *tm)
   pos[0]= tm->neg ? 1 : 0;
   if (tm->hour >= 24)
   {
-    /* Fix if we come from Item::send */
     uint days= tm->hour/24;
     tm->hour-= days*24;
     tm->day+= days;
@@ -1432,6 +1464,10 @@ bool Protocol_binary::store_time(MYSQL_TIME *tm)
   pos[5]= (uchar) tm->hour;
   pos[6]= (uchar) tm->minute;
   pos[7]= (uchar) tm->second;
+  DBUG_ASSERT(decimals == AUTO_SEC_PART_DIGITS ||
+              (decimals >= 0 && decimals <= TIME_SECOND_PART_DIGITS));
+  if (decimals != AUTO_SEC_PART_DIGITS)
+    tm->second_part= sec_part_truncate(tm->second_part, decimals);
   int4store(pos+8, tm->second_part);
   if (tm->second_part)
     length=12;
diff --git a/sql/protocol.h b/sql/protocol.h
index 3bd79b5ab58..d9bc48ce45d 100644
--- a/sql/protocol.h
+++ b/sql/protocol.h
@@ -109,9 +109,9 @@ public:
   		     CHARSET_INFO *fromcs, CHARSET_INFO *tocs)=0;
   virtual bool store(float from, uint32 decimals, String *buffer)=0;
   virtual bool store(double from, uint32 decimals, String *buffer)=0;
-  virtual bool store(MYSQL_TIME *time)=0;
+  virtual bool store(MYSQL_TIME *time, int decimals)=0;
   virtual bool store_date(MYSQL_TIME *time)=0;
-  virtual bool store_time(MYSQL_TIME *time)=0;
+  virtual bool store_time(MYSQL_TIME *time, int decimals)=0;
   virtual bool store(Field *field)=0;
 
   virtual bool send_out_parameters(List<Item_param> *sp_params)=0;
@@ -152,9 +152,9 @@ public:
   virtual bool store(const char *from, size_t length, CHARSET_INFO *cs);
   virtual bool store(const char *from, size_t length,
   		     CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
-  virtual bool store(MYSQL_TIME *time);
+  virtual bool store(MYSQL_TIME *time, int decimals);
   virtual bool store_date(MYSQL_TIME *time);
-  virtual bool store_time(MYSQL_TIME *time);
+  virtual bool store_time(MYSQL_TIME *time, int decimals);
   virtual bool store(float nr, uint32 decimals, String *buffer);
   virtual bool store(double from, uint32 decimals, String *buffer);
   virtual bool store(Field *field);
@@ -189,9 +189,9 @@ public:
   virtual bool store(const char *from, size_t length, CHARSET_INFO *cs);
   virtual bool store(const char *from, size_t length,
   		     CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
-  virtual bool store(MYSQL_TIME *time);
+  virtual bool store(MYSQL_TIME *time, int decimals);
   virtual bool store_date(MYSQL_TIME *time);
-  virtual bool store_time(MYSQL_TIME *time);
+  virtual bool store_time(MYSQL_TIME *time, int decimals);
   virtual bool store(float nr, uint32 decimals, String *buffer);
   virtual bool store(double from, uint32 decimals, String *buffer);
   virtual bool store(Field *field);
@@ -204,6 +204,7 @@ public:
 void send_warning(THD *thd, uint sql_errno, const char *err=0);
 bool net_send_error(THD *thd, uint sql_errno, const char *err,
                     const char* sqlstate);
+void net_send_progress_packet(THD *thd);
 uchar *net_store_data(uchar *to,const uchar *from, size_t length);
 uchar *net_store_data(uchar *to,int32 from);
 uchar *net_store_data(uchar *to,longlong from);
diff --git a/sql/records.cc b/sql/records.cc
index 2274b1b5699..a2bb49f9792 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -30,7 +30,7 @@
 #include "filesort.h"            // filesort_free_buffers
 #include "opt_range.h"                          // SQL_SELECT
 #include "sql_class.h"                          // THD
-
+#include "sql_base.h"
 
 static int rr_quick(READ_RECORD *info);
 int rr_sequential(READ_RECORD *info);
@@ -169,7 +169,8 @@ void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
     This is the most basic access method of a table using rnd_init,
     rnd_next and rnd_end. No indexes are used.
 */
-void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
+
+bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
 		      SQL_SELECT *select,
 		      int use_record_cache, bool print_error, 
                       bool disable_rr_cache)
@@ -206,18 +207,10 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
 
   if (select && my_b_inited(&select->file))
     tempfile= &select->file;
-  else if (select && select->quick && select->quick->clustered_pk_range())
-  {
-    /*
-      In case of QUICK_INDEX_MERGE_SELECT with clustered pk range we have to
-      use its own access method(i.e QUICK_INDEX_MERGE_SELECT::get_next()) as
-      sort file does not contain rowids which satisfy clustered pk range.
-    */
-    tempfile= 0;
-  }
   else
     tempfile= table->sort.io_cache;
-  if (tempfile && my_b_inited(tempfile)) // Test if ref-records was used
+  if (tempfile && my_b_inited(tempfile) &&
+      !(select && select->quick)) 
   {
     DBUG_PRINT("info",("using rr_from_tempfile"));
     info->read_record= (table->sort.addon_field ?
@@ -226,7 +219,8 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
     reinit_io_cache(info->io_cache,READ_CACHE,0L,0,0);
     info->ref_pos=table->file->ref;
     if (!table->file->inited)
-      table->file->ha_rnd_init(0);
+      if (table->file->ha_rnd_init_with_error(0))
+        DBUG_RETURN(1);
 
     /*
       table->sort.addon_field is checked because if we use addon fields,
@@ -235,7 +229,6 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
     */
     if (!disable_rr_cache &&
         !table->sort.addon_field &&
-        ! (specialflag & SPECIAL_SAFE_MODE) &&
 	thd->variables.read_rnd_buff_size &&
 	!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
 	(table->db_stat & HA_READ_ONLY ||
@@ -263,7 +256,8 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
   else if (table->sort.record_pointers)
   {
     DBUG_PRINT("info",("using record_pointers"));
-    table->file->ha_rnd_init(0);
+    if (table->file->ha_rnd_init_with_error(0))
+      DBUG_RETURN(1);
     info->cache_pos=table->sort.record_pointers;
     info->cache_end=info->cache_pos+ 
                     table->sort.found_records*info->ref_length;
@@ -274,7 +268,8 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
   {
     DBUG_PRINT("info",("using rr_sequential"));
     info->read_record=rr_sequential;
-    table->file->ha_rnd_init(1);
+    if (table->file->ha_rnd_init_with_error(1))
+      DBUG_RETURN(1);
     /* We can use record cache if we don't update dynamic length tables */
     if (!table->no_cache &&
 	(use_record_cache > 0 ||
@@ -293,7 +288,7 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
       !table->file->pushed_cond)
     table->file->cond_push(select->cond);
 
-  DBUG_VOID_RETURN;
+  DBUG_RETURN(0);
 } /* init_read_record */
 
 
@@ -308,7 +303,8 @@ void end_read_record(READ_RECORD *info)
   if (info->table)
   {
     filesort_free_buffers(info->table,0);
-    (void) info->file->extra(HA_EXTRA_NO_CACHE);
+    if (info->table->created)
+      (void) info->file->extra(HA_EXTRA_NO_CACHE);
     if (info->read_record != rr_quick) // otherwise quick_range does it
       (void) info->file->ha_index_or_rnd_end();
     info->table=0;
@@ -349,6 +345,7 @@ static int rr_quick(READ_RECORD *info)
       break;
     }
   }
+  update_virtual_fields(info->thd, info->table);
   return tmp;
 }
 
@@ -368,7 +365,7 @@ static int rr_quick(READ_RECORD *info)
 
 static int rr_index_first(READ_RECORD *info)
 {
-  int tmp= info->file->index_first(info->record);
+  int tmp= info->file->ha_index_first(info->record);
   info->read_record= rr_index;
   if (tmp)
     tmp= rr_handle_error(info, tmp);
@@ -391,7 +388,7 @@ static int rr_index_first(READ_RECORD *info)
 
 static int rr_index_last(READ_RECORD *info)
 {
-  int tmp= info->file->index_last(info->record);
+  int tmp= info->file->ha_index_last(info->record);
   info->read_record= rr_index_desc;
   if (tmp)
     tmp= rr_handle_error(info, tmp);
@@ -417,7 +414,7 @@ static int rr_index_last(READ_RECORD *info)
 
 static int rr_index(READ_RECORD *info)
 {
-  int tmp= info->file->index_next(info->record);
+  int tmp= info->file->ha_index_next(info->record);
   if (tmp)
     tmp= rr_handle_error(info, tmp);
   return tmp;
@@ -442,7 +439,7 @@ static int rr_index(READ_RECORD *info)
 
 static int rr_index_desc(READ_RECORD *info)
 {
-  int tmp= info->file->index_prev(info->record);
+  int tmp= info->file->ha_index_prev(info->record);
   if (tmp)
     tmp= rr_handle_error(info, tmp);
   return tmp;
@@ -452,7 +449,7 @@ static int rr_index_desc(READ_RECORD *info)
 int rr_sequential(READ_RECORD *info)
 {
   int tmp;
-  while ((tmp=info->file->rnd_next(info->record)))
+  while ((tmp= info->file->ha_rnd_next(info->record)))
   {
     /*
       rnd_next can return RECORD_DELETED for MyISAM when one thread is
@@ -464,6 +461,8 @@ int rr_sequential(READ_RECORD *info)
       break;
     }
   }
+  if (!tmp)
+    update_virtual_fields(info->thd, info->table);
   return tmp;
 }
 
@@ -475,7 +474,7 @@ static int rr_from_tempfile(READ_RECORD *info)
   {
     if (my_b_read(info->io_cache,info->ref_pos,info->ref_length))
       return -1;					/* End of file */
-    if (!(tmp=info->file->rnd_pos(info->record,info->ref_pos)))
+    if (!(tmp= info->file->ha_rnd_pos(info->record,info->ref_pos)))
       break;
     /* The following is extremely unlikely to happen */
     if (tmp == HA_ERR_RECORD_DELETED ||
@@ -526,7 +525,7 @@ static int rr_from_pointers(READ_RECORD *info)
     cache_pos= info->cache_pos;
     info->cache_pos+= info->ref_length;
 
-    if (!(tmp=info->file->rnd_pos(info->record,cache_pos)))
+    if (!(tmp= info->file->ha_rnd_pos(info->record,cache_pos)))
       break;
 
     /* The following is extremely unlikely to happen */
@@ -589,7 +588,7 @@ static int init_rr_cache(THD *thd, READ_RECORD *info)
 					   info->struct_length+1,
 					   MYF(0))))
     DBUG_RETURN(1);
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
   // Avoid warnings in qsort
   bzero(info->cache,rec_cache_size+info->cache_records* info->struct_length+1);
 #endif
@@ -659,7 +658,7 @@ static int rr_from_cache(READ_RECORD *info)
       record=uint3korr(position);
       position+=3;
       record_pos=info->cache+record*info->reclength;
-      if ((error=(int16) info->file->rnd_pos(record_pos,info->ref_pos)))
+      if ((error=(int16) info->file->ha_rnd_pos(record_pos,info->ref_pos)))
       {
 	record_pos[info->error_offset]=1;
 	shortstore(record_pos,error);
diff --git a/sql/records.h b/sql/records.h
index 28f83a6ea8b..27e861cbd1e 100644
--- a/sql/records.h
+++ b/sql/records.h
@@ -25,6 +25,7 @@ class handler;
 struct TABLE;
 class THD;
 class SQL_SELECT;
+class Copy_field;
 
 /**
   A context for reading through a single table using a chosen access method:
@@ -63,11 +64,17 @@ struct READ_RECORD
   struct st_io_cache *io_cache;
   bool print_error, ignore_not_found_rows;
 
+  /* 
+    SJ-Materialization runtime may need to read fields from the materialized
+    table and unpack them into original table fields:
+  */
+  Copy_field *copy_field;
+  Copy_field *copy_field_end;
 public:
   READ_RECORD() {}
 };
 
-void init_read_record(READ_RECORD *info, THD *thd, TABLE *reg_form,
+bool init_read_record(READ_RECORD *info, THD *thd, TABLE *reg_form,
 		      SQL_SELECT *select, int use_record_cache,
                       bool print_errors, bool disable_rr_cache);
 void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc
index 2c3d28462aa..e355be185db 100644
--- a/sql/repl_failsafe.cc
+++ b/sql/repl_failsafe.cc
@@ -217,7 +217,6 @@ void end_slave_list()
   }
 }
 
-
 /**
   Execute a SHOW SLAVE HOSTS statement.
 
diff --git a/sql/rpl_filter.cc b/sql/rpl_filter.cc
index e8b7837744b..5f5473e09ab 100644
--- a/sql/rpl_filter.cc
+++ b/sql/rpl_filter.cc
@@ -14,7 +14,7 @@
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
 
 #include "sql_priv.h"
-#include "unireg.h"                      // REQUIRED by other includes
+#include "mysqld.h"                             // system_charset_info
 #include "rpl_filter.h"
 #include "hash.h"                               // my_hash_free
 #include "table.h"                              // TABLE_LIST
@@ -48,6 +48,7 @@ Rpl_filter::~Rpl_filter()
 }
 
 
+#ifndef MYSQL_CLIENT
 /*
   Returns true if table should be logged/replicated 
 
@@ -94,7 +95,7 @@ Rpl_filter::tables_ok(const char* db, TABLE_LIST* tables)
   
   for (; tables; tables= tables->next_global)
   {
-    char hash_key[2*NAME_LEN+2];
+    char hash_key[SAFE_NAME_LEN*2+2];
     char *end;
     uint len;
 
@@ -132,6 +133,7 @@ Rpl_filter::tables_ok(const char* db, TABLE_LIST* tables)
               !do_table_inited && !wild_do_table_inited);
 }
 
+#endif
 
 /*
   Checks whether a db matches some do_db and ignore_db rules
@@ -158,8 +160,10 @@ Rpl_filter::db_ok(const char* db)
     and db was not selected, do not replicate.
   */
   if (!db)
+  {
+    DBUG_PRINT("exit", ("Don't replicate"));
     DBUG_RETURN(0);
-
+  }
   if (!do_db.is_empty()) // if the do's are not empty
   {
     I_List_iterator<i_string> it(do_db);
@@ -170,6 +174,7 @@ Rpl_filter::db_ok(const char* db)
       if (!strcmp(tmp->ptr, db))
 	DBUG_RETURN(1); // match
     }
+    DBUG_PRINT("exit", ("Don't replicate"));
     DBUG_RETURN(0);
   }
   else // there are some elements in the don't, otherwise we cannot get here
@@ -180,7 +185,10 @@ Rpl_filter::db_ok(const char* db)
     while ((tmp=it++))
     {
       if (!strcmp(tmp->ptr, db))
+      {
+        DBUG_PRINT("exit", ("Don't replicate"));
 	DBUG_RETURN(0); // match
+      }
     }
     DBUG_RETURN(1);
   }
@@ -222,7 +230,7 @@ Rpl_filter::db_ok_with_wild_table(const char *db)
 {
   DBUG_ENTER("Rpl_filter::db_ok_with_wild_table");
 
-  char hash_key[NAME_LEN+2];
+  char hash_key[SAFE_NAME_LEN+2];
   char *end;
   int len;
   end= strmov(hash_key, db);
@@ -517,6 +525,13 @@ Rpl_filter::get_wild_ignore_table(String* str)
 }
 
 
+bool
+Rpl_filter::rewrite_db_is_empty()
+{
+  return rewrite_db.is_empty();
+}
+
+
 const char*
 Rpl_filter::get_rewrite_db(const char* db, size_t *new_len)
 {
diff --git a/sql/rpl_filter.h b/sql/rpl_filter.h
index b3a6a07842d..d32fb36d6fb 100644
--- a/sql/rpl_filter.h
+++ b/sql/rpl_filter.h
@@ -48,7 +48,9 @@ public:
  
   /* Checks - returns true if ok to replicate/log */
 
-  bool tables_ok(const char* db, TABLE_LIST* tables);
+#ifndef MYSQL_CLIENT
+  bool tables_ok(const char* db, TABLE_LIST *tables);
+#endif 
   bool db_ok(const char* db);
   bool db_ok_with_wild_table(const char *db);
 
@@ -75,6 +77,7 @@ public:
   void get_wild_do_table(String* str);
   void get_wild_ignore_table(String* str);
 
+  bool rewrite_db_is_empty();
   const char* get_rewrite_db(const char* db, size_t *new_len);
 
   I_List<i_string>* get_do_db();
diff --git a/sql/rpl_handler.cc b/sql/rpl_handler.cc
index c784001e970..9267190605c 100644
--- a/sql/rpl_handler.cc
+++ b/sql/rpl_handler.cc
@@ -43,7 +43,7 @@ static pthread_key(Trans_binlog_info*, RPL_TRANS_BINLOG_INFO);
 int get_user_var_int(const char *name,
                      long long int *value, int *null_value)
 {
-  my_bool null_val;
+  bool null_val;
   user_var_entry *entry= 
     (user_var_entry*) my_hash_search(&current_thd->user_vars,
                                   (uchar*) name, strlen(name));
@@ -58,7 +58,7 @@ int get_user_var_int(const char *name,
 int get_user_var_real(const char *name,
                       double *value, int *null_value)
 {
-  my_bool null_val;
+  bool null_val;
   user_var_entry *entry= 
     (user_var_entry*) my_hash_search(&current_thd->user_vars,
                                   (uchar*) name, strlen(name));
@@ -74,7 +74,7 @@ int get_user_var_str(const char *name, char *value,
                      size_t len, unsigned int precision, int *null_value)
 {
   String str;
-  my_bool null_val;
+  bool null_val;
   user_var_entry *entry= 
     (user_var_entry*) my_hash_search(&current_thd->user_vars,
                                   (uchar*) name, strlen(name));
@@ -196,7 +196,7 @@ void delegates_destroy()
   for (; info; info= iter++)                                            \
   {                                                                     \
     plugin_ref plugin=                                                  \
-      my_plugin_lock(0, &info->plugin);                                 \
+      my_plugin_lock(0,  info->plugin);                                 \
     if (!plugin)                                                        \
     {                                                                   \
       /* plugin is not intialized or deleted, this is not an error */   \
@@ -357,7 +357,7 @@ int Binlog_transmit_delegate::reserve_header(THD *thd, ushort flags,
   for (; info; info= iter++)
   {
     plugin_ref plugin=
-      my_plugin_lock(thd, &info->plugin);
+      my_plugin_lock(thd, info->plugin);
     if (!plugin)
     {
       ret= 1;
diff --git a/sql/rpl_injector.cc b/sql/rpl_injector.cc
index d4e3e0fd7b4..dfa5ef95e67 100644
--- a/sql/rpl_injector.cc
+++ b/sql/rpl_injector.cc
@@ -40,6 +40,7 @@ injector::transaction::transaction(MYSQL_BIN_LOG *log, THD *thd)
   m_start_pos.m_file_name= my_strdup(log_info.log_file_name, MYF(0));
   m_start_pos.m_file_pos= log_info.pos;
 
+  m_thd->lex->start_transaction_opt= 0; /* for begin_trans() */
   trans_begin(m_thd);
 }
 
diff --git a/sql/rpl_injector.h b/sql/rpl_injector.h
index 74768559f05..f4790cc963a 100644
--- a/sql/rpl_injector.h
+++ b/sql/rpl_injector.h
@@ -290,7 +290,7 @@ public:
           "START_STATE", "TABLE_STATE", "ROW_STATE", "STATE_COUNT"
         };
 
-        DBUG_ASSERT(0 <= target_state && target_state <= STATE_COUNT);
+        DBUG_ASSERT(target_state <= STATE_COUNT);
         DBUG_PRINT("info", ("In state %s", state_name[m_state]));
 #endif
 
diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc
index 0060bcb6b1e..164173467f1 100644
--- a/sql/rpl_mi.cc
+++ b/sql/rpl_mi.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2006, 2010, Oracle and/or its affiliates.
+   Copyright (c) 2010, 2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -24,19 +25,13 @@
 
 #define DEFAULT_CONNECT_RETRY 60
 
-// Defined in slave.cc
-int init_intvar_from_file(int* var, IO_CACHE* f, int default_val);
-int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
-			  const char *default_val);
-int init_floatvar_from_file(float* var, IO_CACHE* f, float default_val);
-int init_dynarray_intvar_from_file(DYNAMIC_ARRAY* arr, IO_CACHE* f);
-
 static void init_master_log_pos(Master_info* mi);
 
 Master_info::Master_info(bool is_slave_recovery)
   :Slave_reporting_capability("I/O"),
-   ssl(0), ssl_verify_server_cert(0), fd(-1), io_thd(0), 
+   ssl(0), ssl_verify_server_cert(1), fd(-1), io_thd(0), 
    rli(is_slave_recovery), port(MYSQL_PORT),
+   checksum_alg_before_fd(BINLOG_CHECKSUM_ALG_UNDEF),
    connect_retry(DEFAULT_CONNECT_RETRY), inited(0), abort_slave(0),
    slave_running(0), slave_run_id(0), sync_counter(0),
    heartbeat_period(0), received_heartbeats(0), master_id(0)
@@ -49,6 +44,8 @@ Master_info::Master_info(bool is_slave_recovery)
   bzero((char*) &file, sizeof(file));
   mysql_mutex_init(key_master_info_run_lock, &run_lock, MY_MUTEX_INIT_FAST);
   mysql_mutex_init(key_master_info_data_lock, &data_lock, MY_MUTEX_INIT_FAST);
+  mysql_mutex_setflags(&run_lock, MYF_NO_DEADLOCK_DETECTION);
+  mysql_mutex_setflags(&data_lock, MYF_NO_DEADLOCK_DETECTION);
   mysql_cond_init(key_master_info_data_cond, &data_cond, NULL);
   mysql_cond_init(key_master_info_start_cond, &start_cond, NULL);
   mysql_cond_init(key_master_info_stop_cond, &stop_cond, NULL);
diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h
index 1434ba45519..54623577e0b 100644
--- a/sql/rpl_mi.h
+++ b/sql/rpl_mi.h
@@ -69,10 +69,10 @@ class Master_info : public Slave_reporting_capability
   char host[HOSTNAME_LENGTH+1];
   char user[USERNAME_LENGTH+1];
   char password[MAX_PASSWORD_LENGTH+1];
-  my_bool ssl; // enables use of SSL connection if true
+  bool ssl; // enables use of SSL connection if true
   char ssl_ca[FN_REFLEN], ssl_capath[FN_REFLEN], ssl_cert[FN_REFLEN];
   char ssl_cipher[FN_REFLEN], ssl_key[FN_REFLEN];
-  my_bool ssl_verify_server_cert;
+  bool ssl_verify_server_cert;
 
   my_off_t master_log_pos;
   File fd; // we keep the file open, so we need to remember the file pointer
@@ -85,6 +85,12 @@ class Master_info : public Slave_reporting_capability
   uint32 file_id;				/* for 3.23 load data infile */
   Relay_log_info rli;
   uint port;
+  /*
+    Holds the checksum alg in use until the IO thread has received FD.
+    Initialized to "no value", then set to @@global.binlog_checksum as
+    queried from the master, and deactivated once FD has been received.
+  */
+  uint8 checksum_alg_before_fd;
   uint connect_retry;
 #ifndef DBUG_OFF
   int events_till_disconnect;
diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc
index fc123acb104..c893dc52587 100644
--- a/sql/rpl_record.cc
+++ b/sql/rpl_record.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2007, 2010, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -102,7 +102,7 @@ pack_row(TABLE *table, MY_BITMAP const* cols,
         const uchar *old_pack_ptr= pack_ptr;
 #endif
         pack_ptr= field->pack(pack_ptr, field->ptr + offset,
-                              field->max_data_length(), TRUE);
+                              field->max_data_length());
         DBUG_PRINT("debug", ("field: %s; real_type: %d, pack_ptr: 0x%lx;"
                              " pack_ptr':0x%lx; bytes: %d",
                              field->field_name, field->real_type(),
@@ -303,7 +303,7 @@ unpack_row(Relay_log_info const *rli,
 #ifndef DBUG_OFF
         uchar const *const old_pack_ptr= pack_ptr;
 #endif
-        pack_ptr= f->unpack(f->ptr, pack_ptr, metadata, TRUE);
+        pack_ptr= f->unpack(f->ptr, pack_ptr, metadata);
 	DBUG_PRINT("debug", ("field: %s; metadata: 0x%x;"
                              " pack_ptr: 0x%lx; pack_ptr': 0x%lx; bytes: %d",
                              f->field_name, metadata,
diff --git a/sql/rpl_record.h b/sql/rpl_record.h
index e7d80960fe4..c857c07c33b 100644
--- a/sql/rpl_record.h
+++ b/sql/rpl_record.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2007, 2010, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
diff --git a/sql/rpl_record_old.h b/sql/rpl_record_old.h
index 074c7897c04..201f62b1946 100644
--- a/sql/rpl_record_old.h
+++ b/sql/rpl_record_old.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2007, 2010, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index e1c764a4248..26d405306c9 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -43,7 +43,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
    sync_counter(0), is_relay_log_recovery(is_slave_recovery),
    save_temporary_tables(0), cur_log_old_open_count(0), group_relay_log_pos(0), 
    event_relay_log_pos(0),
-#if HAVE_purify
+#if HAVE_valgrind
    is_fake(FALSE),
 #endif
    group_master_log_pos(0), log_space_total(0), ignore_log_space_limit(0),
@@ -52,7 +52,8 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
    inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE),
    until_log_pos(0), retried_trans(0),
    tables_to_lock(0), tables_to_lock_count(0),
-   last_event_start_time(0), m_flags(0)
+   last_event_start_time(0), m_flags(0),
+   m_annotate_event(0)
 {
   DBUG_ENTER("Relay_log_info::Relay_log_info");
 
@@ -95,6 +96,7 @@ Relay_log_info::~Relay_log_info()
   mysql_cond_destroy(&stop_cond);
   mysql_cond_destroy(&log_space_cond);
   relay_log.cleanup();
+  free_annotate_event();
   DBUG_VOID_RETURN;
 }
 
@@ -193,9 +195,13 @@ a file name for --relay-log-index option", opt_relaylog_index_name);
                         " so replication "
                         "may break when this MySQL server acts as a "
                         "slave and has his hostname changed!! Please "
-                        "use '--relay-log=%s' to avoid this problem.", ln);
+                        "use '--log-basename=#' or '--relay-log=%s' to avoid "
+                        "this problem.", ln);
       name_warning_sent= 1;
     }
+
+    rli->relay_log.is_relay_log= TRUE;
+
     /*
       note, that if open() fails, we'll still have index file open
       but a destructor will take care of that
@@ -209,7 +215,6 @@ a file name for --relay-log-index option", opt_relaylog_index_name);
       sql_print_error("Failed in open_log() called from init_relay_log_info()");
       DBUG_RETURN(1);
     }
-    rli->relay_log.is_relay_log= TRUE;
   }
 
   /* if file does not exist */
@@ -564,8 +569,9 @@ int init_relay_log_pos(Relay_log_info* rli,const char* log,
         Because of we have rli->data_lock and log_lock, we can safely read an
         event
       */
-      if (!(ev=Log_event::read_log_event(rli->cur_log,0,
-                                         rli->relay_log.description_event_for_exec)))
+      if (!(ev= Log_event::read_log_event(rli->cur_log, 0,
+                                          rli->relay_log.description_event_for_exec,
+                                          opt_slave_sql_verify_checksum)))
       {
         DBUG_PRINT("info",("could not read event, rli->cur_log->error=%d",
                            rli->cur_log->error));
@@ -1203,11 +1209,8 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos,
       is that value may take some time to display in
       Seconds_Behind_Master - not critical).
     */
-#ifndef DBUG_OFF
-    if (!(event_creation_time == 0 && debug_not_change_ts_if_art_event > 0))
-#else
-      if (event_creation_time != 0)
-#endif
+    if (!(event_creation_time == 0 &&
+          IF_DBUG(debug_not_change_ts_if_art_event > 0, 1)))
         last_master_timestamp= event_creation_time;
   }
 }
diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
index fb62279c4c1..3aa9a4f3488 100644
--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -21,6 +21,7 @@
 #include "rpl_utility.h"
 #include "log.h"                         /* LOG_INFO, MYSQL_BIN_LOG */
 #include "sql_class.h"                   /* THD */
+#include "log_event.h"
 
 struct RPL_TABLE_LIST;
 class Master_info;
@@ -180,7 +181,7 @@ public:
   ulonglong event_relay_log_pos;
   ulonglong future_event_relay_log_pos;
 
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
   bool is_fake; /* Mark that this is a fake relay log info structure */
 #endif
 
@@ -452,8 +453,46 @@ public:
       (m_flags & (1UL << IN_STMT));
   }
 
+  /**
+    Save pointer to Annotate_rows event and switch on the
+    binlog_annotate_rows_events for this sql thread.
+    To be called when sql thread receives an Annotate_rows event.
+  */
+  inline void set_annotate_event(Annotate_rows_log_event *event)
+  {
+    free_annotate_event();
+    m_annotate_event= event;
+    sql_thd->variables.binlog_annotate_rows_events= 1;
+  }
+
+  /**
+    Returns pointer to the saved Annotate_rows event or NULL if there is
+    no saved event.
+  */
+  inline Annotate_rows_log_event* get_annotate_event()
+  {
+    return m_annotate_event;
+  }
+
+  /**
+    Delete saved Annotate_rows event (if any) and switch off the
+    binlog_annotate_rows_events for this sql thread.
+    To be called when sql thread has applied the last (i.e. with
+    STMT_END_F flag) rbr event.
+  */
+  inline void free_annotate_event()
+  {
+    if (m_annotate_event)
+    {
+      sql_thd->variables.binlog_annotate_rows_events= 0;
+      delete m_annotate_event;
+      m_annotate_event= 0;
+    }
+  }
+
 private:
   uint32 m_flags;
+  Annotate_rows_log_event *m_annotate_event;
 };
 
 
diff --git a/sql/rpl_tblmap.cc b/sql/rpl_tblmap.cc
index fca39c2a17b..b7ac1b2d091 100644
--- a/sql/rpl_tblmap.cc
+++ b/sql/rpl_tblmap.cc
@@ -34,6 +34,7 @@
 table_mapping::table_mapping()
   : m_free(0)
 {
+  DBUG_ENTER("table_mapping::table_mapping");
   /*
     No "free_element" function for entries passed here, as the entries are
     allocated in a MEM_ROOT (freed as a whole in the destructor), they cannot
@@ -46,6 +47,7 @@ table_mapping::table_mapping()
 		   0,0,0);
   /* We don't preallocate any block, this is consistent with m_free=0 above */
   init_alloc_root(&m_mem_root, TABLE_ID_HASH_SIZE*sizeof(entry), 0);
+  DBUG_VOID_RETURN;
 }
 
 table_mapping::~table_mapping()
diff --git a/sql/rpl_utility.cc b/sql/rpl_utility.cc
index 8638d5e3c16..4ab4542dd13 100644
--- a/sql/rpl_utility.cc
+++ b/sql/rpl_utility.cc
@@ -303,7 +303,7 @@ uint32 table_def::calc_field_size(uint col, uchar *master_data) const
       always read the length in little-endian order.
     */
     Field_blob fb(m_field_metadata[col]);
-    length= fb.get_packed_size(master_data, TRUE);
+    length= fb.get_packed_size(master_data);
 #else
     /*
       Compute the length of the data. We cannot use get_length() here
@@ -1056,3 +1056,58 @@ table_def::~table_def()
 #endif
 }
 
+/**
+   @param   event_buf   points to the buffer containing the serialized event
+   @param   event_len   length of the event, including the trailing checksum
+   @param   alg         checksum algorithm the event is verified against
+
+   @return  TRUE if the test fails, FALSE on success
+*/
+bool event_checksum_test(uchar *event_buf, ulong event_len, uint8 alg)
+{
+  bool res= FALSE;
+  uint16 flags= 0; // to store in FD's buffer flags orig value
+
+  if (alg != BINLOG_CHECKSUM_ALG_OFF && alg != BINLOG_CHECKSUM_ALG_UNDEF)
+  {
+    ha_checksum incoming;
+    ha_checksum computed;
+
+    if (event_buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT)
+    {
+#ifndef DBUG_OFF
+      int8 fd_alg= event_buf[event_len - BINLOG_CHECKSUM_LEN - 
+                             BINLOG_CHECKSUM_ALG_DESC_LEN];
+#endif
+      /*
+        FD event is checksummed and therefore verified w/o the binlog-in-use flag
+      */
+      flags= uint2korr(event_buf + FLAGS_OFFSET);
+      if (flags & LOG_EVENT_BINLOG_IN_USE_F)
+        event_buf[FLAGS_OFFSET] &= ~LOG_EVENT_BINLOG_IN_USE_F;
+      /* 
+         The only algorithm currently is CRC32. Zero indicates 
+         the binlog file is checksum-free *except* the FD-event.
+      */
+      DBUG_ASSERT(fd_alg == BINLOG_CHECKSUM_ALG_CRC32 || fd_alg == 0);
+      DBUG_ASSERT(alg == BINLOG_CHECKSUM_ALG_CRC32);
+      /*
+        Compile-time guard to watch over the max number of algorithms
+      */
+      compile_time_assert(BINLOG_CHECKSUM_ALG_ENUM_END <= 0x80);
+    }
+    incoming= uint4korr(event_buf + event_len - BINLOG_CHECKSUM_LEN);
+    computed= my_checksum(0L, NULL, 0);
+    /* checksum the event content but the checksum part itself */
+    computed= my_checksum(computed, (const uchar*) event_buf, 
+                          event_len - BINLOG_CHECKSUM_LEN);
+    if (flags != 0)
+    {
+      /* restoring the orig value of flags of FD */
+      DBUG_ASSERT(event_buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT);
+      event_buf[FLAGS_OFFSET]= (uchar) flags;
+    }
+    res= !(computed == incoming);
+  }
+  return DBUG_EVALUATE_IF("simulate_checksum_test_failure", TRUE, res);
+}
diff --git a/sql/scheduler.cc b/sql/scheduler.cc
index a2abc7c14bf..f04fdef39f9 100644
--- a/sql/scheduler.cc
+++ b/sql/scheduler.cc
@@ -21,11 +21,10 @@
 #pragma implementation
 #endif
 
-#include <sql_priv.h>
-#include "unireg.h"                    // REQUIRED: for other includes
-#include "scheduler.h"
 #include "sql_connect.h"         // init_new_connection_handler_thread
 #include "scheduler.h"
+#include "mysqld.h"
+#include "sql_class.h"
 #include "sql_callback.h"
 
 /*
@@ -39,41 +38,6 @@ static bool no_threads_end(THD *thd, bool put_in_cache)
   return 1;                                     // Abort handle_one_connection
 }
 
-static scheduler_functions one_thread_scheduler_functions=
-{
-  1,                                     // max_threads
-  NULL,                                  // init
-  init_new_connection_handler_thread,    // init_new_connection_thread
-#ifndef EMBEDDED_LIBRARY
-  handle_connection_in_main_thread,      // add_connection
-#else
-  NULL,                                  // add_connection
-#endif // EMBEDDED_LIBRARY
-  NULL,                                  // thd_wait_begin
-  NULL,                                  // thd_wait_end
-  NULL,                                  // post_kill_notification
-  no_threads_end,                        // end_thread
-  NULL,                                  // end
-};
-
-#ifndef EMBEDDED_LIBRARY
-static scheduler_functions one_thread_per_connection_scheduler_functions=
-{
-  0,                                     // max_threads
-  NULL,                                  // init
-  init_new_connection_handler_thread,    // init_new_connection_thread
-  create_thread_to_handle_connection,    // add_connection
-  NULL,                                  // thd_wait_begin
-  NULL,                                  // thd_wait_end
-  NULL,                                  // post_kill_notification
-  one_thread_per_connection_end,         // end_thread
-  NULL,                                  // end
-};
-#endif  // EMBEDDED_LIBRARY
-
-
-scheduler_functions *thread_scheduler= NULL;
-
 /** @internal
   Helper functions to allow mysys to call the thread scheduler when
   waiting for locks.
@@ -83,21 +47,27 @@ scheduler_functions *thread_scheduler= NULL;
 extern "C"
 {
 static void scheduler_wait_lock_begin(void) {
-  MYSQL_CALLBACK(thread_scheduler,
-                 thd_wait_begin, (current_thd, THD_WAIT_TABLE_LOCK));
+  THD *thd=current_thd;  /* NULL-checked, as in scheduler_wait_sync_begin */
+  scheduler_functions *func= thd ? thd->scheduler : thread_scheduler;
+  MYSQL_CALLBACK(func, thd_wait_begin, (thd, THD_WAIT_TABLE_LOCK));
 }
 
 static void scheduler_wait_lock_end(void) {
-  MYSQL_CALLBACK(thread_scheduler, thd_wait_end, (current_thd));
+  THD *thd=current_thd;  /* NULL-checked, as in scheduler_wait_sync_end */
+  scheduler_functions *func= thd ? thd->scheduler : thread_scheduler;
+  MYSQL_CALLBACK(func, thd_wait_end, (thd));
 }
 
 static void scheduler_wait_sync_begin(void) {
-  MYSQL_CALLBACK(thread_scheduler,
-                 thd_wait_begin, (current_thd, THD_WAIT_TABLE_LOCK));
+  THD *thd=current_thd;
+  scheduler_functions *func= thd ? thd->scheduler : thread_scheduler;
+  MYSQL_CALLBACK(func, thd_wait_begin, (thd, THD_WAIT_TABLE_LOCK));
 }
 
 static void scheduler_wait_sync_end(void) {
-  MYSQL_CALLBACK(thread_scheduler, thd_wait_end, (current_thd));
+  THD *thd=current_thd;
+  scheduler_functions *func= thd ? thd->scheduler : thread_scheduler;
+  MYSQL_CALLBACK(func, thd_wait_end, (thd));
 }
 };
 /**@}*/
@@ -121,11 +91,17 @@ static void scheduler_init() {
 */
 
 #ifndef EMBEDDED_LIBRARY
-void one_thread_per_connection_scheduler()
+void one_thread_per_connection_scheduler(scheduler_functions *func,
+    ulong *arg_max_connections,
+    uint *arg_connection_count)
 {
   scheduler_init();
-  one_thread_per_connection_scheduler_functions.max_threads= max_connections;
-  thread_scheduler= &one_thread_per_connection_scheduler_functions;
+  func->max_threads= *arg_max_connections + 1;
+  func->max_connections= arg_max_connections;
+  func->connection_count= arg_connection_count;
+  func->init_new_connection_thread= init_new_connection_handler_thread;
+  func->add_connection= create_thread_to_handle_connection;
+  func->end_thread= one_thread_per_connection_end;
 }
 #endif
 
@@ -133,16 +109,22 @@ void one_thread_per_connection_scheduler()
   Initailize scheduler for --thread-handling=no-threads
 */
 
-void one_thread_scheduler()
+void one_thread_scheduler(scheduler_functions *func)
 {
   scheduler_init();
-  thread_scheduler= &one_thread_scheduler_functions;
+  func->max_threads= 1;
+  //max_connections= 1;
+  func->max_connections= &max_connections;
+  func->connection_count= &connection_count;
+#ifndef EMBEDDED_LIBRARY
+  func->init_new_connection_thread= init_new_connection_handler_thread;
+  func->add_connection= handle_connection_in_main_thread;
+#endif
+  func->end_thread= no_threads_end;
 }
 
 
-/*
-  Initialize scheduler for --thread-handling=one-thread-per-connection
-*/
+#ifdef HAVE_POOL_OF_THREADS
 
 /*
   thd_scheduler keeps the link between THD and events.
@@ -150,15 +132,24 @@ void one_thread_scheduler()
 */
 
 thd_scheduler::thd_scheduler()
-  : m_psi(NULL), data(NULL)
+  : m_psi(NULL), logged_in(FALSE), io_event(NULL), thread_attached(FALSE)  /* NOTE(review): 'list' is not initialized here - presumably set up later, confirm */
 {
 }
 
 
 thd_scheduler::~thd_scheduler()
 {
+  my_free(io_event);  /* io_event starts as NULL in the constructor; assumes my_free() accepts NULL - TODO confirm */
 }
 
+#endif
+
+/*
+  No pluggable schedulers in MariaDB.
+  When we want them, we'll do it properly.
+*/
+#if 0
+
 static scheduler_functions *saved_thread_scheduler;
 static uint saved_thread_handling;
 
@@ -192,6 +183,11 @@ int my_thread_scheduler_reset()
   saved_thread_scheduler= 0;
   return 0;
 }
+#else
+extern "C" int my_thread_scheduler_set(scheduler_functions *scheduler)  /* stub: 'scheduler' is ignored */
+{ return 1; }  /* always 1; the real implementation sits in the #if 0 branch above */
 
-
+extern "C" int my_thread_scheduler_reset()  /* stub used while pluggable schedulers are compiled out */
+{ return 1; }  /* always 1; the compiled-out implementation returns 0 on its visible path */
+#endif
 
diff --git a/sql/scheduler.h b/sql/scheduler.h
index a04a32d2262..03e1ad385c1 100644
--- a/sql/scheduler.h
+++ b/sql/scheduler.h
@@ -24,13 +24,16 @@
 #pragma interface
 #endif
 
+#include <my_global.h>
+
 class THD;
 
 /* Functions used when manipulating threads */
 
 struct scheduler_functions
 {
-  uint max_threads;
+  uint max_threads, *connection_count;
+  ulong *max_connections;
   bool (*init)(void);
   bool (*init_new_connection_thread)(void);
   void (*add_connection)(THD *thd);
@@ -69,12 +72,16 @@ enum scheduler_types
   SCHEDULER_TYPES_COUNT
 };
 
-void one_thread_per_connection_scheduler();
-void one_thread_scheduler();
+void one_thread_per_connection_scheduler(scheduler_functions *func,
+    ulong *arg_max_connections, uint *arg_connection_count);
+void one_thread_scheduler(scheduler_functions *func);
 
-/*
- To be used for pool-of-threads (implemeneted differently on various OSs)
-*/
+#if defined(HAVE_LIBEVENT) && !defined(EMBEDDED_LIBRARY)
+
+#define HAVE_POOL_OF_THREADS 1
+
+struct event;
+ 
 class thd_scheduler
 {
 public:
@@ -90,17 +97,28 @@ public:
   */
   PSI_thread *m_psi;
 
-  void *data;                  /* scheduler-specific data structure */
+  bool logged_in;
+  struct event* io_event;
+  LIST list;
+  bool thread_attached;  /* Indicates if THD is attached to the OS thread */
 
   thd_scheduler();
   ~thd_scheduler();
+  bool init(THD* parent_thd);
+  bool thread_attach();
+  void thread_detach();
 };
 
-void *thd_get_scheduler_data(THD *thd);
-void thd_set_scheduler_data(THD *thd, void *data);
-PSI_thread* thd_get_psi(THD *thd);
-void thd_set_psi(THD *thd, PSI_thread *psi);
+void pool_of_threads_scheduler(scheduler_functions* func);
+#else
 
-extern scheduler_functions *thread_scheduler;
+#define pool_of_threads_scheduler(A) \
+  one_thread_per_connection_scheduler(A, &max_connections, \
+                                      &connection_count)
+
+class thd_scheduler
+{};
+
+#endif
 
 #endif
diff --git a/sql/set_var.cc b/sql/set_var.cc
index 4cdee8e1258..ce4d201672c 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2002, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009, 2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -38,7 +39,6 @@
 #include "tztime.h"     // my_tz_find, my_tz_SYSTEM, struct Time_zone
 #include "sql_acl.h"    // SUPER_ACL
 #include "sql_select.h" // free_underlaid_joins
-#include "sql_show.h"   // make_default_log_name
 #include "sql_view.h"   // updatable_views_with_limit_typelib
 #include "lock.h"                               // lock_global_read_lock,
                                                 // make_global_read_lock_block_commit,
@@ -137,7 +137,6 @@ void sys_var_end()
   @param deprecated_version if not 0 - when this variable will go away
   @param substitute if not 0 - what one should use instead when this
                    deprecated variable
-  @param parse_flag either PARSE_EARLY or PARSE_NORMAL
 */
 sys_var::sys_var(sys_var_chain *chain, const char *name_arg,
                  const char *comment, int flags_arg, ptrdiff_t off,
@@ -146,11 +145,10 @@ sys_var::sys_var(sys_var_chain *chain, const char *name_arg,
                  PolyLock *lock, enum binlog_status_enum binlog_status_arg,
                  on_check_function on_check_func,
                  on_update_function on_update_func,
-                 uint deprecated_version, const char *substitute,
-                 int parse_flag) :
+                 uint deprecated_version, const char *substitute) :
   next(0),
   binlog_status(binlog_status_arg),
-  flags(flags_arg), m_parse_flag(parse_flag), show_val_type(show_val_type_arg),
+  flags(flags_arg), show_val_type(show_val_type_arg),
   guard(lock), offset(off), on_check(on_check_func), on_update(on_update_func),
   is_os_charset(FALSE)
 {
@@ -163,7 +161,7 @@ sys_var::sys_var(sys_var_chain *chain, const char *name_arg,
     in the first (PARSE_EARLY) stage.
     See handle_options() for details.
   */
-  DBUG_ASSERT(parse_flag == PARSE_NORMAL || getopt_id <= 0 || getopt_id >= 255);
+  DBUG_ASSERT(!(flags & PARSE_EARLY) || getopt_id <= 0 || getopt_id >= 255);
 
   name.str= name_arg;     // ER_NO_DEFAULT relies on 0-termination of name_arg
   name.length= strlen(name_arg);                // and so does this.
diff --git a/sql/set_var.h b/sql/set_var.h
index 52679aa636c..d34b1db8b71 100644
--- a/sql/set_var.h
+++ b/sql/set_var.h
@@ -1,6 +1,6 @@
 #ifndef SET_VAR_INCLUDED
 #define SET_VAR_INCLUDED
-/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2002, 2010, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -59,9 +59,7 @@ public:
   sys_var *next;
   LEX_CSTRING name;
   enum flag_enum { GLOBAL, SESSION, ONLY_SESSION, SCOPE_MASK=1023,
-                   READONLY=1024, ALLOCATED=2048 };
-  static const int PARSE_EARLY= 1;
-  static const int PARSE_NORMAL= 2;
+                   READONLY=1024, ALLOCATED=2048, PARSE_EARLY=4096 };
   /**
     Enumeration type to indicate for a system variable whether
     it will be written to the binlog or not.
@@ -74,7 +72,6 @@ protected:
   typedef bool (*on_update_function)(sys_var *self, THD *thd, enum_var_type type);
 
   int flags;            ///< or'ed flag_enum values
-  int m_parse_flag;     ///< either PARSE_EARLY or PARSE_NORMAL.
   const SHOW_TYPE show_val_type; ///< what value_ptr() returns for sql_show.cc
   my_option option;     ///< min, max, default values are stored here
   PolyLock *guard;      ///< *second* lock that protects the variable
@@ -90,7 +87,7 @@ public:
           enum get_opt_arg_type getopt_arg_type, SHOW_TYPE show_val_type_arg,
           longlong def_val, PolyLock *lock, enum binlog_status_enum binlog_status_arg,
           on_check_function on_check_func, on_update_function on_update_func,
-          uint deprecated_version, const char *substitute, int parse_flag);
+          uint deprecated_version, const char *substitute);
 
   virtual ~sys_var() {}
 
@@ -133,7 +130,7 @@ public:
   }
   bool register_option(DYNAMIC_ARRAY *array, int parse_flags)
   {
-    return (option.id != -1) && (m_parse_flag & parse_flags) &&
+    return (option.id != -1) && ((flags & PARSE_EARLY) == parse_flags) &&  /* exact match: parse_flags must be 0 or PARSE_EARLY */
            insert_dynamic(array, (uchar*)&option);
   }
 
@@ -209,7 +206,7 @@ public:
     plugin_ref plugin;                  ///< for Sys_var_plugin
     Time_zone *time_zone;               ///< for Sys_var_tz
     LEX_STRING string_value;            ///< for Sys_var_charptr and others
-    void *ptr;                          ///< for Sys_var_struct
+    const void *ptr;                    ///< for Sys_var_struct
   } save_result;
   LEX_STRING base; /**< for structured variables, like keycache_name.variable_name */
 
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
index ec867b43ff1..ed9a05f3d5f 100644
--- a/sql/share/errmsg-utf8.txt
+++ b/sql/share/errmsg-utf8.txt
@@ -2044,7 +2044,7 @@ ER_BLOBS_AND_NO_TERMINATED 42000 S1009
         ukr "Не можна використовувати сталу довжину строки з BLOB. Зкористайтеся 'fields terminated by'"
 ER_TEXTFILE_NOT_READABLE  
         cze "Soubor '%-.128s' mus-Bí být v adresáři databáze nebo čitelný pro všechny"
-        dan "Filen '%-.128s' skal være i database-folderen og kunne læses af alle"
+        dan "Filen '%-.128s' skal være i database-folderen, eller kunne læses af alle"
         nla "Het bestand '%-.128s' dient in de database directory voor the komen of leesbaar voor iedereen te zijn."
         eng "The file '%-.128s' must be in the database directory or be readable by all"
         jps "ファイル '%-.128s' は databse の directory にあるか全てのユーザーが読めるように許可されていなければなりません.",
@@ -3173,22 +3173,22 @@ ER_CANT_CREATE_THREAD
         swe "Kan inte skapa en ny tråd (errno %d)"
         ukr "Не можу створити нову гілку (помилка %d). Якщо ви не використали усю пам'ять, то прочитайте документацію до вашої ОС - можливо це помилка ОС"
 ER_WRONG_VALUE_COUNT_ON_ROW 21S01 
-        cze "Po-Bčet sloupců neodpovídá počtu hodnot na řádku %ld"
-        dan "Kolonne antallet stemmer ikke overens med antallet af værdier i post %ld"
-        nla "Kolom aantal komt niet overeen met waarde aantal in rij %ld"
-        eng "Column count doesn't match value count at row %ld"
-        est "Tulpade hulk erineb väärtuste hulgast real %ld"
-        ger "Anzahl der Felder stimmt nicht mit der Anzahl der Werte in Zeile %ld überein"
-        hun "Az oszlopban talalhato ertek nem egyezik meg a %ld sorban szamitott ertekkel"
-        ita "Il numero delle colonne non corrisponde al conteggio alla riga %ld"
-        kor "Row %ld에서 칼럼 카운트와 value 카운터와 일치하지 않습니다."
-        por "Contagem de colunas não confere com a contagem de valores na linha %ld"
-        rum "Numarul de coloane nu corespunde cu numarul de valori la linia %ld"
-        rus "Количество столбцов не совпадает с количеством значений в записи %ld"
-        serbian "Broj kolona ne odgovara broju vrednosti u slogu %ld"
-        spa "El número de columnas no corresponde al número en la línea %ld"
-        swe "Antalet kolumner motsvarar inte antalet värden på rad: %ld"
-        ukr "Кількість стовбців не співпадає з кількістю значень у строці %ld"
+        cze "Po-Bčet sloupců neodpovídá počtu hodnot na řádku %lu"
+        dan "Kolonne antallet stemmer ikke overens med antallet af værdier i post %lu"
+        nla "Kolom aantal komt niet overeen met waarde aantal in rij %lu"
+        eng "Column count doesn't match value count at row %lu"
+        est "Tulpade hulk erineb väärtuste hulgast real %lu"
+        ger "Anzahl der Felder stimmt nicht mit der Anzahl der Werte in Zeile %lu überein"
+        hun "Az oszlopban talalhato ertek nem egyezik meg a %lu sorban szamitott ertekkel"
+        ita "Il numero delle colonne non corrisponde al conteggio alla riga %lu"
+        kor "Row %lu에서 칼럼 카운트와 value 카운터와 일치하지 않습니다."
+        por "Contagem de colunas não confere com a contagem de valores na linha %lu"
+        rum "Numarul de coloane nu corespunde cu numarul de valori la linia %lu"
+        rus "Количество столбцов не совпадает с количеством значений в записи %lu"
+        serbian "Broj kolona ne odgovara broju vrednosti u slogu %lu"
+        spa "El número de columnas no corresponde al número en la línea %lu"
+        swe "Antalet kolumner motsvarar inte antalet värden på rad: %lu"
+        ukr "Кількість стовбців не співпадає з кількістю значень у строці %lu"
 ER_CANT_REOPEN_TABLE  
         cze "Nemohu znovuotev-Břít tabulku: '%-.192s"
         dan "Kan ikke genåbne tabel '%-.192s"
@@ -4881,29 +4881,29 @@ ER_ZLIB_Z_DATA_ERROR
 ER_CUT_VALUE_GROUP_CONCAT  
         eng "Row %u was cut by GROUP_CONCAT()"
 ER_WARN_TOO_FEW_RECORDS 01000 
-        eng "Row %ld doesn't contain data for all columns"
-        ger "Zeile %ld enthält nicht für alle Felder Daten"
-        nla "Rij %ld bevat niet de data voor alle kolommen"
-        por "Conta de registro é menor que a conta de coluna na linha %ld"
-        spa "Línea %ld no contiene datos para todas las columnas"
+        eng "Row %lu doesn't contain data for all columns"
+        ger "Zeile %lu enthält nicht für alle Felder Daten"
+        nla "Rij %lu bevat niet de data voor alle kolommen"
+        por "Conta de registro é menor que a conta de coluna na linha %lu"
+        spa "Línea %lu no contiene datos para todas las columnas"
 ER_WARN_TOO_MANY_RECORDS 01000 
-        eng "Row %ld was truncated; it contained more data than there were input columns"
-        ger "Zeile %ld gekürzt, die Zeile enthielt mehr Daten, als es Eingabefelder gibt"
-        nla "Regel %ld ingekort, bevatte meer data dan invoer kolommen"
-        por "Conta de registro é maior que a conta de coluna na linha %ld"
-        spa "Línea %ld fué truncada; La misma contine mas datos que las que existen en las columnas de entrada"
+        eng "Row %lu was truncated; it contained more data than there were input columns"
+        ger "Zeile %lu gekürzt, die Zeile enthielt mehr Daten, als es Eingabefelder gibt"
+        nla "Regel %lu ingekort, bevatte meer data dan invoer kolommen"
+        por "Conta de registro é maior que a conta de coluna na linha %lu"
+        spa "Línea %lu fué truncada; La misma contine mas datos que las que existen en las columnas de entrada"
 ER_WARN_NULL_TO_NOTNULL 22004 
-        eng "Column set to default value; NULL supplied to NOT NULL column '%s' at row %ld"
-        ger "Feld auf Vorgabewert gesetzt, da NULL für NOT-NULL-Feld '%s' in Zeile %ld angegeben"
-        por "Dado truncado, NULL fornecido para NOT NULL coluna '%s' na linha %ld"
-        spa "Datos truncado, NULL suministrado para NOT NULL columna '%s' en la línea %ld"
+        eng "Column set to default value; NULL supplied to NOT NULL column '%s' at row %lu"
+        ger "Feld auf Vorgabewert gesetzt, da NULL für NOT-NULL-Feld '%s' in Zeile %lu angegeben"
+        por "Dado truncado, NULL fornecido para NOT NULL coluna '%s' na linha %lu"
+        spa "Datos truncado, NULL suministrado para NOT NULL columna '%s' en la línea %lu"
 ER_WARN_DATA_OUT_OF_RANGE 22003 
-        eng "Out of range value for column '%s' at row %ld"
+        eng "Out of range value for column '%s' at row %lu"
 WARN_DATA_TRUNCATED 01000 
-        eng "Data truncated for column '%s' at row %ld"
-        ger "Daten abgeschnitten für Feld '%s' in Zeile %ld"
-        por "Dado truncado para coluna '%s' na linha %ld"
-        spa "Datos truncados para columna '%s' en la línea %ld"
+        eng "Data truncated for column '%s' at row %lu"
+        ger "Daten abgeschnitten für Feld '%s' in Zeile %lu"
+        por "Dado truncado para coluna '%s' na linha %lu"
+        spa "Datos truncados para columna '%s' en la línea %lu"
 ER_WARN_USING_OTHER_HANDLER  
         eng "Using storage engine %s for table '%s'"
         ger "Für Tabelle '%s' wird Speicher-Engine %s benutzt"
@@ -5082,8 +5082,8 @@ ER_UNKNOWN_TIME_ZONE
         eng "Unknown or incorrect time zone: '%-.64s'"
         ger "Unbekannte oder falsche Zeitzone: '%-.64s'"
 ER_WARN_INVALID_TIMESTAMP  
-        eng "Invalid TIMESTAMP value in column '%s' at row %ld"
-        ger "Ungültiger TIMESTAMP-Wert in Feld '%s', Zeile %ld"
+        eng "Invalid TIMESTAMP value in column '%s' at row %lu"
+        ger "Ungültiger TIMESTAMP-Wert in Feld '%s', Zeile %lu"
 ER_INVALID_CHARACTER_STRING  
         eng "Invalid %s character string: '%.64s'"
         ger "Ungültiger %s-Zeichen-String: '%.64s'"
@@ -5313,9 +5313,9 @@ ER_NO_DEFAULT_FOR_FIELD
 ER_DIVISION_BY_ZERO 22012 
         eng "Division by 0"
         ger "Division durch 0"
-ER_TRUNCATED_WRONG_VALUE_FOR_FIELD  
-        eng "Incorrect %-.32s value: '%-.128s' for column '%.192s' at row %ld"
-        ger "Falscher %-.32s-Wert: '%-.128s' für Feld '%.192s' in Zeile %ld"
+ER_TRUNCATED_WRONG_VALUE_FOR_FIELD  22007
+        eng "Incorrect %-.32s value: '%-.128s' for column '%.192s' at row %lu"
+        ger "Falscher %-.32s-Wert: '%-.128s' für Feld '%.192s' in Zeile %lu"
 ER_ILLEGAL_VALUE_FOR_TYPE 22007 
         eng "Illegal %s '%-.192s' value found during parsing"
         ger "Nicht zulässiger %s-Wert '%-.192s' beim Parsen gefunden"
@@ -5448,8 +5448,8 @@ ER_PROC_AUTO_REVOKE_FAIL
         eng "Failed to revoke all privileges to dropped routine"
         ger "Rücknahme aller Rechte für die gelöschte Routine fehlgeschlagen"
 ER_DATA_TOO_LONG 22001
-        eng "Data too long for column '%s' at row %ld"
-        ger "Daten zu lang für Feld '%s' in Zeile %ld"
+        eng "Data too long for column '%s' at row %lu"
+        ger "Daten zu lang für Feld '%s' in Zeile %lu"
 ER_SP_BAD_SQLSTATE 42000
         eng "Bad SQLSTATE: '%s'"
         ger "Ungültiger SQLSTATE: '%s'"
@@ -5505,11 +5505,11 @@ ER_SP_NO_RECURSION
         eng "Recursive stored functions and triggers are not allowed."
         ger "Rekursive gespeicherte Routinen und Triggers sind nicht erlaubt"
 ER_TOO_BIG_SCALE 42000 S1009
-        eng "Too big scale %d specified for column '%-.192s'. Maximum is %lu."
-        ger "Zu großer Skalierungsfaktor %d für Feld '%-.192s' angegeben. Maximum ist %lu"
+        eng "Too big scale %u specified for '%-.192s'. Maximum is %lu."
+        ger "Zu großer Skalierungsfaktor %u für '%-.192s' angegeben. Maximum ist %lu"
 ER_TOO_BIG_PRECISION 42000 S1009
-        eng "Too big precision %d specified for column '%-.192s'. Maximum is %lu."
-        ger "Zu große Genauigkeit %d für Feld '%-.192s' angegeben. Maximum ist %lu"
+        eng "Too big precision %u specified for '%-.192s'. Maximum is %lu."
+        ger "Zu große Genauigkeit %u für '%-.192s' angegeben. Maximum ist %lu"
 ER_M_BIGGER_THAN_D 42000 S1009
         eng "For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column '%-.192s')."
         ger "Für FLOAT(M,D), DOUBLE(M,D) oder DECIMAL(M,D) muss M >= D sein (Feld '%-.192s')"
@@ -5547,8 +5547,8 @@ ER_WARN_CANT_DROP_DEFAULT_KEYCACHE
         eng "Cannot drop default keycache"
         ger "Der vorgabemäßige Schlüssel-Cache kann nicht gelöscht werden"
 ER_TOO_BIG_DISPLAYWIDTH 42000 S1009
-        eng "Display width out of range for column '%-.192s' (max = %lu)"
-        ger "Anzeigebreite außerhalb des zulässigen Bereichs für Spalte '%-.192s' (Maximum: %lu)"
+        eng "Display width out of range for '%-.192s' (max = %lu)"
+        ger "Anzeigebreite außerhalb des zulässigen Bereichs für '%-.192s' (Maximum: %lu)"
 ER_XAER_DUPID XAE08
         eng "XAER_DUPID: The XID already exists"
         ger "XAER_DUPID: Die XID existiert bereits"
@@ -6017,7 +6017,7 @@ ER_ONLY_INTEGERS_ALLOWED
         eng "Only integers allowed as number here"
         ger "An dieser Stelle sind nur Ganzzahlen zulässig"
 ER_UNSUPORTED_LOG_ENGINE
-        eng "This storage engine cannot be used for log tables""
+        eng "This storage engine cannot be used for log tables"
         ger "Diese Speicher-Engine kann für Logtabellen nicht verwendet werden"
 ER_BAD_LOG_STATEMENT
         eng "You cannot '%s' a log table if logging is enabled"
@@ -6486,3 +6486,71 @@ ER_PLUGIN_NO_UNINSTALL
 
 ER_PLUGIN_NO_INSTALL
   eng "Plugin '%s' is marked as not dynamically installable. You have to stop the server to install it."
+
+#
+# MariaDB error messages section starts here
+#
+
+# The following is here to allow us to detect if there were missing
+# error messages in the errmsg.sys file
+
+ER_LAST_MYSQL_ERROR_MESSAGE
+   eng ""
+
+# MariaDB error numbers start from 1900
+start-error-number 1900
+
+ER_VCOL_BASED_ON_VCOL
+  eng "A computed column cannot be based on a computed column"
+ER_VIRTUAL_COLUMN_FUNCTION_IS_NOT_ALLOWED
+  eng "Function or expression is not allowed for column '%s'"
+ER_DATA_CONVERSION_ERROR_FOR_VIRTUAL_COLUMN
+  eng "Generated value for computed column '%s' cannot be converted to type '%s'"
+ER_PRIMARY_KEY_BASED_ON_VIRTUAL_COLUMN
+  eng "Primary key cannot be defined upon a computed column"
+ER_KEY_BASED_ON_GENERATED_VIRTUAL_COLUMN
+  eng "Key/Index cannot be defined on a non-stored computed column"
+ER_WRONG_FK_OPTION_FOR_VIRTUAL_COLUMN
+  eng "Cannot define foreign key with %s clause on a computed column"
+ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN
+  eng "The value specified for computed column '%s' in table '%s' ignored"
+ER_UNSUPPORTED_ACTION_ON_VIRTUAL_COLUMN
+  eng "This is not yet supported for computed columns"
+ER_CONST_EXPR_IN_VCOL
+  eng "Constant expression in computed column function is not allowed"
+ER_ROW_EXPR_FOR_VCOL
+  eng "Expression for computed column cannot return a row"
+ER_UNSUPPORTED_ENGINE_FOR_VIRTUAL_COLUMNS
+        eng "%s storage engine does not support computed columns"
+ER_UNKNOWN_OPTION
+  eng "Unknown option '%-.64s'"
+ER_BAD_OPTION_VALUE
+  eng "Incorrect value '%-.64s' for option '%-.64s'"
+ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE
+  eng "Replication event checksum verification failed while reading from network."
+ER_BINLOG_READ_EVENT_CHECKSUM_FAILURE
+  eng "Replication event checksum verification failed while reading from a log file."
+ER_CANT_DO_ONLINE
+        eng "Can't execute the given '%s' command as online"
+ER_DATA_OVERFLOW 22003
+        eng "Got overflow when converting '%-.128s' to %-.32s. Value truncated."
+ER_DATA_TRUNCATED 22003
+        eng "Truncated value '%-.128s' when converting to %-.32s"
+ER_BAD_DATA 22007
+        eng "Encountered illegal value '%-.128s' when converting to %-.32s"
+ER_DYN_COL_WRONG_FORMAT
+        eng "Encountered illegal format of dynamic column string"
+ER_DYN_COL_IMPLEMENTATION_LIMIT
+        eng "Dynamic column implementation limit reached"
+ER_DYN_COL_DATA 22007
+        eng "Illegal value used as argument of dynamic column function"
+ER_DYN_COL_WRONG_CHARSET
+        eng "Dynamic column contains unknown character set"
+ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES
+        eng "At least one of the 'in_to_exists' or 'materialization' optimizer_switch flags must be 'on'."
+ER_QUERY_CACHE_IS_DISABLED
+        eng "Query cache is disabled (resize or similar command in progress); repeat this command later"
+ER_QUERY_CACHE_IS_GLOBALY_DISABLED
+        eng "Query cache is globally disabled and you can't enable it only for this session"
+ER_VIEW_ORDERBY_IGNORED
+        eng "View '%-.192s'.'%-.192s' ORDER BY clause ignored because there is other ORDER BY clause already."
diff --git a/sql/slave.cc b/sql/slave.cc
index 7a3eee952c3..96e9beedc5b 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -1006,6 +1007,8 @@ const char *print_slave_db_safe(const char* db)
   DBUG_RETURN((db ? db : ""));
 }
 
+#endif /* HAVE_REPLICATION */
+
 int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
                                  const char *default_val)
 {
@@ -1037,6 +1040,10 @@ int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
 }
 
 
+/*
+  when moving these functions to mysys, don't forget to
+  remove slave.cc from libmysqld/CMakeLists.txt
+*/
 int init_intvar_from_file(int* var, IO_CACHE* f, int default_val)
 {
   char buf[32];
@@ -1165,6 +1172,7 @@ err:
   DBUG_RETURN(ret);
 }
 
+#ifdef HAVE_REPLICATION
 
 /*
   Check if the error is caused by network.
@@ -1282,6 +1290,48 @@ static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi)
   }
 
   /*
+    FD_q's (A) is set initially from RL's (A): FD_q.(A) := RL.(A).
+    It's necessary to adjust FD_q.(A) at this point because in the following
+    course FD_q is going to be dumped to RL.
+    Generally FD_q is derived from a received FD_m (roughly FD_q := FD_m) 
+    in queue_event and the master's (A) is installed.
+    At one step with the assignment the Relay-Log's checksum alg is set to 
+    a new value: RL.(A) := FD_q.(A). If the slave service is stopped
+    the last time assigned RL.(A) will be passed over to the restarting
+    service (to the current execution point).
+    RL.A is a "codec" to verify checksum in queue_event() almost all the time,
+    except for the first fake Rotate event.
+    Starting from this point the IO thread will execute the following checksum
+    warm-up sequence of actions:
+
+    FD_q.A := RL.A,
+    A_m^0 := master.@@global.binlog_checksum,
+    {queue_event(R_f): verifies(R_f, A_m^0)},
+    {queue_event(FD_m): verifies(FD_m, FD_m.A), dump(FD_q), rotate(RL),
+                        FD_q := FD_m, RL.A := FD_q.A)}
+
+    See legends definition on MYSQL_BIN_LOG::relay_log_checksum_alg
+    docs lines (binlog.h).
+    In above A_m^0 - the value of master's
+    @@binlog_checksum determined in the upcoming handshake (stored in
+    mi->checksum_alg_before_fd).
+
+
+    After the warm-up sequence IO gets to "normal" checksum verification mode
+    to use RL.A in 
+    
+    {queue_event(E_m): verifies(E_m, RL.A)}
+
+    until it has received a new FD_m.
+  */
+  mi->rli.relay_log.description_event_for_queue->checksum_alg=
+    mi->rli.relay_log.relay_log_checksum_alg;
+
+  DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg !=
+              BINLOG_CHECKSUM_ALG_UNDEF);
+  DBUG_ASSERT(mi->rli.relay_log.relay_log_checksum_alg !=
+              BINLOG_CHECKSUM_ALG_UNDEF); 
+  /*
     Compare the master and slave's clock. Do not die if master's clock is
     unavailable (very old master not supporting UNIX_TIMESTAMP()?).
   */
@@ -1504,18 +1554,27 @@ be equal for the Statement-format replication to work";
     }
     else if (check_io_slave_killed(mi->io_thd, mi, NULL))
       goto slave_killed_err;
-    else if (is_network_error(mysql_errno(mysql)))
+    else if (is_network_error(err_code= mysql_errno(mysql)))
     {
-      mi->report(WARNING_LEVEL, mysql_errno(mysql),
-                 "Get master TIME_ZONE failed with error: %s", mysql_error(mysql));
+      mi->report(ERROR_LEVEL, err_code,
+                 "Get master TIME_ZONE failed with error: %s",
+                 mysql_error(mysql));
       goto network_err;
-    } 
+    }
+    else if (err_code == ER_UNKNOWN_SYSTEM_VARIABLE)
+    {
+      /* Unlike the other error branches there is no goto here: we only report */
+      /* We use ERROR_LEVEL to get the error logged to file */
+      mi->report(ERROR_LEVEL, err_code,
+                 "MySQL master doesn't have a TIME_ZONE variable. Note that "
+                 "if your timezone is not same between master and slave, your "
+                 "slave may get wrong data into timestamp columns");
+    }
     else
     {
       /* Fatal error */
       errmsg= "The slave I/O thread stops because a fatal error is encountered \
 when it try to get the value of TIME_ZONE global variable from master.";
-      err_code= mysql_errno(mysql);
       sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
       goto err;
     }
@@ -1550,6 +1609,103 @@ when it try to get the value of TIME_ZONE global variable from master.";
     mysql_free_result(mysql_store_result(mysql));
   }
  
+  /*
+    Querying if master is capable to checksum and notifying it about own
+    CRC-awareness. The master's side instant value of @@global.binlog_checksum 
+    is stored in the dump thread's uservar area as well as cached locally
+    to become known in consensus by master and slave.
+  */
+  DBUG_EXECUTE_IF("simulate_slave_unaware_checksum",
+                  mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_OFF;
+                  goto past_checksum;);
+  {
+    int rc;
+    const char query[]= "SET @master_binlog_checksum= @@global.binlog_checksum";
+    master_res= NULL;
+    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF; //initially undefined
+    /*
+      @c checksum_alg_before_fd is queried from master in this block.
+      If the master is old and checksum-unaware, the value stays undefined.
+      Once the first FD is received, its alg descriptor will replace
+      the value queried here.
+    */
+    rc= mysql_real_query(mysql, query, strlen(query));
+    if (rc != 0)
+    {
+      if (check_io_slave_killed(mi->io_thd, mi, NULL))
+        goto slave_killed_err;
+
+      if (mysql_errno(mysql) == ER_UNKNOWN_SYSTEM_VARIABLE)
+      {
+        // this is tolerable as OM -> NS is supported
+        mi->report(WARNING_LEVEL, mysql_errno(mysql),
+                   "Notifying master by %s failed with "
+                   "error: %s", query, mysql_error(mysql));
+      }
+      else
+      {
+        if (is_network_error(mysql_errno(mysql)))
+        {
+          mi->report(WARNING_LEVEL, mysql_errno(mysql),
+                     "Notifying master by %s failed with "
+                     "error: %s", query, mysql_error(mysql));
+          mysql_free_result(mysql_store_result(mysql));
+          goto network_err;
+        }
+        else
+        {
+          errmsg= "The slave I/O thread stops because a fatal error is encountered "
+            "when it tried to SET @master_binlog_checksum on master.";
+          err_code= ER_SLAVE_FATAL_ERROR;
+          sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+          mysql_free_result(mysql_store_result(mysql));
+          goto err;
+        }
+      }
+    }
+    else
+    {
+      mysql_free_result(mysql_store_result(mysql));
+      if (!mysql_real_query(mysql,
+                            STRING_WITH_LEN("SELECT @master_binlog_checksum")) &&
+          (master_res= mysql_store_result(mysql)) &&
+          (master_row= mysql_fetch_row(master_res)) &&
+          (master_row[0] != NULL))
+      {
+        mi->checksum_alg_before_fd= (uint8)
+          find_type(master_row[0], &binlog_checksum_typelib, 1) - 1;
+        // valid outcome is either of
+        DBUG_ASSERT(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_OFF ||
+                    mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_CRC32);
+      }
+      else if (check_io_slave_killed(mi->io_thd, mi, NULL))
+        goto slave_killed_err;
+      else if (is_network_error(mysql_errno(mysql)))
+      {
+        mi->report(WARNING_LEVEL, mysql_errno(mysql),
+                   "Get master BINLOG_CHECKSUM failed with error: %s", mysql_error(mysql));
+        goto network_err;
+      }
+      else
+      {
+        errmsg= "The slave I/O thread stops because a fatal error is encountered "
+          "when it tried to SELECT @master_binlog_checksum.";
+        err_code= ER_SLAVE_FATAL_ERROR;
+        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+        mysql_free_result(mysql_store_result(mysql));
+        goto err;
+      }
+    }
+    if (master_res)
+    {
+      mysql_free_result(master_res);
+      master_res= NULL;
+    }
+  }
+
+#ifndef DBUG_OFF
+past_checksum:
+#endif
 
 err:
   if (errmsg)
@@ -1574,6 +1730,7 @@ slave_killed_err:
   DBUG_RETURN(2);
 }
 
+
 static bool wait_for_relay_log_space(Relay_log_info* rli)
 {
   bool slave_killed=0;
@@ -2038,6 +2195,7 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
     + MAX_LOG_EVENT_HEADER;  /* note, incr over the global not session var */
   thd->slave_thread = 1;
   thd->enable_slow_log= opt_log_slow_slave_statements;
+  thd->variables.log_slow_filter= global_system_variables.log_slow_filter;
   set_slave_thread_options(thd);
   thd->client_capabilities = CLIENT_LOCAL_FILES;
   mysql_mutex_lock(&LOCK_thread_count);
@@ -2048,11 +2206,8 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
                   simulate_error|= (1 << SLAVE_THD_IO););
   DBUG_EXECUTE_IF("simulate_sql_slave_error_on_init",
                   simulate_error|= (1 << SLAVE_THD_SQL););
-#if !defined(DBUG_OFF)
-  if (init_thr_lock() || thd->store_globals() || simulate_error & (1<< thd_type))
-#else
-  if (init_thr_lock() || thd->store_globals())
-#endif
+  if (init_thr_lock() || thd->store_globals() ||
+      IF_DBUG(simulate_error & (1<< thd_type), 0))
   {
     thd->cleanup();
     DBUG_RETURN(-1);
@@ -2111,6 +2266,9 @@ static int request_dump(THD *thd, MYSQL* mysql, Master_info* mi,
   
   *suppress_warnings= FALSE;
 
+  if (opt_log_slave_updates && opt_replicate_annotate_rows_events)
+    binlog_flags|= BINLOG_SEND_ANNOTATE_ROWS_EVENT;
+
   if (RUN_HOOK(binlog_relay_io,
                before_request_transmit,
                (thd, mi, binlog_flags)))
@@ -2332,7 +2490,11 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli)
   thd->set_time();                            // time the query
   thd->lex->current_select= 0;
   if (!ev->when)
-    ev->when= my_time(0);
+  {
+    my_hrtime_t hrtime= my_hrtime();
+    ev->when= hrtime_to_my_time(hrtime);
+    ev->when_sec_part= hrtime_sec_part(hrtime);
+  }
   ev->thd = thd; // because up to this point, ev->thd == 0
 
   int reason= ev->shall_skip(rli);
@@ -2367,7 +2529,7 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli)
   if (exec_res == 0)
   {
     int error= ev->update_pos(rli);
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
     if (!rli->is_fake)
 #endif
     {
@@ -2502,17 +2664,41 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
 
     exec_res= apply_event_and_update_pos(ev, thd, rli);
 
-    /*
-      Format_description_log_event should not be deleted because it will be
-      used to read info about the relay log's format; it will be deleted when
-      the SQL thread does not need it, i.e. when this thread terminates.
-    */
-    if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
-    {
-      DBUG_PRINT("info", ("Deleting the event after it has been executed"));
-      delete ev;
+    switch (ev->get_type_code()) {
+      case FORMAT_DESCRIPTION_EVENT:
+        /*
+          Format_description_log_event should not be deleted because it
+          will be used to read info about the relay log's format;
+          it will be deleted when the SQL thread does not need it,
+          i.e. when this thread terminates.
+        */
+        break;
+      case ANNOTATE_ROWS_EVENT:
+        /*
+          Annotate_rows event should not be deleted because after it has
+          been applied, thd->query points to the string inside this event.
+          The thd->query will be used to generate new Annotate_rows event
+          during applying the subsequent Rows events.
+        */
+        rli->set_annotate_event((Annotate_rows_log_event*) ev);
+        break;
+      case DELETE_ROWS_EVENT:
+      case UPDATE_ROWS_EVENT:
+      case WRITE_ROWS_EVENT:
+        /*
+          After the last Rows event has been applied, the saved Annotate_rows
+          event (if any) is not needed anymore and can be deleted.
+        */
+        if (((Rows_log_event*)ev)->get_flags(Rows_log_event::STMT_END_F))
+          rli->free_annotate_event();
+        /* fall through */
+      default:
+        DBUG_PRINT("info", ("Deleting the event after it has been executed"));
+        delete ev;
+        break;
     }
 
+
     /*
       update_log_pos failed: this should not happen, so we don't
       retry.
@@ -2522,7 +2708,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
 
     if (slave_trans_retries)
     {
-      int UNINIT_VAR(temp_err);
+      int temp_err;
+      LINT_INIT(temp_err);
       if (exec_res && (temp_err= has_temporary_error(thd)))
       {
         const char *errmsg;
@@ -3132,7 +3319,6 @@ pthread_handler_t handle_slave_sql(void *arg)
   my_off_t UNINIT_VAR(saved_log_pos);
   my_off_t UNINIT_VAR(saved_master_log_pos);
   my_off_t saved_skip= 0;
-
   Relay_log_info* rli = &((Master_info*)arg)->rli;
   const char *errmsg;
 
@@ -3140,6 +3326,12 @@ pthread_handler_t handle_slave_sql(void *arg)
   my_thread_init();
   DBUG_ENTER("handle_slave_sql");
 
+  LINT_INIT(saved_master_log_pos);
+  LINT_INIT(saved_log_pos);
+
+  thd = new THD; // note that constructor of THD uses DBUG_ !
+  thd->thread_stack = (char*)&thd; // remember where our stack is
+
   DBUG_ASSERT(rli->inited);
   mysql_mutex_lock(&rli->run_lock);
   DBUG_ASSERT(!rli->slave_running);
@@ -3148,8 +3340,6 @@ pthread_handler_t handle_slave_sql(void *arg)
   rli->events_till_abort = abort_slave_event_count;
 #endif
 
-  thd = new THD; // note that contructor of THD uses DBUG_ !
-  thd->thread_stack = (char*)&thd; // remember where our stack is
   rli->sql_thd= thd;
   
   /* Inform waiting threads that slave has started */
@@ -3172,6 +3362,12 @@ pthread_handler_t handle_slave_sql(void *arg)
   thd->init_for_queries();
   thd->temporary_tables = rli->save_temporary_tables; // restore temp tables
   set_thd_in_use_temporary_tables(rli);   // (re)set sql_thd in use for saved temp tables
+  /*
+    binlog_annotate_rows_events must be TRUE only after an Annotate_rows event
+    has been received and only till the last corresponding rbr event has been
+    applied. In all other cases it must be FALSE.
+  */
+  thd->variables.binlog_annotate_rows_events= 0;
   mysql_mutex_lock(&LOCK_thread_count);
   threads.append(thd);
   mysql_mutex_unlock(&LOCK_thread_count);
@@ -3624,10 +3820,15 @@ static int process_io_rotate(Master_info *mi, Rotate_log_event *rev)
   */
   if (mi->rli.relay_log.description_event_for_queue->binlog_version >= 4)
   {
+    DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
+                mi->rli.relay_log.relay_log_checksum_alg);
+    
     delete mi->rli.relay_log.description_event_for_queue;
     /* start from format 3 (MySQL 4.0) again */
     mi->rli.relay_log.description_event_for_queue= new
       Format_description_log_event(3);
+    mi->rli.relay_log.description_event_for_queue->checksum_alg=
+      mi->rli.relay_log.relay_log_checksum_alg;    
   }
   /*
     Rotate the relay log makes binlog format detection easier (at next slave
@@ -3654,7 +3855,7 @@ static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
     If we get Load event, we need to pass a non-reusable buffer
     to read_log_event, so we do a trick
   */
-  if (buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
+  if ((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
   {
     if (unlikely(!(tmp_buf=(char*)my_malloc(event_len+1,MYF(MY_WME)))))
     {
@@ -3680,8 +3881,9 @@ static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
     Append_block/Exec_load (the SQL thread needs the data, as that thread is not
     connected to the master).
   */
-  Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg,
-                                            mi->rli.relay_log.description_event_for_queue);
+  Log_event *ev=
+    Log_event::read_log_event(buf, event_len, &errmsg,
+                              mi->rli.relay_log.description_event_for_queue, 0);
   if (unlikely(!ev))
   {
     sql_print_error("Read invalid event from master: '%s',\
@@ -3768,8 +3970,9 @@ static int queue_binlog_ver_3_event(Master_info *mi, const char *buf,
   DBUG_ENTER("queue_binlog_ver_3_event");
 
   /* read_log_event() will adjust log_pos to be end_log_pos */
-  Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg,
-                                            mi->rli.relay_log.description_event_for_queue);
+  Log_event *ev=
+    Log_event::read_log_event(buf,event_len, &errmsg,
+                              mi->rli.relay_log.description_event_for_queue, 0);
   if (unlikely(!ev))
   {
     sql_print_error("Read invalid event from master: '%s',\
@@ -3795,6 +3998,7 @@ static int queue_binlog_ver_3_event(Master_info *mi, const char *buf,
     inc_pos= event_len;
     break;
   }
+
   if (unlikely(rli->relay_log.append(ev)))
   {
     delete ev;
@@ -3858,18 +4062,79 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
   Relay_log_info *rli= &mi->rli;
   mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
   ulong s_id;
+  bool unlock_data_lock= TRUE;
+  /*
+    FD_q must have been prepared for the first R_a event
+    inside get_master_version_and_clock()
+    Show-up of FD:s affects checksum_alg at once because
+    that changes FD_queue.
+  */
+  uint8 checksum_alg= mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF ? 
+    mi->checksum_alg_before_fd :
+    mi->rli.relay_log.relay_log_checksum_alg;
+
+  char *save_buf= NULL; // needed for checksumming the fake Rotate event
+  char rot_buf[LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN + FN_REFLEN];
+
+  DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_OFF || 
+              checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF || 
+              checksum_alg == BINLOG_CHECKSUM_ALG_CRC32); 
+
   DBUG_ENTER("queue_event");
+  /*
+    FD_queue checksum alg description does not apply in a case of
+    FD itself. The one carries both parts of the checksum data.
+  */
+  if (buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT)
+  {
+    checksum_alg= get_checksum_alg(buf, event_len);
+  }
+  else if (buf[EVENT_TYPE_OFFSET] == START_EVENT_V3)
+  {
+    // checksum behaviour is similar to the pre-checksum FD handling
+    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
+    mi->rli.relay_log.description_event_for_queue->checksum_alg=
+      mi->rli.relay_log.relay_log_checksum_alg= checksum_alg=
+      BINLOG_CHECKSUM_ALG_OFF;
+  }
+
+  // does not always hold because an old binlog can work with NM
+  // DBUG_ASSERT(checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
+
+  // should hold unless manipulations with RL. Tests that do that
+  // will have to refine the clause.
+  DBUG_ASSERT(mi->rli.relay_log.relay_log_checksum_alg !=
+              BINLOG_CHECKSUM_ALG_UNDEF);
+              
+  // Emulate the network corruption
+  DBUG_EXECUTE_IF("corrupt_queue_event",
+    if (buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT)
+    {
+      char *debug_event_buf_c = (char*) buf;
+      int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
+      debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
+      DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos));
+      DBUG_SET("-d,corrupt_queue_event");
+    }
+  );
+                                              
+  if (event_checksum_test((uchar *) buf, event_len, checksum_alg))
+  {
+    error= ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE;
+    unlock_data_lock= FALSE;
+    goto err;
+  }
 
   LINT_INIT(inc_pos);
 
   if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 &&
-      buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
+      (uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
     DBUG_RETURN(queue_old_event(mi,buf,event_len));
 
   LINT_INIT(inc_pos);
   mysql_mutex_lock(&mi->data_lock);
 
-  switch (buf[EVENT_TYPE_OFFSET]) {
+  switch ((uchar)buf[EVENT_TYPE_OFFSET]) {
   case STOP_EVENT:
     /*
       We needn't write this event to the relay log. Indeed, it just indicates a
@@ -3886,12 +4151,67 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
     goto err;
   case ROTATE_EVENT:
   {
-    Rotate_log_event rev(buf,event_len,mi->rli.relay_log.description_event_for_queue);
-    if (unlikely(process_io_rotate(mi,&rev)))
+    Rotate_log_event rev(buf, checksum_alg != BINLOG_CHECKSUM_ALG_OFF ?
+                         event_len - BINLOG_CHECKSUM_LEN : event_len,
+                         mi->rli.relay_log.description_event_for_queue);
+
+    if (unlikely(process_io_rotate(mi, &rev)))
     {
       error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
       goto err;
     }
+    /* 
+       Checksum special cases for the fake Rotate (R_f) event caused by the protocol
+       of events generation and serialization in RL where Rotate of master is 
+       queued right next to FD of slave.
+       Since it's only FD that carries the alg desc of FD_s has to apply to R_m.
+       Two special rules apply only to the first R_f which comes in before any FD_m.
+       The 2nd R_f should be compatible with the FD_s that must have taken over
+       the last seen FD_m's (A).
+       
+       RSC_1: If OM \and fake Rotate \and slave is configured to
+              compute checksum for its first FD event for RL
+              the fake Rotate gets checksummed here.
+    */
+    if (uint4korr(&buf[0]) == 0 && checksum_alg == BINLOG_CHECKSUM_ALG_OFF &&
+        mi->rli.relay_log.relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_OFF)
+    {
+      ha_checksum rot_crc= my_checksum(0L, NULL, 0);
+      event_len += BINLOG_CHECKSUM_LEN;
+      memcpy(rot_buf, buf, event_len - BINLOG_CHECKSUM_LEN);
+      int4store(&rot_buf[EVENT_LEN_OFFSET],
+                uint4korr(&rot_buf[EVENT_LEN_OFFSET]) + BINLOG_CHECKSUM_LEN);
+      rot_crc= my_checksum(rot_crc, (const uchar *) rot_buf,
+                           event_len - BINLOG_CHECKSUM_LEN);
+      int4store(&rot_buf[event_len - BINLOG_CHECKSUM_LEN], rot_crc);
+      DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
+      DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
+                  mi->rli.relay_log.relay_log_checksum_alg);
+      /* the first one */
+      DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
+      save_buf= (char *) buf;
+      buf= rot_buf;
+    }
+    else
+      /*
+        RSC_2: If NM \and fake Rotate \and slave does not compute checksum
+        the fake Rotate's checksum is stripped off before relay-logging.
+      */
+      if (uint4korr(&buf[0]) == 0 && checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+          mi->rli.relay_log.relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_OFF)
+      {
+        event_len -= BINLOG_CHECKSUM_LEN;
+        memcpy(rot_buf, buf, event_len);
+        int4store(&rot_buf[EVENT_LEN_OFFSET],
+                  uint4korr(&rot_buf[EVENT_LEN_OFFSET]) - BINLOG_CHECKSUM_LEN);
+        DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
+        DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
+                    mi->rli.relay_log.relay_log_checksum_alg);
+        /* the first one */
+        DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
+        save_buf= (char *) buf;
+        buf= rot_buf;
+      }
     /*
       Now the I/O thread has just changed its mi->master_log_name, so
       incrementing mi->master_log_pos is nonsense.
@@ -3912,15 +4232,24 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
     */
     Format_description_log_event* tmp;
     const char* errmsg;
+    // mark it as undefined, as it is not relevant anymore
+    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
     if (!(tmp= (Format_description_log_event*)
           Log_event::read_log_event(buf, event_len, &errmsg,
-                                    mi->rli.relay_log.description_event_for_queue)))
+                                    mi->rli.relay_log.description_event_for_queue,
+                                    1)))
     {
       error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
       goto err;
     }
     delete mi->rli.relay_log.description_event_for_queue;
     mi->rli.relay_log.description_event_for_queue= tmp;
+    if (tmp->checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
+      tmp->checksum_alg= BINLOG_CHECKSUM_ALG_OFF;
+
+    /* installing new value of checksum Alg for relay log */
+    mi->rli.relay_log.relay_log_checksum_alg= tmp->checksum_alg;
+
     /*
        Though this does some conversion to the slave's format, this will
        preserve the master's binlog format version, and number of event types.
@@ -4064,13 +4393,16 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
       error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
     }
     rli->ign_master_log_name_end[0]= 0; // last event is not ignored
+    if (save_buf != NULL)
+      buf= save_buf;
   }
   mysql_mutex_unlock(log_lock);
 
 skip_relay_logging:
   
 err:
-  mysql_mutex_unlock(&mi->data_lock);
+  if (unlock_data_lock)
+    mysql_mutex_unlock(&mi->data_lock);
   DBUG_PRINT("info", ("error: %d", error));
   if (error)
     mi->report(ERROR_LEVEL, error, ER(error), 
@@ -4220,10 +4552,11 @@ static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
       suppress_warnings= 0;
       mi->report(ERROR_LEVEL, last_errno,
                  "error %s to master '%s@%s:%d'"
-                 " - retry-time: %d  retries: %lu",
+                 " - retry-time: %d  retries: %lu  message: %s",
                  (reconnect ? "reconnecting" : "connecting"),
                  mi->user, mi->host, mi->port,
-                 mi->connect_retry, master_retry_count);
+                 mi->connect_retry, master_retry_count,
+                 mysql_error(mysql));
     }
     /*
       By default we try forever. The reason is that failure will trigger
@@ -4395,11 +4728,11 @@ bool flush_relay_log_info(Relay_log_info* rli)
   my_b_seek(file, 0L);
   pos=strmov(buff, rli->group_relay_log_name);
   *pos++='\n';
-  pos=longlong2str(rli->group_relay_log_pos, pos, 10);
+  pos= longlong10_to_str(rli->group_relay_log_pos, pos, 10);
   *pos++='\n';
   pos=strmov(pos, rli->group_master_log_name);
   *pos++='\n';
-  pos=longlong2str(rli->group_master_log_pos, pos, 10);
+  pos=longlong10_to_str(rli->group_master_log_pos, pos, 10);
   *pos='\n';
   if (my_b_write(file, (uchar*) buff, (size_t) (pos-buff)+1))
     error=1;
@@ -4544,8 +4877,9 @@ static Log_event* next_event(Relay_log_info* rli)
       But if the relay log is created by new_file(): then the solution is:
       MYSQL_BIN_LOG::open() will write the buffered description event.
     */
-    if ((ev=Log_event::read_log_event(cur_log,0,
-                                      rli->relay_log.description_event_for_exec)))
+    if ((ev= Log_event::read_log_event(cur_log,0,
+                                       rli->relay_log.description_event_for_exec,
+                                       opt_slave_sql_verify_checksum)))
 
     {
       DBUG_ASSERT(thd==rli->sql_thd);
@@ -4947,9 +5281,9 @@ bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
     {37426, { 5, 1,  0 }, { 5, 1, 26 } },
   };
   const uchar *master_ver=
-    rli->relay_log.description_event_for_exec->server_version_split;
+    rli->relay_log.description_event_for_exec->server_version_split.ver;
 
-  DBUG_ASSERT(sizeof(rli->relay_log.description_event_for_exec->server_version_split) == 3);
+  DBUG_ASSERT(sizeof(rli->relay_log.description_event_for_exec->server_version_split.ver) == 3);
 
   for (uint i= 0;
        i < sizeof(versions_for_all_bugs)/sizeof(*versions_for_all_bugs);i++)
diff --git a/sql/slave.h b/sql/slave.h
index 0374a5d27ae..6c1305e2ee0 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -48,6 +48,11 @@
 class Relay_log_info;
 class Master_info;
 
+int init_intvar_from_file(int* var, IO_CACHE* f, int default_val);
+int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
+                          const char *default_val);
+int init_floatvar_from_file(float* var, IO_CACHE* f, float default_val);
+int init_dynarray_intvar_from_file(DYNAMIC_ARRAY* arr, IO_CACHE* f);
 
 /*****************************************************************************
 
@@ -111,11 +116,13 @@ extern MY_BITMAP slave_error_mask;
 extern char slave_skip_error_names[];
 extern bool use_slave_mask;
 extern char *slave_load_tmpdir;
-extern char *master_info_file, *relay_log_info_file;
+extern char *master_info_file;
+extern MYSQL_PLUGIN_IMPORT char *relay_log_info_file;
 extern char *opt_relay_logname, *opt_relaylog_index_name;
 extern my_bool opt_skip_slave_start, opt_reckless_slave;
 extern my_bool opt_log_slave_updates;
 extern char *opt_slave_skip_errors;
+extern my_bool opt_replicate_annotate_rows_events;
 extern ulonglong relay_log_space_limit;
 
 /*
@@ -222,7 +229,7 @@ extern int disconnect_slave_event_count, abort_slave_event_count ;
 /* the master variables are defaults read from my.cnf or command line */
 extern uint master_port, master_connect_retry, report_port;
 extern char * master_user, *master_password, *master_host;
-extern char *master_info_file, *relay_log_info_file, *report_user;
+extern char *master_info_file, *report_user;
 extern char *report_host, *report_password;
 
 extern my_bool master_ssl;
@@ -231,6 +238,8 @@ extern char *master_ssl_cipher, *master_ssl_key;
        
 extern I_List<THD> threads;
 
+#else
+#define close_active_mi() /* no-op */
 #endif /* HAVE_REPLICATION */
 
 /* masks for start/stop operations on io and sql slave threads */
diff --git a/sql/sp.cc b/sql/sp.cc
index b257e23c8c7..b693569753f 100644
--- a/sql/sp.cc
+++ b/sql/sp.cc
@@ -129,7 +129,8 @@ TABLE_FIELD_TYPE proc_table_fields[MYSQL_PROC_FIELD_COUNT] =
   {
     { C_STRING_WITH_LEN("sql_mode") },
     { C_STRING_WITH_LEN("set('REAL_AS_FLOAT','PIPES_AS_CONCAT','ANSI_QUOTES',"
-    "'IGNORE_SPACE','NOT_USED','ONLY_FULL_GROUP_BY','NO_UNSIGNED_SUBTRACTION',"
+    "'IGNORE_SPACE','IGNORE_BAD_TABLE_OPTIONS','ONLY_FULL_GROUP_BY',"
+    "'NO_UNSIGNED_SUBTRACTION',"
     "'NO_DIR_IN_CREATE','POSTGRESQL','ORACLE','MSSQL','DB2','MAXDB',"
     "'NO_KEY_OPTIONS','NO_TABLE_OPTIONS','NO_FIELD_OPTIONS','MYSQL323','MYSQL40',"
     "'ANSI','NO_AUTO_VALUE_ON_ZERO','NO_BACKSLASH_ESCAPES','STRICT_TRANS_TABLES',"
@@ -497,8 +498,9 @@ db_find_routine_aux(THD *thd, int type, sp_name *name, TABLE *table)
   key_copy(key, table->record[0], table->key_info,
            table->key_info->key_length);
 
-  if (table->file->index_read_idx_map(table->record[0], 0, key, HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0], 0, key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
     DBUG_RETURN(SP_KEY_NOT_FOUND);
 
   DBUG_RETURN(SP_OK);
@@ -803,7 +805,7 @@ db_load_routine(THD *thd, int type, sp_name *name, sp_head **sphp,
 {
   LEX *old_lex= thd->lex, newlex;
   String defstr;
-  char saved_cur_db_name_buf[NAME_LEN+1];
+  char saved_cur_db_name_buf[SAFE_NAME_LEN+1];
   LEX_STRING saved_cur_db_name=
     { saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) };
   bool cur_db_changed;
@@ -1187,7 +1189,7 @@ sp_create_routine(THD *thd, int type, sp_head *sp)
                          (sp->m_explicit_name ? sp->m_db.length : 0), 
                          sp->m_name.str, sp->m_name.length,
                          sp->m_params.str, sp->m_params.length,
-                         retstr.c_ptr(), retstr.length(),
+                         retstr.ptr(), retstr.length(),
                          sp->m_body.str, sp->m_body.length,
                          sp->m_chistics, &(thd->lex->definer->user),
                          &(thd->lex->definer->host),
@@ -1200,7 +1202,7 @@ sp_create_routine(THD *thd, int type, sp_head *sp)
       thd->variables.sql_mode= saved_mode;
       /* Such a statement can always go directly to binlog, no trans cache */
       if (thd->binlog_query(THD::STMT_QUERY_TYPE,
-                            log_query.c_ptr(), log_query.length(),
+                            log_query.ptr(), log_query.length(),
                             FALSE, FALSE, FALSE, 0))
         ret= SP_INTERNAL_ERROR;
       thd->variables.sql_mode= 0;
@@ -1446,6 +1448,7 @@ bool lock_db_routines(THD *thd, char *db)
   Open_tables_backup open_tables_state_backup;
   MDL_request_list mdl_requests;
   Lock_db_routines_error_handler err_handler;
+  uchar keybuf[MAX_KEY_LENGTH];
   DBUG_ENTER("lock_db_routines");
 
   /*
@@ -1468,11 +1471,11 @@ bool lock_db_routines(THD *thd, char *db)
 
   table->field[MYSQL_PROC_FIELD_DB]->store(db, strlen(db), system_charset_info);
   key_len= table->key_info->key_part[0].store_length;
+  table->field[MYSQL_PROC_FIELD_DB]->get_key_image(keybuf, key_len, Field::itRAW);
   table->file->ha_index_init(0, 1);
 
-  if (! table->file->index_read_map(table->record[0],
-                                    table->field[MYSQL_PROC_FIELD_DB]->ptr,
-                                    (key_part_map)1, HA_READ_KEY_EXACT))
+  if (! table->file->ha_index_read_map(table->record[0], keybuf, (key_part_map)1,
+                                       HA_READ_KEY_EXACT))
   {
     do
     {
@@ -1484,9 +1487,7 @@ bool lock_db_routines(THD *thd, char *db)
                         MDL_key::FUNCTION : MDL_key::PROCEDURE,
                         db, sp_name, MDL_EXCLUSIVE, MDL_TRANSACTION);
       mdl_requests.push_front(mdl_request);
-    } while (! (nxtres= table->file->index_next_same(table->record[0],
-                                         table->field[MYSQL_PROC_FIELD_DB]->ptr,
-						     key_len)));
+    } while (! (nxtres= table->file->ha_index_next_same(table->record[0], keybuf, key_len)));
   }
   table->file->ha_index_end();
   if (nxtres != 0 && nxtres != HA_ERR_END_OF_FILE)
@@ -1521,6 +1522,7 @@ sp_drop_db_routines(THD *thd, char *db)
   int ret;
   uint key_len;
   MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();
+  uchar keybuf[MAX_KEY_LENGTH];
   DBUG_ENTER("sp_drop_db_routines");
   DBUG_PRINT("enter", ("db: %s", db));
 
@@ -1530,12 +1532,12 @@ sp_drop_db_routines(THD *thd, char *db)
 
   table->field[MYSQL_PROC_FIELD_DB]->store(db, strlen(db), system_charset_info);
   key_len= table->key_info->key_part[0].store_length;
+  table->field[MYSQL_PROC_FIELD_DB]->get_key_image(keybuf, key_len, Field::itRAW);
 
   ret= SP_OK;
   table->file->ha_index_init(0, 1);
-  if (! table->file->index_read_map(table->record[0],
-                                    (uchar *)table->field[MYSQL_PROC_FIELD_DB]->ptr,
-                                    (key_part_map)1, HA_READ_KEY_EXACT))
+  if (!table->file->ha_index_read_map(table->record[0], keybuf, (key_part_map)1,
+                                      HA_READ_KEY_EXACT))
   {
     int nxtres;
     bool deleted= FALSE;
@@ -1550,9 +1552,8 @@ sp_drop_db_routines(THD *thd, char *db)
 	nxtres= 0;
 	break;
       }
-    } while (! (nxtres= table->file->index_next_same(table->record[0],
-                                (uchar *)table->field[MYSQL_PROC_FIELD_DB]->ptr,
-						     key_len)));
+    } while (!(nxtres= table->file->ha_index_next_same(table->record[0],
+                                                       keybuf, key_len)));
     if (nxtres != HA_ERR_END_OF_FILE)
       ret= SP_KEY_NOT_FOUND;
     if (deleted)
diff --git a/sql/sp_cache.cc b/sql/sp_cache.cc
index 8972303ef03..f0875b9c3f4 100644
--- a/sql/sp_cache.cc
+++ b/sql/sp_cache.cc
@@ -121,6 +121,12 @@ void sp_cache_clear(sp_cache **cp)
 }
 
 
+void sp_cache_end()
+{
+  mysql_mutex_destroy(&Cversion_lock);
+}
+
+
 /*
   Insert a routine into the cache.
 
diff --git a/sql/sp_cache.h b/sql/sp_cache.h
index 39448568a76..7c5d1dca6ef 100644
--- a/sql/sp_cache.h
+++ b/sql/sp_cache.h
@@ -56,6 +56,7 @@ class sp_name;
 */
 
 void sp_cache_init();
+void sp_cache_end();
 void sp_cache_clear(sp_cache **cp);
 void sp_cache_insert(sp_cache **cp, sp_head *sp);
 sp_head *sp_cache_lookup(sp_cache **cp, sp_name *name);
diff --git a/sql/sp_head.cc b/sql/sp_head.cc
index ce713504a38..ff104d04aec 100644
--- a/sql/sp_head.cc
+++ b/sql/sp_head.cc
@@ -27,6 +27,7 @@
 #include "sql_acl.h"           // *_ACL
 #include "sql_array.h"         // Dynamic_array
 #include "log_event.h"         // append_query_string, Query_log_event
+#include "sql_derived.h"       // mysql_handle_derived
 
 #ifdef USE_PRAGMA_IMPLEMENTATION
 #pragma implementation
@@ -63,19 +64,7 @@ extern "C" uchar *sp_table_key(const uchar *ptr, size_t *plen, my_bool first);
 static void reset_start_time_for_sp(THD *thd)
 {
   if (!thd->in_sub_stmt)
-  {
-    /*
-      First investigate if there is a cached time stamp
-    */
-    if (thd->user_time)
-    {
-      thd->start_time= thd->user_time;
-    }
-    else
-    {
-      my_micro_time_and_time(&thd->start_time);
-    }
-  }
+    thd->set_start_time();
 }
 
 Item_result
@@ -841,6 +830,8 @@ sp_head::create_result_field(uint field_max_length, const char *field_name,
                       m_return_field_def.interval,
                       field_name ? field_name : (const char *) m_name.str);
-
-  if (field)
-    field->init(table);
-
+  if (field)                    /* may be NULL: guard every dereference */
+  {
+    field->vcol_info= m_return_field_def.vcol_info;
+    field->stored_in_db= m_return_field_def.stored_in_db;
+    field->init(table);
+  }
@@ -1197,7 +1188,7 @@ bool
 sp_head::execute(THD *thd, bool merge_da_on_success)
 {
   DBUG_ENTER("sp_head::execute");
-  char saved_cur_db_name_buf[NAME_LEN+1];
+  char saved_cur_db_name_buf[SAFE_NAME_LEN+1];
   LEX_STRING saved_cur_db_name=
     { saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) };
   bool cur_db_changed= FALSE;
@@ -2028,7 +2019,7 @@ sp_head::execute_procedure(THD *thd, List<Item> *args)
   ulonglong utime_before_sp_exec= thd->utime_after_lock;
   sp_rcontext *save_spcont, *octx;
   sp_rcontext *nctx = NULL;
-  bool save_enable_slow_log= false;
+  bool save_enable_slow_log;
   bool save_log_general= false;
   DBUG_ENTER("sp_head::execute_procedure");
   DBUG_PRINT("info", ("procedure %s", m_name.str));
@@ -2106,9 +2097,10 @@ sp_head::execute_procedure(THD *thd, List<Item> *args)
       if (spvar->mode == sp_param_out)
       {
         Item_null *null_item= new Item_null();
+        Item *tmp_item= null_item;
 
         if (!null_item ||
-            nctx->set_variable(thd, i, (Item **)&null_item))
+            nctx->set_variable(thd, i, &tmp_item))
         {
           err_status= TRUE;
           break;
@@ -2152,10 +2144,10 @@ sp_head::execute_procedure(THD *thd, List<Item> *args)
     DBUG_PRINT("info",(" %.*s: eval args done", (int) m_name.length, 
                        m_name.str));
   }
-  if (!(m_flags & LOG_SLOW_STATEMENTS) && thd->enable_slow_log)
+  save_enable_slow_log= thd->enable_slow_log;
+  if (!(m_flags & LOG_SLOW_STATEMENTS) && save_enable_slow_log)
   {
     DBUG_PRINT("info", ("Disabling slow log for the execution"));
-    save_enable_slow_log= true;
     thd->enable_slow_log= FALSE;
   }
   if (!(m_flags & LOG_GENERAL_LOG) && !(thd->variables.option_bits & OPTION_LOG_OFF))
@@ -2178,8 +2170,7 @@ sp_head::execute_procedure(THD *thd, List<Item> *args)
 
   if (save_log_general)
     thd->variables.option_bits &= ~OPTION_LOG_OFF;
-  if (save_enable_slow_log)
-    thd->enable_slow_log= true;
+  thd->enable_slow_log= save_enable_slow_log;
   /*
     In the case when we weren't able to employ reuse mechanism for
     OUT/INOUT paranmeters, we should reallocate memory. This
@@ -2300,6 +2291,8 @@ sp_head::reset_lex(THD *thd)
   sublex->dec= NULL;
   sublex->interval_list.empty();
   sublex->type= 0;
+  sublex->uint_geom_type= 0;
+  sublex->vcol_info= 0;
 
   /* Reset part of parser state which needs this. */
   thd->m_parser_state->m_yacc.reset_before_substatement();
@@ -2427,7 +2420,8 @@ sp_head::fill_field_definition(THD *thd, LEX *lex,
                       &lex->interval_list,
                       lex->charset ? lex->charset :
                                      thd->variables.collation_database,
-                      lex->uint_geom_type))
+                      lex->uint_geom_type,
+		      lex->vcol_info, NULL))
     return TRUE;
 
   if (field_def->interval_list.elements)
@@ -3063,6 +3057,9 @@ int sp_instr::exec_open_and_lock_tables(THD *thd, TABLE_LIST *tables)
     result= -1;
   else
     result= 0;
+  /* Prepare all derived tables/views to catch possible errors. */
+  if (!result)
+    result= mysql_handle_derived(thd->lex, DT_PREPARE) ? -1 : 0;
 
   return result;
 }
@@ -4051,7 +4048,7 @@ sp_head::merge_table_list(THD *thd, TABLE_LIST *table, LEX *lex_for_tmp_check)
   for (; table ; table= table->next_global)
     if (!table->derived && !table->schema_table)
     {
-      char tname[(NAME_LEN + 1) * 3];           // db\0table\0alias\0
+      char tname[(SAFE_NAME_LEN + 1) * 3];           // db\0table\0alias\0
       uint tlen, alen;
 
       tlen= table->db_length;
diff --git a/sql/sp_rcontext.cc b/sql/sp_rcontext.cc
index fba6a56b37c..74bee6e856c 100644
--- a/sql/sp_rcontext.cc
+++ b/sql/sp_rcontext.cc
@@ -123,7 +123,7 @@ sp_rcontext::init_var_table(THD *thd)
     return TRUE;
 
   m_var_table->copy_blobs= TRUE;
-  m_var_table->alias= "";
+  m_var_table->alias.set("", 0, table_alias_charset);
 
   return FALSE;
 }
@@ -716,7 +716,7 @@ int Select_fetch_into_spvars::prepare(List<Item> &fields, SELECT_LEX_UNIT *u)
 }
 
 
-bool Select_fetch_into_spvars::send_data(List<Item> &items)
+int Select_fetch_into_spvars::send_data(List<Item> &items)
 {
   List_iterator_fast<struct sp_variable> spvar_iter(*spvar_list);
   List_iterator_fast<Item> item_iter(items);
@@ -733,7 +733,7 @@ bool Select_fetch_into_spvars::send_data(List<Item> &items)
   for (; spvar= spvar_iter++, item= item_iter++; )
   {
     if (thd->spcont->set_variable(thd, spvar->offset, &item))
-      return TRUE;
+      return 1;
   }
-  return FALSE;
+  return 0;
 }
diff --git a/sql/sp_rcontext.h b/sql/sp_rcontext.h
index 84d5a1227fe..3d976f94381 100644
--- a/sql/sp_rcontext.h
+++ b/sql/sp_rcontext.h
@@ -275,7 +275,7 @@ public:
   void set_spvar_list(List<struct sp_variable> *vars) { spvar_list= vars; }
 
   virtual bool send_eof() { return FALSE; }
-  virtual bool send_data(List<Item> &items);
+  virtual int send_data(List<Item> &items);
   virtual int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
 };
 
@@ -302,7 +302,7 @@ public:
   int
   close(THD *thd);
 
-  inline my_bool
+  inline bool
   is_open()
   {
     return test(server_side_cursor);
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index 3f236dd672f..1f34712b044 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -532,11 +533,11 @@ static uchar* acl_entry_get_key(acl_entry *entry, size_t *length,
   second packet?
 */
 #define SSL_HANDSHAKE_SIZE      2
-#define NORMAL_HANDSHAKE_SIZE   6
 #define MIN_HANDSHAKE_SIZE      2
 #else
 #define MIN_HANDSHAKE_SIZE      6
 #endif /* HAVE_OPENSSL && !EMBEDDED_LIBRARY */
+#define NORMAL_HANDSHAKE_SIZE   6
 
 static DYNAMIC_ARRAY acl_hosts, acl_users, acl_dbs, acl_proxy_users;
 static MEM_ROOT mem, memex;
@@ -553,9 +554,9 @@ static void init_check_host(void);
 static void rebuild_check_host(void);
 static ACL_USER *find_acl_user(const char *host, const char *user,
                                my_bool exact);
-static bool update_user_table(THD *thd, TABLE *table,
-                              const char *host, const char *user,
-			      const char *new_password, uint new_password_len);
+static bool update_user_table(THD *thd, TABLE *table, const char *host,
+                              const char *user, const char *new_password,
+                              uint new_password_len);
 static my_bool acl_load(THD *thd, TABLE_LIST *tables);
 static my_bool grant_load(THD *thd, TABLE_LIST *tables);
 static inline void get_grantor(THD *thd, char* grantor);
@@ -583,6 +584,49 @@ set_user_salt(ACL_USER *acl_user, const char *password, uint password_len)
     acl_user->salt_len= 0;
 }
 
+static char *fix_plugin_ptr(char *name)
+{
+  if (my_strcasecmp(system_charset_info, name,
+                    native_password_plugin_name.str) == 0)
+    return native_password_plugin_name.str;
+  else
+  if (my_strcasecmp(system_charset_info, name,
+                    old_password_plugin_name.str) == 0)
+    return old_password_plugin_name.str;
+  else
+    return name;
+}
+
+/**
+  Fix ACL_USER::plugin to point to a hard-coded string, if appropriate
+
+  Make sure that if ACL_USER's plugin is a built-in, then it points
+  to a hard-coded string, not to an allocated copy. At run time, for
+  authentication, we want to be able to detect built-ins by comparing
+  pointers, not strings.
+
+  Also reset salt_len and, if the plugin is built-in, set the salt again.
+
+  @retval false the plugin is built-in; plugin pointer and salt were fixed
+  @retval true  this ACL_USER uses a plugin that is not built-in
+*/
+static bool fix_user_plugin_ptr(ACL_USER *user)
+{
+  user->salt_len= 0;
+  if (my_strcasecmp(system_charset_info, user->plugin.str,
+                    native_password_plugin_name.str) == 0)
+    user->plugin= native_password_plugin_name;
+  else
+  if (my_strcasecmp(system_charset_info, user->plugin.str,
+                    old_password_plugin_name.str) == 0)
+    user->plugin= old_password_plugin_name;
+  else
+    return true;
+  
+  set_user_salt(user, user->auth_string.str, user->auth_string.length);
+  return false;
+}
+
 /*
   Initialize structures responsible for user/db-level privilege checking and
   load privilege information for them from tables in the 'mysql' database.
@@ -664,13 +708,6 @@ set_user_plugin (ACL_USER *user, int password_len)
   case SCRAMBLED_PASSWORD_CHAR_LENGTH_323:
     user->plugin= old_password_plugin_name;
     return FALSE;
-  case 45: /* 4.1: to be removed */
-    sql_print_warning("Found 4.1.0 style password for user '%s@%s'. "
-                      "Ignoring user. "
-                      "You should change password for this user.",
-                      user->user ? user->user : "",
-                      user->host.hostname ? user->host.hostname : "");
-    return TRUE;
   default:
     sql_print_warning("Found invalid password for user: '%s@%s'; "
                       "Ignoring user", user->user ? user->user : "",
@@ -701,7 +738,7 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
   READ_RECORD read_record_info;
   my_bool return_val= TRUE;
   bool check_no_resolve= specialflag & SPECIAL_NO_RESOLVE;
-  char tmp_name[NAME_LEN+1];
+  char tmp_name[SAFE_NAME_LEN+1];
   int password_length;
   ulong old_sql_mode= thd->variables.sql_mode;
   DBUG_ENTER("acl_load");
@@ -713,8 +750,10 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
   acl_cache->clear(1);				// Clear locked hostname cache
 
   init_sql_alloc(&mem, ACL_ALLOC_BLOCK_SIZE, 0);
-  init_read_record(&read_record_info,thd,table= tables[0].table,NULL,1,0, 
-                   FALSE);
+  if (init_read_record(&read_record_info,thd,table= tables[0].table,NULL,1,0, 
+                       FALSE))
+    goto end;
+
   table->use_all_columns();
   (void) my_init_dynamic_array(&acl_hosts,sizeof(ACL_HOST),20,50);
   while (!(read_record_info.read_record(&read_record_info)))
@@ -763,7 +802,10 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
   end_read_record(&read_record_info);
   freeze_size(&acl_hosts);
 
-  init_read_record(&read_record_info,thd,table=tables[1].table,NULL,1,0,FALSE);
+  if (init_read_record(&read_record_info,thd,table=tables[1].table,NULL,1,0,
+                       FALSE))
+    goto end;
+
   table->use_all_columns();
   (void) my_init_dynamic_array(&acl_users,sizeof(ACL_USER),50,100);
   password_length= table->field[2]->field_length /
@@ -825,6 +867,8 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
 
     char *password= get_field(&mem, table->field[2]);
     uint password_len= password ? strlen(password) : 0;
+    user.auth_string.str= password ? password : const_cast<char*>("");
+    user.auth_string.length= password_len;
     set_user_salt(&user, password, password_len);
 
     if (set_user_plugin(&user, password_len))
@@ -913,7 +957,9 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
           char *tmpstr= get_field(&mem, table->field[next_field++]);
           if (tmpstr)
           {
-            if (password_len)
+            user.plugin.str= tmpstr;
+            user.plugin.length= strlen(user.plugin.str);
+            if (user.auth_string.length)
             {
               sql_print_warning("'user' entry '%s@%s' has both a password "
                                 "and an authentication plugin specified. The "
@@ -921,22 +967,12 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
                                 user.user ? user.user : "",
                                 user.host.hostname ? user.host.hostname : "");
             }
-            if (my_strcasecmp(system_charset_info, tmpstr,
-                              native_password_plugin_name.str) == 0)
-              user.plugin= native_password_plugin_name;
-            else
-              if (my_strcasecmp(system_charset_info, tmpstr,
-                                old_password_plugin_name.str) == 0)
-                user.plugin= old_password_plugin_name;
-              else
-              {
-                user.plugin.str= tmpstr;
-                user.plugin.length= strlen(tmpstr);
-              }
             user.auth_string.str= get_field(&mem, table->field[next_field++]);
             if (!user.auth_string.str)
               user.auth_string.str= const_cast<char*>("");
             user.auth_string.length= strlen(user.auth_string.str);
+
+            fix_user_plugin_ptr(&user);
           }
         }
       }
@@ -968,7 +1004,10 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
   end_read_record(&read_record_info);
   freeze_size(&acl_users);
 
-  init_read_record(&read_record_info,thd,table=tables[2].table,NULL,1,0,FALSE);
+  if (init_read_record(&read_record_info,thd,table=tables[2].table,NULL,1,0,
+                       FALSE))
+    goto end;
+
   table->use_all_columns();
   (void) my_init_dynamic_array(&acl_dbs,sizeof(ACL_DB),50,100);
   while (!(read_record_info.read_record(&read_record_info)))
@@ -1413,16 +1452,25 @@ static void acl_update_user(const char *user, const char *host,
     {
       if ((!acl_user->host.hostname && !host[0]) ||
 	  (acl_user->host.hostname &&
-	  !my_strcasecmp(system_charset_info, host, acl_user->host.hostname)))
+           !my_strcasecmp(system_charset_info, host, acl_user->host.hostname)))
       {
         if (plugin->str[0])
         {
-          acl_user->plugin.str= strmake_root(&mem, plugin->str, plugin->length);
-          acl_user->plugin.length= plugin->length;
+          acl_user->plugin= *plugin;
           acl_user->auth_string.str= auth->str ?
             strmake_root(&mem, auth->str, auth->length) : const_cast<char*>("");
           acl_user->auth_string.length= auth->length;
+          if (fix_user_plugin_ptr(acl_user))
+            acl_user->plugin.str= strmake_root(&mem, plugin->str, plugin->length);
         }
+        else
+          if (password[0])
+          {
+            acl_user->auth_string.str= strmake_root(&mem, password, password_len);
+            acl_user->auth_string.length= password_len;
+            set_user_salt(acl_user, password, password_len);
+            set_user_plugin(acl_user, password_len);
+          }
 	acl_user->access=privileges;
 	if (mqh->specified_limits & USER_RESOURCES::QUERIES_PER_HOUR)
 	  acl_user->user_resource.questions=mqh->questions;
@@ -1442,8 +1490,6 @@ static void acl_update_user(const char *user, const char *host,
 	  acl_user->x509_subject= (x509_subject ?
 				   strdup_root(&mem,x509_subject) : 0);
 	}
-	if (password)
-	  set_user_salt(acl_user, password, password_len);
         /* search complete: */
 	break;
       }
@@ -1471,18 +1517,19 @@ static void acl_insert_user(const char *user, const char *host,
   update_hostname(&acl_user.host, *host ? strdup_root(&mem, host): 0);
   if (plugin->str[0])
   {
-    acl_user.plugin.str= strmake_root(&mem, plugin->str, plugin->length);
-    acl_user.plugin.length= plugin->length;
+    acl_user.plugin= *plugin;
     acl_user.auth_string.str= auth->str ?
       strmake_root(&mem, auth->str, auth->length) : const_cast<char*>("");
     acl_user.auth_string.length= auth->length;
+    if (fix_user_plugin_ptr(&acl_user))
+      acl_user.plugin.str= strmake_root(&mem, plugin->str, plugin->length);
   }
   else
   {
-    acl_user.plugin= password_len == SCRAMBLED_PASSWORD_CHAR_LENGTH_323 ?
-      old_password_plugin_name : native_password_plugin_name;
-    acl_user.auth_string.str= const_cast<char*>("");
-    acl_user.auth_string.length= 0;
+    acl_user.auth_string.str= strmake_root(&mem, password, password_len);
+    acl_user.auth_string.length= password_len;
+    set_user_salt(&acl_user, password, password_len);
+    set_user_plugin(&acl_user, password_len);
   }
 
   acl_user.access=privileges;
@@ -1495,8 +1542,6 @@ static void acl_insert_user(const char *user, const char *host,
   acl_user.x509_issuer= x509_issuer  ? strdup_root(&mem,x509_issuer) : 0;
   acl_user.x509_subject=x509_subject ? strdup_root(&mem,x509_subject) : 0;
 
-  set_user_salt(&acl_user, password, password_len);
-
   (void) push_dynamic(&acl_users,(uchar*) &acl_user);
   if (!acl_user.host.hostname ||
       (acl_user.host.hostname[0] == wild_many && !acl_user.host.hostname[1]))
@@ -1523,10 +1568,11 @@ static void acl_update_db(const char *user, const char *host, const char *db,
     {
       if ((!acl_db->host.hostname && !host[0]) ||
 	  (acl_db->host.hostname &&
-          !strcmp(host, acl_db->host.hostname)))
+	   !strcmp(host, acl_db->host.hostname)))
       {
 	if ((!acl_db->db && !db[0]) ||
 	    (acl_db->db && !strcmp(db,acl_db->db)))
+
 	{
 	  if (privileges)
 	    acl_db->access=privileges;
@@ -1882,16 +1928,17 @@ bool change_password(THD *thd, const char *host, const char *user,
   }
 
   /* update loaded acl entry: */
-  set_user_salt(acl_user, new_password, new_password_len);
-
-  if (my_strcasecmp(system_charset_info, acl_user->plugin.str,
-                    native_password_plugin_name.str) &&
-      my_strcasecmp(system_charset_info, acl_user->plugin.str,
-                    old_password_plugin_name.str))
+  if (acl_user->plugin.str == native_password_plugin_name.str || 
+      acl_user->plugin.str == old_password_plugin_name.str)
+  {
+    acl_user->auth_string.str= strmake_root(&mem, new_password, new_password_len);
+    acl_user->auth_string.length= new_password_len;
+    set_user_salt(acl_user, new_password, new_password_len);
+    set_user_plugin(acl_user, new_password_len);
+  }
+  else
     push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
                  ER_SET_PASSWORD_AUTH_PLUGIN, ER(ER_SET_PASSWORD_AUTH_PLUGIN));
-  else
-    set_user_plugin(acl_user, new_password_len);
 
   if (update_user_table(thd, table,
 			acl_user->host.hostname ? acl_user->host.hostname : "",
@@ -1907,10 +1954,11 @@ bool change_password(THD *thd, const char *host, const char *user,
   result= 0;
   if (mysql_bin_log.is_open())
   {
-    query_length= sprintf(buff, "SET PASSWORD FOR '%-.120s'@'%-.120s'='%-.120s'",
-                          acl_user->user ? acl_user->user : "",
-                          acl_user->host.hostname ? acl_user->host.hostname : "",
-                          new_password);
+    query_length=
+      sprintf(buff,"SET PASSWORD FOR '%-.120s'@'%-.120s'='%-.120s'",
+              acl_user->user ? acl_user->user : "",
+              acl_user->host.hostname ? acl_user->host.hostname : "",
+              new_password);
     thd->clear_error();
     result= thd->binlog_query(THD::STMT_QUERY_TYPE, buff, query_length,
                               FALSE, FALSE, FALSE, 0);
@@ -2156,9 +2204,9 @@ static bool update_user_table(THD *thd, TABLE *table,
   key_copy((uchar *) user_key, table->record[0], table->key_info,
            table->key_info->key_length);
 
-  if (table->file->index_read_idx_map(table->record[0], 0,
-                                      (uchar *) user_key, HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0], 0,
+                                         (uchar *) user_key, HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
   {
     my_message(ER_PASSWORD_NO_MATCH, ER(ER_PASSWORD_NO_MATCH),
                MYF(0));	/* purecov: deadcode */
@@ -2213,14 +2261,12 @@ static bool test_if_create_new_users(THD *thd)
   Handle GRANT commands
 ****************************************************************************/
 
-static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
+static int replace_user_table(THD *thd, TABLE *table, LEX_USER &combo,
 			      ulong rights, bool revoke_grant,
 			      bool can_create_user, bool no_auto_create)
 {
   int error = -1;
   bool old_row_exists=0;
-  const char *password= "";
-  uint password_len= 0;
   char what= (revoke_grant) ? 'N' : 'Y';
   uchar user_key[MAX_KEY_LENGTH];
   LEX *lex= thd->lex;
@@ -2236,9 +2282,9 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
       my_error(ER_PASSWD_LENGTH, MYF(0), SCRAMBLED_PASSWORD_CHAR_LENGTH);
       DBUG_RETURN(-1);
     }
-    password_len= combo.password.length;
-    password=combo.password.str;
   }
+  else
+    combo.password= empty_lex_str;
 
   table->use_all_columns();
   table->field[0]->store(combo.host.str,combo.host.length,
@@ -2248,9 +2294,9 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
   key_copy(user_key, table->record[0], table->key_info,
            table->key_info->key_length);
 
-  if (table->file->index_read_idx_map(table->record[0], 0, user_key,
-                                      HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0], 0, user_key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
   {
     /* what == 'N' means revoke */
     if (what == 'N')
@@ -2271,7 +2317,7 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
 
       see also test_if_create_new_users()
     */
-    else if (!password_len && !combo.plugin.length && no_auto_create)
+    else if (!combo.password.length && !combo.plugin.length && no_auto_create)
     {
       my_error(ER_PASSWORD_NO_MATCH, MYF(0));
       goto end;
@@ -2296,28 +2342,11 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
                            system_charset_info);
     table->field[1]->store(combo.user.str,combo.user.length,
                            system_charset_info);
-    table->field[2]->store(password, password_len,
-                           system_charset_info);
   }
   else
   {
     old_row_exists = 1;
     store_record(table,record[1]);			// Save copy for update
-    /* what == 'N' means revoke */
-    if (combo.plugin.length && what != 'N')
-    {
-        my_error(ER_GRANT_PLUGIN_USER_EXISTS, MYF(0),
-                 static_cast<int>(combo.user.length), combo.user.str);
-        goto end;
-    }
-    if (combo.password.str)                             // If password given
-      table->field[2]->store(password, password_len, system_charset_info);
-    else if (!rights && !revoke_grant &&
-             lex->ssl_type == SSL_TYPE_NOT_SPECIFIED &&
-             !lex->mqh.specified_limits)
-    {
-      DBUG_RETURN(0);
-    }
   }
 
   /* Update table columns with new privileges */
@@ -2335,6 +2364,8 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
   }
   rights= get_access(table, 3, &next_field);
   DBUG_PRINT("info",("table fields: %d",table->s->fields));
+  if (combo.password.str[0])
+    table->field[2]->store(combo.password.str, combo.password.length, system_charset_info);
   if (table->s->fields >= 31)		/* From 4.0.0 we have more fields */
   {
     /* We write down SSL related ACL stuff */
@@ -2393,21 +2424,24 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
     mqh_used= mqh_used || mqh.questions || mqh.updates || mqh.conn_per_hour;
 
     next_field+= 4;
-    if (combo.plugin.str[0])
+    if (table->s->fields >= 41)
     {
-      if (table->s->fields >= 41 && combo.plugin.str[0])
+      table->field[next_field]->set_notnull();
+      table->field[next_field + 1]->set_notnull();
+      if (combo.plugin.str[0])
       {
+        DBUG_ASSERT(combo.password.str[0] == 0);
+        table->field[2]->reset();
         table->field[next_field]->store(combo.plugin.str, combo.plugin.length,
                                         system_charset_info);
-        table->field[next_field]->set_notnull();
         table->field[next_field + 1]->store(combo.auth.str, combo.auth.length,
                                             system_charset_info);
-        table->field[next_field + 1]->set_notnull();
       }
-      else
+      if (combo.password.str[0])
       {
-        my_error(ER_BAD_FIELD_ERROR, MYF(0), "plugin", "mysql.user");
-        goto end;
+        DBUG_ASSERT(combo.plugin.str[0] == 0);
+        table->field[next_field]->reset();
+        table->field[next_field + 1]->reset();
       }
     }
   }
@@ -2449,7 +2483,7 @@ end:
     acl_cache->clear(1);			// Clear privilege cache
     if (old_row_exists)
       acl_update_user(combo.user.str, combo.host.str,
-                      combo.password.str, password_len,
+                      combo.password.str, combo.password.length,
 		      lex->ssl_type,
 		      lex->ssl_cipher,
 		      lex->x509_issuer,
@@ -2459,7 +2493,8 @@ end:
 		      &combo.plugin,
 		      &combo.auth);
     else
-      acl_insert_user(combo.user.str, combo.host.str, password, password_len,
+      acl_insert_user(combo.user.str, combo.host.str,
+                      combo.password.str, combo.password.length,
 		      lex->ssl_type,
 		      lex->ssl_cipher,
 		      lex->x509_issuer,
@@ -2511,9 +2546,9 @@ static int replace_db_table(TABLE *table, const char *db,
   key_copy(user_key, table->record[0], table->key_info,
            table->key_info->key_length);
 
-  if (table->file->index_read_idx_map(table->record[0],0, user_key,
-                                      HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0],0, user_key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
   {
     if (what == 'N')
     { // no row, no revoke
@@ -2661,9 +2696,9 @@ replace_proxies_priv_table(THD *thd, TABLE *table, const LEX_USER *user,
   get_grantor(thd, grantor);
 
   table->file->ha_index_init(0, 1);
-  if (table->file->index_read_map(table->record[0], user_key,
-                                      HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_map(table->record[0], user_key,
+                                     HA_WHOLE_KEY,
+                                     HA_READ_KEY_EXACT))
   {
     DBUG_PRINT ("info", ("Row not found"));
     if (revoke_grant)
@@ -2903,8 +2938,9 @@ GRANT_TABLE::GRANT_TABLE(TABLE *form, TABLE *col_privs)
     col_privs->field[4]->store("",0, &my_charset_latin1);
 
     col_privs->file->ha_index_init(0, 1);
-    if (col_privs->file->index_read_map(col_privs->record[0], (uchar*) key,
-                                        (key_part_map)15, HA_READ_KEY_EXACT))
+    if (col_privs->file->ha_index_read_map(col_privs->record[0], (uchar*) key,
+                                           (key_part_map)15,
+                                           HA_READ_KEY_EXACT))
     {
       cols = 0; /* purecov: deadcode */
       col_privs->file->ha_index_end();
@@ -2930,7 +2966,7 @@ GRANT_TABLE::GRANT_TABLE(TABLE *form, TABLE *col_privs)
         privs= cols= 0;
         return;
       }
-    } while (!col_privs->file->index_next(col_privs->record[0]) &&
+    } while (!col_privs->file->ha_index_next(col_privs->record[0]) &&
              !key_cmp_if_same(col_privs,key,0,key_prefix_len));
     col_privs->file->ha_index_end();
   }
@@ -2965,7 +3001,7 @@ static GRANT_NAME *name_hash_search(HASH *name_hash,
                                     const char *user, const char *tname,
                                     bool exact, bool name_tolower)
 {
-  char helping [NAME_LEN*2+USERNAME_LENGTH+3], *name_ptr;
+  char helping [SAFE_NAME_LEN*2+USERNAME_LENGTH+3], *name_ptr;
   uint len;
   GRANT_NAME *grant_name,*found=0;
   HASH_SEARCH_STATE state;
@@ -3075,8 +3111,8 @@ static int replace_column_table(GRANT_TABLE *g_t,
     key_copy(user_key, table->record[0], table->key_info,
              table->key_info->key_length);
 
-    if (table->file->index_read_map(table->record[0], user_key, HA_WHOLE_KEY,
-                                    HA_READ_KEY_EXACT))
+    if (table->file->ha_index_read_map(table->record[0], user_key,
+                                       HA_WHOLE_KEY, HA_READ_KEY_EXACT))
     {
       if (revoke_grant)
       {
@@ -3157,9 +3193,9 @@ static int replace_column_table(GRANT_TABLE *g_t,
     key_copy(user_key, table->record[0], table->key_info,
              key_prefix_length);
 
-    if (table->file->index_read_map(table->record[0], user_key,
-                                    (key_part_map)15,
-                                    HA_READ_KEY_EXACT))
+    if (table->file->ha_index_read_map(table->record[0], user_key,
+                                       (key_part_map)15,
+                                       HA_READ_KEY_EXACT))
       goto end;
 
     /* Scan through all rows with the same host,db,user and table */
@@ -3210,7 +3246,7 @@ static int replace_column_table(GRANT_TABLE *g_t,
 	    my_hash_delete(&g_t->hash_columns,(uchar*) grant_column);
 	}
       }
-    } while (!table->file->index_next(table->record[0]) &&
+    } while (!table->file->ha_index_next(table->record[0]) &&
 	     !key_cmp_if_same(table, key, 0, key_prefix_length));
   }
 
@@ -3272,9 +3308,9 @@ static int replace_table_table(THD *thd, GRANT_TABLE *grant_table,
   key_copy(user_key, table->record[0], table->key_info,
            table->key_info->key_length);
 
-  if (table->file->index_read_idx_map(table->record[0], 0, user_key,
-                                      HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0], 0, user_key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
   {
     /*
       The following should never happen as we first check the in memory
@@ -3397,10 +3433,10 @@ static int replace_routine_table(THD *thd, GRANT_NAME *grant_name,
                          TRUE);
   store_record(table,record[1]);			// store at pos 1
 
-  if (table->file->index_read_idx_map(table->record[0], 0,
-                                      (uchar*) table->field[0]->ptr,
-                                      HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0], 0,
+                                         (uchar*) table->field[0]->ptr,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
   {
     /*
       The following should never happen as we first check the in memory
@@ -3527,7 +3563,7 @@ int mysql_table_grant(THD *thd, TABLE_LIST *table_list,
       class LEX_COLUMN *column;
       List_iterator <LEX_COLUMN> column_iter(columns);
 
-      if (open_normal_and_derived_tables(thd, table_list, 0))
+      if (open_normal_and_derived_tables(thd, table_list, 0, DT_PREPARE))
         DBUG_RETURN(TRUE);
 
       while ((column = column_iter++))
@@ -3960,7 +3996,7 @@ bool mysql_grant(THD *thd, const char *db, List <LEX_USER> &list,
 {
   List_iterator <LEX_USER> str_list (list);
   LEX_USER *Str, *tmp_Str, *proxied_user= NULL;
-  char tmp_db[NAME_LEN+1];
+  char tmp_db[SAFE_NAME_LEN+1];
   bool create_new_users=0;
   TABLE_LIST tables[2];
   bool save_binlog_row_based;
@@ -4184,7 +4220,7 @@ static my_bool grant_load_procs_priv(TABLE *p_table)
   p_table->file->ha_index_init(0, 1);
   p_table->use_all_columns();
 
-  if (!p_table->file->index_first(p_table->record[0]))
+  if (!p_table->file->ha_index_first(p_table->record[0]))
   {
     memex_ptr= &memex;
     my_pthread_setspecific_ptr(THR_MALLOC, &memex_ptr);
@@ -4236,7 +4272,7 @@ static my_bool grant_load_procs_priv(TABLE *p_table)
         goto end_unlock;
       }
     }
-    while (!p_table->file->index_next(p_table->record[0]));
+    while (!p_table->file->ha_index_next(p_table->record[0]));
   }
   /* Return ok */
   return_val= 0;
@@ -4286,7 +4322,7 @@ static my_bool grant_load(THD *thd, TABLE_LIST *tables)
   t_table->use_all_columns();
   c_table->use_all_columns();
 
-  if (!t_table->file->index_first(t_table->record[0]))
+  if (!t_table->file->ha_index_first(t_table->record[0]))
   {
     memex_ptr= &memex;
     my_pthread_setspecific_ptr(THR_MALLOC, &memex_ptr);
@@ -4321,7 +4357,7 @@ static my_bool grant_load(THD *thd, TABLE_LIST *tables)
 	goto end_unlock;
       }
     }
-    while (!t_table->file->index_next(t_table->record[0]));
+    while (!t_table->file->ha_index_next(t_table->record[0]));
   }
 
   return_val=0;					// Return ok
@@ -4640,6 +4676,8 @@ err:
   {
     char command[128];
     get_privilege_desc(command, sizeof(command), want_access);
+    status_var_increment(thd->status_var.access_denied_errors);
+
     my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0),
              command,
              sctx->priv_user,
@@ -4916,7 +4954,7 @@ static bool check_grant_db_routine(THD *thd, const char *db, HASH *hash)
 bool check_grant_db(THD *thd,const char *db)
 {
   Security_context *sctx= thd->security_ctx;
-  char helping [NAME_LEN+USERNAME_LENGTH+2];
+  char helping [SAFE_NAME_LEN + USERNAME_LENGTH+2];
   uint len;
   bool error= TRUE;
 
@@ -5254,16 +5292,27 @@ bool mysql_show_grants(THD *thd,LEX_USER *lex_user)
     global.append(lex_user->host.str,lex_user->host.length,
 		  system_charset_info);
     global.append ('\'');
-    if (acl_user->salt_len)
+    if (acl_user->plugin.str == native_password_plugin_name.str ||
+        acl_user->plugin.str == old_password_plugin_name.str)
     {
-      char passwd_buff[SCRAMBLED_PASSWORD_CHAR_LENGTH+1];
-      if (acl_user->salt_len == SCRAMBLE_LENGTH)
-        make_password_from_salt(passwd_buff, acl_user->salt);
-      else
-        make_password_from_salt_323(passwd_buff, (ulong *) acl_user->salt);
-      global.append(STRING_WITH_LEN(" IDENTIFIED BY PASSWORD '"));
-      global.append(passwd_buff);
-      global.append('\'');
+      if (acl_user->auth_string.length)
+      {
+        DBUG_ASSERT(acl_user->salt_len);
+        global.append(STRING_WITH_LEN(" IDENTIFIED BY PASSWORD '"));
+        global.append(acl_user->auth_string.str, acl_user->auth_string.length);
+        global.append('\'');
+      }
+    }
+    else
+    {
+        global.append(STRING_WITH_LEN(" IDENTIFIED VIA "));
+        global.append(acl_user->plugin.str, acl_user->plugin.length);
+        if (acl_user->auth_string.length)
+        {
+          global.append(STRING_WITH_LEN(" USING '"));
+          global.append(acl_user->auth_string.str, acl_user->auth_string.length);
+          global.append('\'');
+        }
     }
     /* "show grants" SSL related stuff */
     if (acl_user->ssl_type == SSL_TYPE_ANY)
@@ -5899,9 +5948,9 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop,
                         table->key_info->key_part[1].store_length);
     key_copy(user_key, table->record[0], table->key_info, key_prefix_length);
 
-    if ((error= table->file->index_read_idx_map(table->record[0], 0,
-                                                user_key, (key_part_map)3,
-                                                HA_READ_KEY_EXACT)))
+    if ((error= table->file->ha_index_read_idx_map(table->record[0], 0,
+                                                   user_key, (key_part_map)3,
+                                                   HA_READ_KEY_EXACT)))
     {
       if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       {
@@ -5936,7 +5985,7 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop,
       DBUG_PRINT("info",("scan table: '%s'  search: '%s'@'%s'",
                          table->s->table_name.str, user_str, host_str));
 #endif
-      while ((error= table->file->rnd_next(table->record[0])) != 
+      while ((error= table->file->ha_rnd_next(table->record[0])) != 
              HA_ERR_END_OF_FILE)
       {
         if (error)
@@ -6040,16 +6089,16 @@ static int handle_grant_struct(uint struct_no, bool drop,
     elements= acl_dbs.elements;
     break;
   case 2:
-    elements= column_priv_hash.records;
     grant_name_hash= &column_priv_hash;
+    elements= grant_name_hash->records;
     break;
   case 3:
-    elements= proc_priv_hash.records;
     grant_name_hash= &proc_priv_hash;
+    elements= grant_name_hash->records;
     break;
   case 4:
-    elements= func_priv_hash.records;
     grant_name_hash= &func_priv_hash;
+    elements= grant_name_hash->records;
     break;
   case 5:
     elements= acl_proxy_users.elements;
@@ -6096,7 +6145,7 @@ static int handle_grant_struct(uint struct_no, bool drop,
       break;
 
     default:
-      MY_ASSERT_UNREACHABLE();
+      DBUG_ASSERT(0);
     }
     if (! user)
       user= "";
@@ -6262,8 +6311,7 @@ static int handle_grant_data(TABLE_LIST *tables, bool drop,
   else
   {
     /* Handle user array. */
-    if ((handle_grant_struct(0, drop, user_from, user_to) && ! result) ||
-        found)
+    if ((handle_grant_struct(0, drop, user_from, user_to)) || found)
     {
       result= 1; /* At least one record/element found. */
       /* If search is requested, we do not need to search further. */
@@ -7824,33 +7872,15 @@ get_cached_table_access(GRANT_INTERNAL_INFO *grant_internal_info,
 #undef HAVE_OPENSSL
 #ifdef NO_EMBEDDED_ACCESS_CHECKS
 #define initialized 0
-#define decrease_user_connections(X)        /* nothing */
-#define check_for_max_user_connections(X, Y)   0
+#define check_for_max_user_connections(X,Y)   0
+#define get_or_create_user_conn(A,B,C,D) 0
 #endif
 #endif
 #ifndef HAVE_OPENSSL
 #define ssl_acceptor_fd 0
-#define sslaccept(A,B,C) 1
+#define sslaccept(A,B,C,D) 1
 #endif
 
-
-class Thd_charset_adapter
-{
-  THD *thd;
-public:
-  Thd_charset_adapter(THD *thd_arg) : thd (thd_arg) {} 
-  bool init_client_charset(uint cs_number)
-  {
-    if (thd_init_client_charset(thd, cs_number))
-      return true;
-    thd->update_charset();
-    return thd->is_error();
-  }
-
-  CHARSET_INFO *charset() { return thd->charset(); }
-};
-
-
 /**
   The internal version of what plugins know as MYSQL_PLUGIN_VIO,
   basically the context of the authentication session
@@ -7858,7 +7888,8 @@ public:
 struct MPVIO_EXT :public MYSQL_PLUGIN_VIO
 {
   MYSQL_SERVER_AUTH_INFO auth_info;
-  const ACL_USER *acl_user;
+  THD *thd;
+  ACL_USER *acl_user;       ///< a copy, independent from acl_users array
   plugin_ref plugin;        ///< what plugin we're under
   LEX_STRING db;            ///< db name from the handshake packet
   /** when restarting a plugin this caches the last client reply */
@@ -7875,70 +7906,34 @@ struct MPVIO_EXT :public MYSQL_PLUGIN_VIO
   uint connect_errors;      ///< if there were connect errors for this host
   /** when plugin returns a failure this tells us what really happened */
   enum { SUCCESS, FAILURE, RESTART } status;
-
-  /* encapsulation members */
-  ulong client_capabilities;
-  char *scramble;
-  MEM_ROOT *mem_root;
-  struct  rand_struct *rand;
-  my_thread_id  thread_id;
-  uint      *server_status;
-  NET *net;
-  ulong max_client_packet_length;
-  char *ip;
-  char *host;
-  Thd_charset_adapter *charset_adapter;
-  LEX_STRING acl_user_plugin;
 };
 
 /**
   a helper function to report an access denied error in all the proper places
 */
-static void login_failed_error(MPVIO_EXT *mpvio, int passwd_used)
-{
-  THD *thd= current_thd;
-  if (passwd_used == 2)
-  {
-    my_error(ER_ACCESS_DENIED_NO_PASSWORD_ERROR, MYF(0),
-             mpvio->auth_info.user_name,
-             mpvio->auth_info.host_or_ip);
-    general_log_print(thd, COM_CONNECT, ER(ER_ACCESS_DENIED_NO_PASSWORD_ERROR),
-                      mpvio->auth_info.user_name,
-                      mpvio->auth_info.host_or_ip);
-    /* 
-      Log access denied messages to the error log when log-warnings = 2
-      so that the overhead of the general query log is not required to track 
-      failed connections.
-    */
-    if (global_system_variables.log_warnings > 1)
-    {
-      sql_print_warning(ER(ER_ACCESS_DENIED_NO_PASSWORD_ERROR),
-                        mpvio->auth_info.user_name,
-                        mpvio->auth_info.host_or_ip);      
-    }
-  }
-  else
+static void login_failed_error(THD *thd)
+{
+  my_error(access_denied_error_code(thd->password), MYF(0),
+           thd->main_security_ctx.user,
+           thd->main_security_ctx.host_or_ip,
+           thd->password ? ER(ER_YES) : ER(ER_NO));
+  general_log_print(thd, COM_CONNECT,
+                    ER(access_denied_error_code(thd->password)),
+                    thd->main_security_ctx.user,
+                    thd->main_security_ctx.host_or_ip,
+                    thd->password ? ER(ER_YES) : ER(ER_NO));
+  status_var_increment(thd->status_var.access_denied_errors);
+  /* 
+    Log access denied messages to the error log when log-warnings = 2
+    so that the overhead of the general query log is not required to track 
+    failed connections.
+  */
+  if (global_system_variables.log_warnings > 1)
   {
-    my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
-             mpvio->auth_info.user_name,
-             mpvio->auth_info.host_or_ip,
-             passwd_used ? ER(ER_YES) : ER(ER_NO));
-    general_log_print(thd, COM_CONNECT, ER(ER_ACCESS_DENIED_ERROR),
-                      mpvio->auth_info.user_name,
-                      mpvio->auth_info.host_or_ip,
-                      passwd_used ? ER(ER_YES) : ER(ER_NO));
-    /* 
-      Log access denied messages to the error log when log-warnings = 2
-      so that the overhead of the general query log is not required to track 
-      failed connections.
-    */
-    if (global_system_variables.log_warnings > 1)
-    {
-      sql_print_warning(ER(ER_ACCESS_DENIED_ERROR),
-                        mpvio->auth_info.user_name,
-                        mpvio->auth_info.host_or_ip,
-                        passwd_used ? ER(ER_YES) : ER(ER_NO));      
-    }
+    sql_print_warning(ER(access_denied_error_code(thd->password)),
+                      thd->main_security_ctx.user,
+                      thd->main_security_ctx.host_or_ip,
+                      thd->password ? ER(ER_YES) : ER(ER_NO));      
   }
 }
 
@@ -7973,30 +7968,30 @@ static bool send_server_handshake_packet(MPVIO_EXT *mpvio,
   DBUG_ASSERT(mpvio->status == MPVIO_EXT::FAILURE);
   DBUG_ASSERT(data_len <= 255);
 
-  char *buff= (char *) my_alloca(1 + SERVER_VERSION_LENGTH + data_len + 64);
+  THD *thd= mpvio->thd;
+  char *buff= (char *) my_alloca(1 + SERVER_VERSION_LENGTH + 1 + data_len + 64);
   char scramble_buf[SCRAMBLE_LENGTH];
   char *end= buff;
-
   DBUG_ENTER("send_server_handshake_packet");
+
   *end++= protocol_version;
 
-  mpvio->client_capabilities= CLIENT_BASIC_FLAGS;
+  thd->client_capabilities= CLIENT_BASIC_FLAGS;
 
   if (opt_using_transactions)
-    mpvio->client_capabilities|= CLIENT_TRANSACTIONS;
+    thd->client_capabilities|= CLIENT_TRANSACTIONS;
 
-  mpvio->client_capabilities|= CAN_CLIENT_COMPRESS;
+  thd->client_capabilities|= CAN_CLIENT_COMPRESS;
 
   if (ssl_acceptor_fd)
   {
-    mpvio->client_capabilities|= CLIENT_SSL;
-    mpvio->client_capabilities|= CLIENT_SSL_VERIFY_SERVER_CERT;
+    thd->client_capabilities |= CLIENT_SSL;
+    thd->client_capabilities |= CLIENT_SSL_VERIFY_SERVER_CERT;
   }
 
   if (data_len)
   {
-    mpvio->cached_server_packet.pkt= (char*) memdup_root(mpvio->mem_root, 
-                                                         data, data_len);
+    mpvio->cached_server_packet.pkt= (char*)thd->memdup(data, data_len);
     mpvio->cached_server_packet.pkt_len= data_len;
   }
 
@@ -8023,14 +8018,14 @@ static bool send_server_handshake_packet(MPVIO_EXT *mpvio,
         native_password_plugin will have to send it in a separate packet,
         adding one more round trip.
       */
-      create_random_string(mpvio->scramble, SCRAMBLE_LENGTH, mpvio->rand);
-      data= mpvio->scramble;
+      create_random_string(thd->scramble, SCRAMBLE_LENGTH, &thd->rand);
+      data= thd->scramble;
     }
     data_len= SCRAMBLE_LENGTH;
   }
 
   end= strnmov(end, server_version, SERVER_VERSION_LENGTH) + 1;
-  int4store((uchar*) end, mpvio->thread_id);
+  int4store((uchar*) end, mpvio->thd->thread_id);
   end+= 4;
 
   /*
@@ -8042,11 +8037,11 @@ static bool send_server_handshake_packet(MPVIO_EXT *mpvio,
   end+= SCRAMBLE_LENGTH_323;
   *end++= 0;
  
-  int2store(end, mpvio->client_capabilities);
+  int2store(end, thd->client_capabilities);
   /* write server characteristics: up to 16 bytes allowed */
   end[2]= (char) default_charset_info->number;
-  int2store(end + 3, mpvio->server_status[0]);
-  int2store(end + 5, mpvio->client_capabilities >> 16);
+  int2store(end+3, mpvio->thd->server_status);
+  int2store(end+5, thd->client_capabilities >> 16);
   end[7]= data_len;
   bzero(end + 8, 10);
   end+= 18;
@@ -8057,31 +8052,30 @@ static bool send_server_handshake_packet(MPVIO_EXT *mpvio,
   end= strmake(end, plugin_name(mpvio->plugin)->str,
                     plugin_name(mpvio->plugin)->length);
 
-  int res= my_net_write(mpvio->net, (uchar*) buff, (size_t) (end - buff + 1)) ||
-           net_flush(mpvio->net);
+  int res= my_net_write(&mpvio->thd->net, (uchar*) buff,
+                        (size_t) (end - buff + 1)) ||
+           net_flush(&mpvio->thd->net);
   my_afree(buff);
   DBUG_RETURN (res);
 }
 
-static bool secure_auth(MPVIO_EXT *mpvio)
+static bool secure_auth(THD *thd)
 {
-  THD *thd;
   if (!opt_secure_auth)
     return 0;
+
   /*
     If the server is running in secure auth mode, short scrambles are 
     forbidden. Extra juggling to report the same error as the old code.
   */
-
-  thd= current_thd;
-  if (mpvio->client_capabilities & CLIENT_PROTOCOL_41)
+  if (thd->client_capabilities & CLIENT_PROTOCOL_41)
   {
     my_error(ER_SERVER_IS_IN_SECURE_AUTH_MODE, MYF(0),
-             mpvio->auth_info.user_name,
-             mpvio->auth_info.host_or_ip);
+             thd->security_ctx->user,
+             thd->security_ctx->host_or_ip);
     general_log_print(thd, COM_CONNECT, ER(ER_SERVER_IS_IN_SECURE_AUTH_MODE),
-                      mpvio->auth_info.user_name,
-                      mpvio->auth_info.host_or_ip);
+                      thd->security_ctx->user,
+                      thd->security_ctx->host_or_ip);
   }
   else
   {
@@ -8117,10 +8111,10 @@ static bool send_plugin_request_packet(MPVIO_EXT *mpvio,
 {
   DBUG_ASSERT(mpvio->packets_written == 1);
   DBUG_ASSERT(mpvio->packets_read == 1);
-  NET *net= mpvio->net;
+  NET *net= &mpvio->thd->net;
   static uchar switch_plugin_request_buf[]= { 254 };
-
   DBUG_ENTER("send_plugin_request_packet");
+
   mpvio->status= MPVIO_EXT::FAILURE; // the status is no longer RESTART
 
   const char *client_auth_plugin=
@@ -8142,7 +8136,7 @@ static bool send_plugin_request_packet(MPVIO_EXT *mpvio,
     client_auth_plugin == old_password_plugin_name.str;
 
   if (switch_from_long_to_short_scramble)
-    DBUG_RETURN (secure_auth(mpvio) ||
+    DBUG_RETURN (secure_auth(mpvio->thd) ||
                  my_net_write(net, switch_plugin_request_buf, 1) ||
                  net_flush(net));
 
@@ -8152,34 +8146,16 @@ static bool send_plugin_request_packet(MPVIO_EXT *mpvio,
     ask an old 4.0 client to use the new 4.1 authentication protocol.
   */
   bool switch_from_short_to_long_scramble=
-    old_password_plugin_name.str == mpvio->cached_client_reply.plugin && 
+    old_password_plugin_name.str == mpvio->cached_client_reply.plugin &&
     client_auth_plugin == native_password_plugin_name.str;
 
   if (switch_from_short_to_long_scramble)
   {
     my_error(ER_NOT_SUPPORTED_AUTH_MODE, MYF(0));
-    general_log_print(current_thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
+    general_log_print(mpvio->thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
     DBUG_RETURN (1);
   }
 
-  /*
-    If we're dealing with an older client we can't just send a change plugin
-    packet to re-initiate the authentication handshake, because the client 
-    won't understand it. The good thing is that we don't need to : the old client
-    expects us to just check the user credentials here, which we can do by just reading
-    the cached data that are placed there by parse_com_change_user_packet() 
-    In this case we just do nothing and behave as if normal authentication
-    should continue.
-  */
-  if (!(mpvio->client_capabilities & CLIENT_PLUGIN_AUTH))
-  {
-    DBUG_PRINT("info", ("old client sent a COM_CHANGE_USER"));
-    DBUG_ASSERT(mpvio->cached_client_reply.pkt);
-    /* get the status back so the read can process the cached result */
-    mpvio->status= MPVIO_EXT::RESTART; 
-    DBUG_RETURN(0);
-  }
-
   DBUG_PRINT("info", ("requesting client to use the %s plugin", 
                       client_auth_plugin));
   DBUG_RETURN(net_write_command(net, switch_plugin_request_buf[0],
@@ -8203,26 +8179,18 @@ static bool send_plugin_request_packet(MPVIO_EXT *mpvio,
 */
 static bool find_mpvio_user(MPVIO_EXT *mpvio)
 {
+  Security_context *sctx= mpvio->thd->security_ctx;
   DBUG_ENTER("find_mpvio_user");
-  DBUG_PRINT("info", ("entry: %s", mpvio->auth_info.user_name));
   DBUG_ASSERT(mpvio->acl_user == 0);
+
   mysql_mutex_lock(&acl_cache->lock);
   for (uint i=0; i < acl_users.elements; i++)
   {
     ACL_USER *acl_user_tmp= dynamic_element(&acl_users, i, ACL_USER*);
-    if ((!acl_user_tmp->user || 
-         !strcmp(mpvio->auth_info.user_name, acl_user_tmp->user)) &&
-        compare_hostname(&acl_user_tmp->host, mpvio->host, mpvio->ip))
-    {
-      mpvio->acl_user= acl_user_tmp->copy(mpvio->mem_root);
-      if (acl_user_tmp->plugin.str == native_password_plugin_name.str ||
-          acl_user_tmp->plugin.str == old_password_plugin_name.str)
-        mpvio->acl_user_plugin= acl_user_tmp->plugin;
-      else
-        make_lex_string_root(mpvio->mem_root, 
-                             &mpvio->acl_user_plugin, 
-                             acl_user_tmp->plugin.str, 
-                             acl_user_tmp->plugin.length, 0);
+    if ((!acl_user_tmp->user || !strcmp(sctx->user, acl_user_tmp->user)) &&
+        compare_hostname(&acl_user_tmp->host, sctx->host, sctx->ip))
+    {
+      mpvio->acl_user= acl_user_tmp->copy(mpvio->thd->mem_root);
       break;
     }
   }
@@ -8230,29 +8198,32 @@ static bool find_mpvio_user(MPVIO_EXT *mpvio)
 
   if (!mpvio->acl_user)
   {
-    login_failed_error(mpvio, mpvio->auth_info.password_used);
+    login_failed_error(mpvio->thd);
     DBUG_RETURN (1);
   }
 
   /* user account requires non-default plugin and the client is too old */
   if (mpvio->acl_user->plugin.str != native_password_plugin_name.str &&
       mpvio->acl_user->plugin.str != old_password_plugin_name.str &&
-      !(mpvio->client_capabilities & CLIENT_PLUGIN_AUTH))
+      !(mpvio->thd->client_capabilities & CLIENT_PLUGIN_AUTH))
   {
     DBUG_ASSERT(my_strcasecmp(system_charset_info, mpvio->acl_user->plugin.str,
                               native_password_plugin_name.str));
     DBUG_ASSERT(my_strcasecmp(system_charset_info, mpvio->acl_user->plugin.str,
                               old_password_plugin_name.str));
     my_error(ER_NOT_SUPPORTED_AUTH_MODE, MYF(0));
-    general_log_print(current_thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
+    general_log_print(mpvio->thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
     DBUG_RETURN (1);
   }
 
+  mpvio->auth_info.user_name= sctx->user;
+  mpvio->auth_info.user_name_length= strlen(sctx->user);
   mpvio->auth_info.auth_string= mpvio->acl_user->auth_string.str;
   mpvio->auth_info.auth_string_length= 
     (unsigned long) mpvio->acl_user->auth_string.length;
   strmake(mpvio->auth_info.authenticated_as, mpvio->acl_user->user ?
           mpvio->acl_user->user : "", USERNAME_LENGTH);
+
   DBUG_PRINT("info", ("exit: user=%s, auth_string=%s, authenticated as=%s"
                       "plugin=%s",
                       mpvio->auth_info.user_name,
@@ -8266,7 +8237,9 @@ static bool find_mpvio_user(MPVIO_EXT *mpvio)
 /* the packet format is described in send_change_user_packet() */
 static bool parse_com_change_user_packet(MPVIO_EXT *mpvio, uint packet_length)
 {
-  NET *net= mpvio->net;
+  THD *thd= mpvio->thd;
+  NET *net= &thd->net;
+  Security_context *sctx= thd->security_ctx;
 
   char *user= (char*) net->read_pos;
   char *end= user + packet_length;
@@ -8274,11 +8247,11 @@ static bool parse_com_change_user_packet(MPVIO_EXT *mpvio, uint packet_length)
   char *passwd= strend(user) + 1;
   uint user_len= passwd - user - 1;
   char *db= passwd;
-  char db_buff[NAME_LEN + 1];                 // buffer to store db in utf8
+  char db_buff[SAFE_NAME_LEN + 1];            // buffer to store db in utf8
   char user_buff[USERNAME_LENGTH + 1];	      // buffer to store user in utf8
   uint dummy_errors;
-
   DBUG_ENTER ("parse_com_change_user_packet");
+
   if (passwd >= end)
   {
     my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
@@ -8295,7 +8268,7 @@ static bool parse_com_change_user_packet(MPVIO_EXT *mpvio, uint packet_length)
     Cast *passwd to an unsigned char, so that it doesn't extend the sign for
     *passwd > 127 and become 2**32-127+ after casting to uint.
   */
-  uint passwd_len= (mpvio->client_capabilities & CLIENT_SECURE_CONNECTION ?
+  uint passwd_len= (thd->client_capabilities & CLIENT_SECURE_CONNECTION ?
                     (uchar) (*passwd++) : strlen(passwd));
 
   db+= passwd_len + 1;
@@ -8315,38 +8288,39 @@ static bool parse_com_change_user_packet(MPVIO_EXT *mpvio, uint packet_length)
 
   if (ptr + 1 < end)
   {
-    if (mpvio->charset_adapter->init_client_charset(uint2korr(ptr)))
+    if (thd_init_client_charset(thd, uint2korr(ptr)))
       DBUG_RETURN(1);
+    thd->update_charset();
   }
 
-
   /* Convert database and user names to utf8 */
   db_len= copy_and_convert(db_buff, sizeof(db_buff) - 1, system_charset_info,
-                           db, db_len, mpvio->charset_adapter->charset(),
-                           &dummy_errors);
-  db_buff[db_len]= 0;
+                           db, db_len, thd->charset(), &dummy_errors);
 
   user_len= copy_and_convert(user_buff, sizeof(user_buff) - 1,
-                                  system_charset_info, user, user_len,
-                                  mpvio->charset_adapter->charset(),
-                                  &dummy_errors);
-  user_buff[user_len]= 0;
+                             system_charset_info, user, user_len,
+                             thd->charset(), &dummy_errors);
 
-  /* we should not free mpvio->user here: it's saved by dispatch_command() */
-  if (!(mpvio->auth_info.user_name= my_strndup(user_buff, user_len, MYF(MY_WME))))
-    return 1;
-  mpvio->auth_info.user_name_length= user_len;
+  if (!(sctx->user= my_strndup(user_buff, user_len, MYF(MY_WME))))
+    DBUG_RETURN(1);
 
-  if (make_lex_string_root(mpvio->mem_root, 
-                           &mpvio->db, db_buff, db_len, 0) == 0)
+  /* Clear variables that are allocated */
+  thd->user_connect= 0;
+  strmake(sctx->priv_user, sctx->user, USERNAME_LENGTH);
+
+  if (thd->make_lex_string(&mpvio->db, db_buff, db_len, 0) == 0)
     DBUG_RETURN(1); /* The error is set by make_lex_string(). */
 
+  /*
+    Clear thd->db: it points to memory that will be freed when the
+    connection is closed, and we don't want to accidentally free a
+    wrong pointer if the connect fails.
+  */
+  thd->reset_db(NULL, 0);
+
   if (!initialized)
   {
     // if mysqld's been started with --skip-grant-tables option
-    strmake(mpvio->auth_info.authenticated_as, 
-            mpvio->auth_info.user_name, USERNAME_LENGTH);
-
     mpvio->status= MPVIO_EXT::SUCCESS;
     DBUG_RETURN(0);
   }
@@ -8356,7 +8330,7 @@ static bool parse_com_change_user_packet(MPVIO_EXT *mpvio, uint packet_length)
     DBUG_RETURN(1);
 
   char *client_plugin;
-  if (mpvio->client_capabilities & CLIENT_PLUGIN_AUTH)
+  if (thd->client_capabilities & CLIENT_PLUGIN_AUTH)
   {
     client_plugin= ptr + 2;
     if (client_plugin >= end)
@@ -8364,10 +8338,11 @@ static bool parse_com_change_user_packet(MPVIO_EXT *mpvio, uint packet_length)
       my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
       DBUG_RETURN(1);
     }
+    client_plugin= fix_plugin_ptr(client_plugin);
   }
   else
   {
-    if (mpvio->client_capabilities & CLIENT_SECURE_CONNECTION)
+    if (thd->client_capabilities & CLIENT_SECURE_CONNECTION)
       client_plugin= native_password_plugin_name.str;
     else
     {
@@ -8378,8 +8353,8 @@ static bool parse_com_change_user_packet(MPVIO_EXT *mpvio, uint packet_length)
         old_password_plugin, otherwise MySQL will think that server 
         and client plugins don't match.
       */
-      if (mpvio->acl_user->salt_len == 0)
-        mpvio->acl_user_plugin= old_password_plugin_name;
+      if (mpvio->acl_user->auth_string.length == 0)
+        mpvio->acl_user->plugin= old_password_plugin_name;
     }
   }
 
@@ -8397,195 +8372,47 @@ static bool parse_com_change_user_packet(MPVIO_EXT *mpvio, uint packet_length)
   DBUG_RETURN (0);
 }
 
-#ifndef EMBEDDED_LIBRARY
-
-/** Get a string according to the protocol of the underlying buffer. */
-typedef char * (*get_proto_string_func_t) (char **, size_t *, size_t *);
-
-/**
-  Get a string formatted according to the 4.1 version of the MySQL protocol.
-
-  @param buffer[in, out]    Pointer to the user-supplied buffer to be scanned.
-  @param max_bytes_available[in, out]  Limit the bytes to scan.
-  @param string_length[out] The number of characters scanned not including
-                            the null character.
-
-  @remark Strings are always null character terminated in this version of the
-          protocol.
-
-  @remark The string_length does not include the terminating null character.
-          However, after the call, the buffer is increased by string_length+1
-          bytes, beyond the null character if there still available bytes to
-          scan.
-
-  @return pointer to beginning of the string scanned.
-    @retval NULL The buffer content is malformed
-*/
-
-static
-char *get_41_protocol_string(char **buffer,
-                             size_t *max_bytes_available,
-                             size_t *string_length)
-{
-  char *str= (char *)memchr(*buffer, '\0', *max_bytes_available);
-
-  if (str == NULL)
-    return NULL;
-
-  *string_length= (size_t)(str - *buffer);
-  *max_bytes_available-= *string_length + 1;
-  str= *buffer;
-  *buffer += *string_length + 1;
-
-  return str;
-}
-
-
-/**
-  Get a string formatted according to the 4.0 version of the MySQL protocol.
-
-  @param buffer[in, out]    Pointer to the user-supplied buffer to be scanned.
-  @param max_bytes_available[in, out]  Limit the bytes to scan.
-  @param string_length[out] The number of characters scanned not including
-                            the null character.
-
-  @remark If there are not enough bytes left after the current position of
-          the buffer to satisfy the current string, the string is considered
-          to be empty and a pointer to empty_c_string is returned.
-
-  @remark A string at the end of the packet is not null terminated.
-
-  @return Pointer to beginning of the string scanned, or a pointer to a empty
-          string.
-*/
-static
-char *get_40_protocol_string(char **buffer,
-                             size_t *max_bytes_available,
-                             size_t *string_length)
-{
-  char *str;
-  size_t len;
-
-  /* No bytes to scan left, treat string as empty. */
-  if ((*max_bytes_available) == 0)
-  {
-    *string_length= 0;
-    return empty_c_string;
-  }
-
-  str= (char *) memchr(*buffer, '\0', *max_bytes_available);
-
-  /*
-    If the string was not null terminated by the client,
-    the remainder of the packet is the string. Otherwise,
-    advance the buffer past the end of the null terminated
-    string.
-  */
-  if (str == NULL)
-    len= *string_length= *max_bytes_available;
-  else
-    len= (*string_length= (size_t)(str - *buffer)) + 1;
-
-  str= *buffer;
-  *buffer+= len;
-  *max_bytes_available-= len;
-
-  return str;
-}
-
-/**
-  Get a length encoded string from a user-supplied buffer.
-
-  @param buffer[in, out] The buffer to scan; updates position after scan.
-  @param max_bytes_available[in, out] Limit the number of bytes to scan
-  @param string_length[out] Number of characters scanned
-
-  @remark In case the length is zero, then the total size of the string is
-    considered to be 1 byte; the size byte.
-
-  @return pointer to first byte after the header in buffer.
-    @retval NULL The buffer content is malformed
-*/
-
-static
-char *get_length_encoded_string(char **buffer,
-                                size_t *max_bytes_available,
-                                size_t *string_length)
-{
-  if (*max_bytes_available == 0)
-    return NULL;
-
-  /* Do double cast to prevent overflow from signed / unsigned conversion */
-  size_t str_len= (size_t)(unsigned char)**buffer;
-
-  /*
-    If the length encoded string has the length 0
-    the total size of the string is only one byte long (the size byte)
-  */
-  if (str_len == 0)
-  {
-    ++*buffer;
-    *string_length= 0;
-    /*
-      Return a pointer to the 0 character so the return value will be
-      an empty string.
-    */
-    return *buffer-1;
-  }
-
-  if (str_len >= *max_bytes_available)
-    return NULL;
-
-  char *str= *buffer+1;
-  *string_length= str_len;
-  *max_bytes_available-= *string_length + 1;
-  *buffer+= *string_length + 1;
-  return str;
-}
-#endif
-
 
 /* the packet format is described in send_client_reply_packet() */
 static ulong parse_client_handshake_packet(MPVIO_EXT *mpvio,
                                            uchar **buff, ulong pkt_len)
 {
 #ifndef EMBEDDED_LIBRARY
-  NET *net= mpvio->net;
+  THD *thd= mpvio->thd;
+  NET *net= &thd->net;
   char *end;
-
   DBUG_ASSERT(mpvio->status == MPVIO_EXT::FAILURE);
 
   if (pkt_len < MIN_HANDSHAKE_SIZE)
     return packet_error;
 
   if (mpvio->connect_errors)
-    reset_host_errors(mpvio->ip);
+    reset_host_errors(thd->main_security_ctx.ip);
 
   ulong client_capabilities= uint2korr(net->read_pos);
   if (client_capabilities & CLIENT_PROTOCOL_41)
   {
-    client_capabilities|= ((ulong) uint2korr(net->read_pos + 2)) << 16;
-    mpvio->max_client_packet_length= uint4korr(net->read_pos + 4);
+    client_capabilities|= ((ulonglong) uint2korr(net->read_pos + 2)) << 16;
+    thd->max_client_packet_length= uint4korr(net->read_pos + 4);
     DBUG_PRINT("info", ("client_character_set: %d", (uint) net->read_pos[8]));
-    if (mpvio->charset_adapter->init_client_charset((uint) net->read_pos[8]))
+    if (thd_init_client_charset(thd, (uint) net->read_pos[8]))
       return packet_error;
+    thd->update_charset();
     end= (char*) net->read_pos + 32;
   }
   else
   {
-    mpvio->max_client_packet_length= uint3korr(net->read_pos + 2);
+    thd->max_client_packet_length= uint3korr(net->read_pos + 2);
     end= (char*) net->read_pos + 5;
   }
 
   /* Disable those bits which are not supported by the client. */
-  mpvio->client_capabilities&= client_capabilities;
+  thd->client_capabilities&= client_capabilities;
 
-
-#if defined(HAVE_OPENSSL)
-  DBUG_PRINT("info", ("client capabilities: %lu", mpvio->client_capabilities));
-  if (mpvio->client_capabilities & CLIENT_SSL)
+  DBUG_PRINT("info", ("client capabilities: %lu", thd->client_capabilities));
+  if (thd->client_capabilities & CLIENT_SSL)
   {
-    unsigned long errptr;
+    unsigned long errptr __attribute__((unused));
 
     /* Do the SSL layering. */
     if (!ssl_acceptor_fd)
@@ -8607,134 +8434,99 @@ static ulong parse_client_handshake_packet(MPVIO_EXT *mpvio,
       return packet_error;
     }
   }
-#endif
 
-  if (end > (char *)net->read_pos + pkt_len)
+  if (thd->client_capabilities & CLIENT_IGNORE_SPACE)
+    thd->variables.sql_mode|= MODE_IGNORE_SPACE;
+  if (thd->client_capabilities & CLIENT_INTERACTIVE)
+    thd->variables.net_wait_timeout= thd->variables.net_interactive_timeout;
+
+  if (end >= (char*) net->read_pos+ pkt_len +2)
     return packet_error;
 
-  if ((mpvio->client_capabilities & CLIENT_TRANSACTIONS) &&
+  if ((thd->client_capabilities & CLIENT_TRANSACTIONS) &&
       opt_using_transactions)
-    net->return_status= mpvio->server_status;
-
-  /*
-    The 4.0 and 4.1 versions of the protocol differ on how strings
-    are terminated. In the 4.0 version, if a string is at the end
-    of the packet, the string is not null terminated. Do not assume
-    that the returned string is always null terminated.
-  */
-  get_proto_string_func_t get_string;
-
-  if (mpvio->client_capabilities & CLIENT_PROTOCOL_41)
-    get_string= get_41_protocol_string;
-  else
-    get_string= get_40_protocol_string;
+    net->return_status= &thd->server_status;
 
-  /*
-    In order to safely scan a head for '\0' string terminators
-    we must keep track of how many bytes remain in the allocated
-    buffer or we might read past the end of the buffer.
-  */
-  size_t bytes_remaining_in_packet= pkt_len - (end - (char *)net->read_pos);
-
-  size_t user_len;
-  char *user= get_string(&end, &bytes_remaining_in_packet, &user_len);
-  if (user == NULL)
-    return packet_error;
+  char *user= end;
+  char *passwd= strend(user)+1;
+  uint user_len= passwd - user - 1, db_len;
+  char *db= passwd;
+  char db_buff[SAFE_NAME_LEN + 1];      // buffer to store db in utf8
+  char user_buff[USERNAME_LENGTH + 1];	// buffer to store user in utf8
+  uint dummy_errors;
 
   /*
-    Old clients send a null-terminated string as password; new clients send
+    Old clients send a null-terminated string as password; new clients send
     the size (1 byte) + string (not null-terminated). Hence in case of empty
     password both send '\0'.
-  */
-  size_t passwd_len= 0;
-  char *passwd= NULL;
 
-  if (mpvio->client_capabilities & CLIENT_SECURE_CONNECTION)
-  {
-    /*
-      4.1+ password. First byte is password length.
-    */
-    passwd= get_length_encoded_string(&end, &bytes_remaining_in_packet,
-                                      &passwd_len);
-  }
-  else
-  {
-    /*
-      Old passwords are zero terminated strings.
-    */
-    passwd= get_string(&end, &bytes_remaining_in_packet, &passwd_len);
-  }
-
-  if (passwd == NULL)
-    return packet_error;
-
-  size_t db_len= 0;
-  char *db= NULL;
-
-  if (mpvio->client_capabilities & CLIENT_CONNECT_WITH_DB)
-  {
-    db= get_string(&end, &bytes_remaining_in_packet, &db_len);
-    if (db == NULL)
-      return packet_error;
-  }
+    This strlen() can't be easily deleted without changing the protocol.
 
-  /*
-    Set the default for the password supplied flag for non-existing users
-    as the default plugin (native passsword authentication) would do it
-    for compatibility reasons.
+    Cast *passwd to an unsigned char, so that it doesn't extend the sign for
+    *passwd > 127 and become 2**32-127+ after casting to uint.
   */
-  if (passwd_len)
-    mpvio->auth_info.password_used= PASSWORD_USED_YES;
+  uint passwd_len= thd->client_capabilities & CLIENT_SECURE_CONNECTION ?
+                   (uchar)(*passwd++) : strlen(passwd);
 
-  size_t client_plugin_len= 0;
-  char *client_plugin= get_string(&end, &bytes_remaining_in_packet,
-                                  &client_plugin_len);
-  if (client_plugin == NULL)
-    client_plugin= &empty_c_string[0];
+  db= thd->client_capabilities & CLIENT_CONNECT_WITH_DB ?
+    db + passwd_len + 1 : 0;
 
-  char db_buff[NAME_LEN + 1];           // buffer to store db in utf8
-  char user_buff[USERNAME_LENGTH + 1];	// buffer to store user in utf8
-  uint dummy_errors;
+  if (passwd + passwd_len + test(db) > (char *)net->read_pos + pkt_len)
+    return packet_error;
 
+  /* strlen() can't be easily deleted without changing the protocol */
+  db_len= db ? strlen(db) : 0;
 
-  /*
-    Copy and convert the user and database names to the character set used
-    by the server. Since 4.1 all database names are stored in UTF-8. Also,
-    ensure that the names are properly null-terminated as this is relied
-    upon later.
-  */
+  char *client_plugin= passwd + passwd_len + (db ? db_len + 1 : 0);
+
+  /* Since 4.1 all database names are stored in utf8 */
   if (db)
   {
     db_len= copy_and_convert(db_buff, sizeof(db_buff) - 1, system_charset_info,
-                             db, db_len, mpvio->charset_adapter->charset(),
-                             &dummy_errors);
-    db_buff[db_len]= '\0';
+                             db, db_len, thd->charset(), &dummy_errors);
     db= db_buff;
   }
 
   user_len= copy_and_convert(user_buff, sizeof(user_buff) - 1,
                              system_charset_info, user, user_len,
-                             mpvio->charset_adapter->charset(),
-                             &dummy_errors);
-  user_buff[user_len]= '\0';
+                             thd->charset(), &dummy_errors);
   user= user_buff;
 
   /* If username starts and ends in "'", chop them off */
   if (user_len > 1 && user[0] == '\'' && user[user_len - 1] == '\'')
   {
-    user[user_len - 1]= 0;
     user++;
     user_len-= 2;
   }
 
-  if (make_lex_string_root(mpvio->mem_root, 
-                           &mpvio->db, db, db_len, 0) == 0)
+  /*
+    Clip username to allowed length in characters (not bytes).  This is
+    mostly for backward compatibility.
+  */
+  {
+    CHARSET_INFO *cs= system_charset_info;
+    int           err;
+
+    user_len= (uint) cs->cset->well_formed_len(cs, user, user + user_len,
+                                               USERNAME_CHAR_LENGTH, &err);
+    user[user_len]= '\0';
+  }
+
+  Security_context *sctx= thd->security_ctx;
+
+  if (thd->make_lex_string(&mpvio->db, db, db_len, 0) == 0)
     return packet_error; /* The error is set by make_lex_string(). */
-  if (mpvio->auth_info.user_name)
-    my_free(mpvio->auth_info.user_name);
-  if (!(mpvio->auth_info.user_name= my_strndup(user, user_len, MYF(MY_WME))))
+  my_free(sctx->user);
+  if (!(sctx->user= my_strndup(user, user_len, MYF(MY_WME))))
     return packet_error; /* The error is set by my_strdup(). */
-  mpvio->auth_info.user_name_length= user_len;
+
+
+  /*
+    Clear thd->db: it points to memory that will be freed when the
+    connection is closed, and we don't want to accidentally free a
+    wrong pointer if the connect fails.
+  */
+  thd->reset_db(NULL, 0);
 
   if (!initialized)
   {
@@ -8743,30 +8535,31 @@ static ulong parse_client_handshake_packet(MPVIO_EXT *mpvio,
     return packet_error;
   }
 
+  thd->password= passwd_len > 0;
   if (find_mpvio_user(mpvio))
     return packet_error;
 
-  if (!(mpvio->client_capabilities & CLIENT_PLUGIN_AUTH))
+  if (thd->client_capabilities & CLIENT_PLUGIN_AUTH)
   {
-    /*
-      An old client is connecting
-    */
-    if (mpvio->client_capabilities & CLIENT_SECURE_CONNECTION)
+    if (client_plugin >= (char *)net->read_pos + pkt_len)
+      return packet_error;
+    client_plugin= fix_plugin_ptr(client_plugin);
+  }
+  else
+  {
+    if (thd->client_capabilities & CLIENT_SECURE_CONNECTION)
       client_plugin= native_password_plugin_name.str;
     else
     {
-      /*
-        A really old client is connecting
-      */
-      client_plugin= old_password_plugin_name.str;
+      client_plugin=  old_password_plugin_name.str;
       /*
         For a passwordless accounts we use native_password_plugin.
         But when an old 4.0 client connects to it, we change it to
         old_password_plugin, otherwise MySQL will think that server 
         and client plugins don't match.
       */
-      if (mpvio->acl_user->salt_len == 0)
-        mpvio->acl_user_plugin= old_password_plugin_name;
+      if (mpvio->acl_user->auth_string.length == 0)
+        mpvio->acl_user->plugin= old_password_plugin_name;
     }
   }
   
@@ -8779,7 +8572,7 @@ static ulong parse_client_handshake_packet(MPVIO_EXT *mpvio,
     restarted and a server auth plugin will read the data that the client
     has just send. Cache them to return in the next server_mpvio_read_packet().
   */
-  if (my_strcasecmp(system_charset_info, mpvio->acl_user_plugin.str,
+  if (my_strcasecmp(system_charset_info, mpvio->acl_user->plugin.str,
                     plugin_name(mpvio->plugin)->str) != 0)
   {
     mpvio->cached_client_reply.pkt= passwd;
@@ -8807,8 +8600,8 @@ static ulong parse_client_handshake_packet(MPVIO_EXT *mpvio,
                                    mpvio->cached_server_packet.pkt_len))
       return packet_error;
 
-    passwd_len= my_net_read(mpvio->net);
-    passwd = (char*) mpvio->net->read_pos;
+    passwd_len= my_net_read(&mpvio->thd->net);
+    passwd= (char*)mpvio->thd->net.read_pos;
   }
 
   *buff= (uchar*) passwd;
@@ -8820,23 +8613,6 @@ static ulong parse_client_handshake_packet(MPVIO_EXT *mpvio,
 
 
 /**
-  Make sure that when sending plugin supplied data to the client they
-  are not considered a special out-of-band command, like e.g. 
-  \255 (error) or \254 (change user request packet) or \0 (OK).
-  To avoid this the server will send all plugin data packets "wrapped" 
-  in a command \1.
-  Note that the client will continue sending its replies unrwapped.
-*/
-
-static inline int 
-wrap_plguin_data_into_proper_command(NET *net, 
-                                     const uchar *packet, int packet_len)
-{
-  return net_write_command(net, 1, (uchar *) "", 0, packet, packet_len);
-}
-
-
-/**
   vio->write_packet() callback method for server authentication plugins
 
   This function is called by a server authentication plugin, when it wants
@@ -8851,25 +8627,32 @@ static int server_mpvio_write_packet(MYSQL_PLUGIN_VIO *param,
 {
   MPVIO_EXT *mpvio= (MPVIO_EXT *) param;
   int res;
-
   DBUG_ENTER("server_mpvio_write_packet");
-  /* 
-    Reset cached_client_reply if not an old client doing mysql_change_user, 
-    as this is where the password from COM_CHANGE_USER is stored.
-  */
-  if (!((!(mpvio->client_capabilities & CLIENT_PLUGIN_AUTH)) && 
-        mpvio->status == MPVIO_EXT::RESTART &&
-        mpvio->cached_client_reply.plugin == 
-        ((st_mysql_auth *) (plugin_decl(mpvio->plugin)->info))->client_auth_plugin
-        ))
-    mpvio->cached_client_reply.pkt= 0;
+
+  /* reset cached_client_reply */
+  mpvio->cached_client_reply.pkt= 0;
+
   /* for the 1st packet we wrap plugin data into the handshake packet */
   if (mpvio->packets_written == 0)
     res= send_server_handshake_packet(mpvio, (char*) packet, packet_len);
   else if (mpvio->status == MPVIO_EXT::RESTART)
     res= send_plugin_request_packet(mpvio, packet, packet_len);
+  else if (packet_len > 0 && (*packet == 1 || *packet == 255 || *packet == 254))
+  {
+    /*
+      We cannot allow a plugin data packet to start with 255 or 254,
+      as the client would treat it as an error or "change plugin" packet.
+      We escape these bytes by prefixing the packet with \1. Consequently,
+      a leading \1 byte has to be escaped too.
+    */
+    res= net_write_command(&mpvio->thd->net, 1, (uchar*)"", 0,
+                           packet, packet_len);
+  }
   else
-    res= wrap_plguin_data_into_proper_command(mpvio->net, packet, packet_len);
+  {
+    res= my_net_write(&mpvio->thd->net, packet, packet_len) ||
+         net_flush(&mpvio->thd->net);
+  }
   mpvio->packets_written++;
   DBUG_RETURN(res);
 }
@@ -8888,7 +8671,6 @@ static int server_mpvio_read_packet(MYSQL_PLUGIN_VIO *param, uchar **buf)
 {
   MPVIO_EXT *mpvio= (MPVIO_EXT *) param;
   ulong pkt_len;
-
   DBUG_ENTER("server_mpvio_read_packet");
   if (mpvio->packets_written == 0)
   {
@@ -8896,10 +8678,10 @@ static int server_mpvio_read_packet(MYSQL_PLUGIN_VIO *param, uchar **buf)
       plugin wants to read the data without sending anything first.
       send an empty packet to force a server handshake packet to be sent
     */
-    if (mpvio->write_packet(mpvio, 0, 0))
+    if (server_mpvio_write_packet(mpvio, 0, 0))
       pkt_len= packet_error;
     else
-      pkt_len= my_net_read(mpvio->net);
+      pkt_len= my_net_read(&mpvio->thd->net);
   }
   else if (mpvio->cached_client_reply.pkt)
   {
@@ -8924,26 +8706,18 @@ static int server_mpvio_read_packet(MYSQL_PLUGIN_VIO *param, uchar **buf)
       DBUG_RETURN ((int) mpvio->cached_client_reply.pkt_len);
     }
 
-    /* older clients don't support change of client plugin request */
-    if (!(mpvio->client_capabilities & CLIENT_PLUGIN_AUTH))
-    {
-      mpvio->status= MPVIO_EXT::FAILURE;
-      pkt_len= packet_error;
-      goto err;
-    }
-
     /*
       But if the client has used the wrong plugin, the cached data are
       useless. Furthermore, we have to send a "change plugin" request
       to the client.
     */
-    if (mpvio->write_packet(mpvio, 0, 0))
+    if (server_mpvio_write_packet(mpvio, 0, 0))
       pkt_len= packet_error;
     else
-      pkt_len= my_net_read(mpvio->net);
+      pkt_len= my_net_read(&mpvio->thd->net);
   }
   else
-    pkt_len= my_net_read(mpvio->net);
+    pkt_len= my_net_read(&mpvio->thd->net);
 
   if (pkt_len == packet_error)
     goto err;
@@ -8961,15 +8735,16 @@ static int server_mpvio_read_packet(MYSQL_PLUGIN_VIO *param, uchar **buf)
       goto err;
   }
   else
-    *buf= mpvio->net->read_pos;
+    *buf= mpvio->thd->net.read_pos;
 
   DBUG_RETURN((int)pkt_len);
 
 err:
   if (mpvio->status == MPVIO_EXT::FAILURE)
   {
-    inc_host_errors(mpvio->ip);
-    my_error(ER_HANDSHAKE_ERROR, MYF(0));
+    inc_host_errors(mpvio->thd->security_ctx->ip);
+    if (!mpvio->thd->is_error())
+      my_error(ER_HANDSHAKE_ERROR, MYF(0));
   }
   DBUG_RETURN(-1);
 }
@@ -8982,13 +8757,12 @@ static void server_mpvio_info(MYSQL_PLUGIN_VIO *vio,
                               MYSQL_PLUGIN_VIO_INFO *info)
 {
   MPVIO_EXT *mpvio= (MPVIO_EXT *) vio;
-  mpvio_info(mpvio->net->vio, info);
+  mpvio_info(mpvio->thd->net.vio, info);
 }
 
-#ifndef NO_EMBEDDED_ACCESS_CHECKS
 static bool acl_check_ssl(THD *thd, const ACL_USER *acl_user)
 {
-#if defined(HAVE_OPENSSL)
+#ifdef HAVE_OPENSSL
   Vio *vio= thd->net.vio;
   SSL *ssl= (SSL *) vio->ssl_arg;
   X509 *cert;
@@ -9004,7 +8778,7 @@ static bool acl_check_ssl(THD *thd, const ACL_USER *acl_user)
   case SSL_TYPE_NOT_SPECIFIED:                  // Impossible
   case SSL_TYPE_NONE:                           // SSL is not required
     return 0;
-#if defined(HAVE_OPENSSL)
+#ifdef HAVE_OPENSSL
   case SSL_TYPE_ANY:                            // Any kind of SSL is ok
     return vio_type(vio) != VIO_TYPE_SSL;
   case SSL_TYPE_X509: /* Client should have any valid certificate. */
@@ -9090,7 +8864,6 @@ static bool acl_check_ssl(THD *thd, const ACL_USER *acl_user)
   }
   return 1;
 }
-#endif
 
 
 static int do_auth_once(THD *thd, const LEX_STRING *auth_plugin_name,
@@ -9098,24 +8871,21 @@ static int do_auth_once(THD *thd, const LEX_STRING *auth_plugin_name,
 {
   int res= CR_OK, old_status= MPVIO_EXT::FAILURE;
   bool unlock_plugin= false;
-  plugin_ref plugin;
+  plugin_ref plugin= NULL;
 
   if (auth_plugin_name->str == native_password_plugin_name.str)
     plugin= native_password_plugin;
-  else
 #ifndef EMBEDDED_LIBRARY
-  if (auth_plugin_name->str == old_password_plugin_name.str)
+  else if (auth_plugin_name->str == old_password_plugin_name.str)
     plugin= old_password_plugin;
   else if ((plugin= my_plugin_lock_by_name(thd, auth_plugin_name,
                                            MYSQL_AUTHENTICATION_PLUGIN)))
     unlock_plugin= true;
-  else
 #endif
-    plugin= NULL;
-    
+
   mpvio->plugin= plugin;
   old_status= mpvio->status;
-  
+
   if (plugin)
   {
     st_mysql_auth *auth= (st_mysql_auth *) plugin_decl(plugin)->info;
@@ -9146,47 +8916,6 @@ static int do_auth_once(THD *thd, const LEX_STRING *auth_plugin_name,
 }
 
 
-static void
-server_mpvio_initialize(THD *thd, MPVIO_EXT *mpvio, uint connect_errors,
-                        Thd_charset_adapter *charset_adapter)
-{
-  memset(mpvio, 0, sizeof(MPVIO_EXT));
-  mpvio->read_packet= server_mpvio_read_packet;
-  mpvio->write_packet= server_mpvio_write_packet;
-  mpvio->info= server_mpvio_info;
-  mpvio->auth_info.host_or_ip= thd->security_ctx->host_or_ip;
-  mpvio->auth_info.host_or_ip_length= 
-    (unsigned int) strlen(thd->security_ctx->host_or_ip);
-  mpvio->auth_info.user_name= NULL;
-  mpvio->auth_info.user_name_length= 0;
-  mpvio->connect_errors= connect_errors;
-  mpvio->status= MPVIO_EXT::FAILURE;
-
-  mpvio->client_capabilities= thd->client_capabilities;
-  mpvio->mem_root= thd->mem_root;
-  mpvio->scramble= thd->scramble;
-  mpvio->rand= &thd->rand;
-  mpvio->thread_id= thd->thread_id;
-  mpvio->server_status= &thd->server_status;
-  mpvio->net= &thd->net;
-  mpvio->ip= thd->security_ctx->ip;
-  mpvio->host= thd->security_ctx->host;
-  mpvio->charset_adapter= charset_adapter;
-}
-
-
-static void
-server_mpvio_update_thd(THD *thd, MPVIO_EXT *mpvio)
-{
-  thd->client_capabilities= mpvio->client_capabilities;
-  thd->max_client_packet_length= mpvio->max_client_packet_length;
-  if (mpvio->client_capabilities & CLIENT_INTERACTIVE)
-    thd->variables.net_wait_timeout= thd->variables.net_interactive_timeout;
-  thd->security_ctx->user= mpvio->auth_info.user_name;
-  if (thd->client_capabilities & CLIENT_IGNORE_SPACE)
-    thd->variables.sql_mode|= MODE_IGNORE_SPACE;
-}
-
 /**
   Perform the handshake, authorize the client and update thd sctx variables.
 
@@ -9201,44 +8930,38 @@ server_mpvio_update_thd(THD *thd, MPVIO_EXT *mpvio)
   @retval 0  success, thd is updated.
   @retval 1  error
 */
-bool 
-acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
+bool acl_authenticate(THD *thd, uint connect_errors,
+                      uint com_change_user_pkt_len)
 {
   int res= CR_OK;
   MPVIO_EXT mpvio;
-  Thd_charset_adapter charset_adapter(thd);
-
   const LEX_STRING *auth_plugin_name= default_auth_plugin_name;
   enum  enum_server_command command= com_change_user_pkt_len ? COM_CHANGE_USER
                                                              : COM_CONNECT;
-
   DBUG_ENTER("acl_authenticate");
+
   compile_time_assert(MYSQL_USERNAME_LENGTH == USERNAME_LENGTH);
 
-  server_mpvio_initialize(thd, &mpvio, connect_errors, &charset_adapter);
+  bzero(&mpvio, sizeof(mpvio));
+  mpvio.read_packet= server_mpvio_read_packet;
+  mpvio.write_packet= server_mpvio_write_packet;
+  mpvio.info= server_mpvio_info;
+  mpvio.thd= thd;
+  mpvio.connect_errors= connect_errors;
+  mpvio.status= MPVIO_EXT::FAILURE;
+  mpvio.auth_info.host_or_ip= thd->security_ctx->host_or_ip;
+  mpvio.auth_info.host_or_ip_length= 
+    (unsigned int) strlen(thd->security_ctx->host_or_ip);
 
   DBUG_PRINT("info", ("com_change_user_pkt_len=%u", com_change_user_pkt_len));
 
-  /*
-    Clear thd->db as it points to something, that will be freed when
-    connection is closed. We don't want to accidentally free a wrong
-    pointer if connect failed.
-  */
-  thd->reset_db(NULL, 0);
-
   if (command == COM_CHANGE_USER)
   {
     mpvio.packets_written++; // pretend that a server handshake packet was sent
     mpvio.packets_read++;    // take COM_CHANGE_USER packet into account
 
-    /* Clear variables that are allocated */
-    thd->user_connect= 0;
-
     if (parse_com_change_user_packet(&mpvio, com_change_user_pkt_len))
-    {
-      server_mpvio_update_thd(thd, &mpvio);
       DBUG_RETURN(1);
-    }
 
     DBUG_ASSERT(mpvio.status == MPVIO_EXT::RESTART ||
                 mpvio.status == MPVIO_EXT::SUCCESS);
@@ -9246,21 +8969,21 @@ acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
   else
   {
     /* mark the thd as having no scramble yet */
-    mpvio.scramble[SCRAMBLE_LENGTH]= 1;
-    
+    thd->scramble[SCRAMBLE_LENGTH]= 1;
+
     /*
-     perform the first authentication attempt, with the default plugin.
-     This sends the server handshake packet, reads the client reply
-     with a user name, and performs the authentication if everyone has used
-     the correct plugin.
+      perform the first authentication attempt, with the default plugin.
+      This sends the server handshake packet, reads the client reply
+      with a user name, and performs the authentication if everyone has used
+      the correct plugin.
     */
 
     res= do_auth_once(thd, auth_plugin_name, &mpvio);  
   }
 
   /*
-   retry the authentication, if - after receiving the user name -
-   we found that we need to switch to a non-default plugin
+    retry the authentication, if - after receiving the user name -
+    we found that we need to switch to a non-default plugin
   */
   if (mpvio.status == MPVIO_EXT::RESTART)
   {
@@ -9272,8 +8995,6 @@ acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
     res= do_auth_once(thd, auth_plugin_name, &mpvio);
   }
 
-  server_mpvio_update_thd(thd, &mpvio);
-
   Security_context *sctx= thd->security_ctx;
   const ACL_USER *acl_user= mpvio.acl_user;
 
@@ -9285,19 +9006,18 @@ acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
 
     if sctx->user is unset it's protocol failure, bad packet.
   */
-  if (mpvio.auth_info.user_name)
+  if (sctx->user)
   {
-    if (strcmp(mpvio.auth_info.authenticated_as, mpvio.auth_info.user_name))
+    if (strcmp(sctx->priv_user, sctx->user))
     {
       general_log_print(thd, command, "%s@%s as %s on %s",
-                        mpvio.auth_info.user_name, mpvio.auth_info.host_or_ip,
-                        mpvio.auth_info.authenticated_as ? 
-                          mpvio.auth_info.authenticated_as : "anonymous",
+                        sctx->user, sctx->host_or_ip,
+                        sctx->priv_user[0] ? sctx->priv_user : "anonymous",
                         mpvio.db.str ? mpvio.db.str : (char*) "");
     }
     else
       general_log_print(thd, command, (char*) "%s@%s on %s",
-                        mpvio.auth_info.user_name, mpvio.auth_info.host_or_ip,
+                        sctx->user, sctx->host_or_ip,
                         mpvio.db.str ? mpvio.db.str : (char*) "");
   }
 
@@ -9306,8 +9026,8 @@ acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
     DBUG_ASSERT(mpvio.status == MPVIO_EXT::FAILURE);
 
     if (!thd->is_error())
-      login_failed_error(&mpvio, mpvio.auth_info.password_used);
-    DBUG_RETURN (1);
+      login_failed_error(thd);
+    DBUG_RETURN(1);
   }
 
   sctx->proxy_user[0]= 0;
@@ -9330,7 +9050,7 @@ acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
       if (!proxy_user)
       {
         if (!thd->is_error())
-          login_failed_error(&mpvio, mpvio.auth_info.password_used);
+          login_failed_error(thd);
         DBUG_RETURN(1);
       }
 
@@ -9346,7 +9066,7 @@ acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
       if (!acl_proxy_user)
       {
         if (!thd->is_error())
-          login_failed_error(&mpvio, mpvio.auth_info.password_used);
+          login_failed_error(thd);
         mysql_mutex_unlock(&acl_cache->lock);
         DBUG_RETURN(1);
       }
@@ -9366,7 +9086,6 @@ acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
     else
       *sctx->priv_host= 0;
 
-#ifndef NO_EMBEDDED_ACCESS_CHECKS
     /*
       OK. Let's check the SSL. Historically it was checked after the password,
       as an additional layer, not instead of the password
@@ -9374,23 +9093,20 @@ acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
     */
     if (acl_check_ssl(thd, acl_user))
     {
-      if (!thd->is_error())
-        login_failed_error(&mpvio, thd->password);
+      login_failed_error(thd);
       DBUG_RETURN(1);
     }
 
     /* Don't allow the user to connect if he has done too many queries */
     if ((acl_user->user_resource.questions || acl_user->user_resource.updates ||
          acl_user->user_resource.conn_per_hour ||
-         acl_user->user_resource.user_conn || 
+         acl_user->user_resource.user_conn ||
          global_system_variables.max_user_connections) &&
         get_or_create_user_conn(thd,
           (opt_old_style_user_limits ? sctx->user : sctx->priv_user),
           (opt_old_style_user_limits ? sctx->host_or_ip : sctx->priv_host),
           &acl_user->user_resource))
       DBUG_RETURN(1); // The error is set by get_or_create_user_conn()
-
-#endif
   }
   else
     sctx->skip_grants();
@@ -9401,6 +9117,9 @@ acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
        global_system_variables.max_user_connections) &&
       check_for_max_user_connections(thd, thd->user_connect))
   {
+    /* Ensure we don't decrement thd->user_connect->connections twice */
+    thd->user_connect= 0;
+    status_var_increment(denied_connections);
     DBUG_RETURN(1); // The error is set in check_for_max_user_connections()
   }
 
@@ -9417,7 +9136,8 @@ acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
       !(thd->main_security_ctx.master_access & SUPER_ACL))
   {
     mysql_mutex_lock(&LOCK_connection_count);
-    bool count_ok= (connection_count <= max_connections);
+    bool count_ok= (*thd->scheduler->connection_count <=
+                    *thd->scheduler->max_connections);
     mysql_mutex_unlock(&LOCK_connection_count);
     if (!count_ok)
     {                                         // too many connections
@@ -9439,15 +9159,13 @@ acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
     if (mysql_change_db(thd, &mpvio.db, FALSE))
     {
       /* mysql_change_db() has pushed the error message. */
-      if (thd->user_connect)
-      {
-        decrease_user_connections(thd->user_connect);
-        thd->user_connect= 0;
-      }
+      status_var_increment(thd->status_var.access_denied_errors);
       DBUG_RETURN(1);
     }
   }
 
+  thd->net.net_skip_rest_factor= 2;  // skip at most 2*max_packet_size
+
   if (mpvio.auth_info.external_user[0])
     sctx->external_user= my_strdup(mpvio.auth_info.external_user, MYF(0));
 
@@ -9456,16 +9174,6 @@ acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
   else
     my_ok(thd);
 
-#if defined(MYSQL_SERVER) && !defined(EMBEDDED_LIBRARY)
-  /*
-    Allow the network layer to skip big packets. Although a malicious
-    authenticated session might use this to trick the server to read
-    big packets indefinitely, this is a previously established behavior
-    that needs to be preserved as to not break backwards compatibility.
-  */
-  thd->net.skip_big_packet= TRUE;
-#endif
-
   /* Ready to handle queries */
   DBUG_RETURN(0);
 }
@@ -9484,16 +9192,17 @@ static int native_password_authenticate(MYSQL_PLUGIN_VIO *vio,
   uchar *pkt;
   int pkt_len;
   MPVIO_EXT *mpvio= (MPVIO_EXT *) vio;
-
+  THD *thd=mpvio->thd;
   DBUG_ENTER("native_password_authenticate");
 
   /* generate the scramble, or reuse the old one */
-  if (mpvio->scramble[SCRAMBLE_LENGTH])
-    create_random_string(mpvio->scramble, SCRAMBLE_LENGTH, mpvio->rand);
-
-  /* send it to the client */
-  if (mpvio->write_packet(mpvio, (uchar*) mpvio->scramble, SCRAMBLE_LENGTH + 1))
-    DBUG_RETURN(CR_ERROR);
+  if (thd->scramble[SCRAMBLE_LENGTH])
+  {
+    create_random_string(thd->scramble, SCRAMBLE_LENGTH, &thd->rand);
+    /* and send it to the client */
+    if (mpvio->write_packet(mpvio, (uchar*)thd->scramble, SCRAMBLE_LENGTH + 1))
+      DBUG_RETURN(CR_ERROR);
+  }
 
   /* reply and authenticate */
 
@@ -9527,7 +9236,7 @@ static int native_password_authenticate(MYSQL_PLUGIN_VIO *vio,
       plugins are involved.
 
       Anyway, it still looks simple from a plugin point of view:
-      "send the scramble, read the reply and authenticate"
+      "send the scramble, read the reply and authenticate".
       All the magic is transparently handled by the server.
     </digression>
   */
@@ -9542,7 +9251,7 @@ static int native_password_authenticate(MYSQL_PLUGIN_VIO *vio,
 #endif
 
   if (pkt_len == 0) /* no password */
-    DBUG_RETURN(mpvio->acl_user->salt_len != 0 ? CR_ERROR : CR_OK);
+    DBUG_RETURN(info->auth_string[0] ? CR_ERROR : CR_OK);
 
   info->password_used= PASSWORD_USED_YES;
   if (pkt_len == SCRAMBLE_LENGTH)
@@ -9550,11 +9259,13 @@ static int native_password_authenticate(MYSQL_PLUGIN_VIO *vio,
     if (!mpvio->acl_user->salt_len)
       DBUG_RETURN(CR_ERROR);
 
-    DBUG_RETURN(check_scramble(pkt, mpvio->scramble, mpvio->acl_user->salt) ?
-                CR_ERROR : CR_OK);
+    if (check_scramble(pkt, thd->scramble, mpvio->acl_user->salt))
+      DBUG_RETURN(CR_ERROR);
+    else
+      DBUG_RETURN(CR_OK);
   }
 
-  inc_host_errors(mpvio->ip);
+  inc_host_errors(mpvio->thd->security_ctx->ip);
   my_error(ER_HANDSHAKE_ERROR, MYF(0));
   DBUG_RETURN(CR_ERROR);
 }
@@ -9565,14 +9276,16 @@ static int old_password_authenticate(MYSQL_PLUGIN_VIO *vio,
   uchar *pkt;
   int pkt_len;
   MPVIO_EXT *mpvio= (MPVIO_EXT *) vio;
+  THD *thd=mpvio->thd;
 
   /* generate the scramble, or reuse the old one */
-  if (mpvio->scramble[SCRAMBLE_LENGTH])
-    create_random_string(mpvio->scramble, SCRAMBLE_LENGTH, mpvio->rand);
-
-  /* send it to the client */
-  if (mpvio->write_packet(mpvio, (uchar*) mpvio->scramble, SCRAMBLE_LENGTH + 1))
-    return CR_ERROR;
+  if (thd->scramble[SCRAMBLE_LENGTH])
+  {
+    create_random_string(thd->scramble, SCRAMBLE_LENGTH, &thd->rand);
+    /* and send it to the client */
+    if (mpvio->write_packet(mpvio, (uchar*)thd->scramble, SCRAMBLE_LENGTH + 1))
+      return CR_ERROR;
+  }
 
   /* read the reply and authenticate */
   if ((pkt_len= mpvio->read_packet(mpvio, &pkt)) < 0)
@@ -9591,9 +9304,9 @@ static int old_password_authenticate(MYSQL_PLUGIN_VIO *vio,
     pkt_len= strnlen((char*)pkt, pkt_len);
 
   if (pkt_len == 0) /* no password */
-    return mpvio->acl_user->salt_len != 0 ? CR_ERROR : CR_OK;
+    return info->auth_string[0] ? CR_ERROR : CR_OK;
 
-  if (secure_auth(mpvio))
+  if (secure_auth(thd))
     return CR_ERROR;
 
   info->password_used= PASSWORD_USED_YES;
@@ -9603,12 +9316,12 @@ static int old_password_authenticate(MYSQL_PLUGIN_VIO *vio,
     if (!mpvio->acl_user->salt_len)
       return CR_ERROR;
 
-    return check_scramble_323(pkt, mpvio->scramble,
+    return check_scramble_323(pkt, thd->scramble,
                              (ulong *) mpvio->acl_user->salt) ? 
                              CR_ERROR : CR_OK;
   }
 
-  inc_host_errors(mpvio->ip);
+  inc_host_errors(mpvio->thd->security_ctx->ip);
   my_error(ER_HANDSHAKE_ERROR, MYF(0));
   return CR_ERROR;
 }
@@ -9660,3 +9373,35 @@ mysql_declare_plugin(mysql_password)
 }
 mysql_declare_plugin_end;
 
+maria_declare_plugin(mysql_password) /* both password authentication plugins */
+{ /* descriptor for the native password plugin */
+  MYSQL_AUTHENTICATION_PLUGIN,                  /* type constant    */
+  &native_password_handler,                     /* type descriptor  */
+  native_password_plugin_name.str,              /* Name             */
+  "R.J.Silk, Sergei Golubchik",                 /* Author           */
+  "Native MySQL authentication",                /* Description      */
+  PLUGIN_LICENSE_GPL,                           /* License          */
+  NULL,                                         /* Init function    */
+  NULL,                                         /* Deinit function  */
+  0x0100,                                       /* Version (1.0)    */
+  NULL,                                         /* status variables */
+  NULL,                                         /* system variables */
+  "1.0",                                        /* String version   */
+  MariaDB_PLUGIN_MATURITY_BETA                  /* Maturity         */
+},
+{ /* descriptor for the old (MySQL-4.0 style) password plugin */
+  MYSQL_AUTHENTICATION_PLUGIN,                  /* type constant    */
+  &old_password_handler,                        /* type descriptor  */
+  old_password_plugin_name.str,                 /* Name             */
+  "R.J.Silk, Sergei Golubchik",                 /* Author           */
+  "Old MySQL-4.0 authentication",               /* Description      */
+  PLUGIN_LICENSE_GPL,                           /* License          */
+  NULL,                                         /* Init function    */
+  NULL,                                         /* Deinit function  */
+  0x0100,                                       /* Version (1.0)    */
+  NULL,                                         /* status variables */
+  NULL,                                         /* system variables */
+  "1.0",                                        /* String version   */
+  MariaDB_PLUGIN_MATURITY_BETA                  /* Maturity         */
+}
+maria_declare_plugin_end;
diff --git a/sql/sql_acl.h b/sql/sql_acl.h
index 050649d39bc..3169746419c 100644
--- a/sql/sql_acl.h
+++ b/sql/sql_acl.h
@@ -173,6 +173,14 @@ enum mysql_db_table_field
 extern const TABLE_FIELD_DEF mysql_db_table_def;
 extern bool mysql_user_table_is_in_short_password_format;
 
+/* Map a "password used" code to the matching access-denied error number. */
+static inline int access_denied_error_code(int passwd_used)
+{
+  if (passwd_used != 2)
+    return ER_ACCESS_DENIED_ERROR;
+  return ER_ACCESS_DENIED_NO_PASSWORD_ERROR;
+}
+
 /* prototypes */
 
 bool hostname_requires_resolving(const char *hostname);
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
index ab59ff501aa..8a2f475b6f2 100644
--- a/sql/sql_admin.cc
+++ b/sql/sql_admin.cc
@@ -29,6 +29,35 @@
 #include "sql_parse.h"                       // check_table_access
 #include "sql_admin.h"
 
+/*
+  Prepare, run and clean up after mysql_recreate_table(): roll back the
+  current statement and transaction, close tables, release transactional
+  MDL locks, then recreate the table with binary logging disabled.
+  Returns HA_ADMIN_OK or HA_ADMIN_FAILED as int (a bool return would
+  collapse any non-zero code to 1 before callers store it in an int).
+*/
+static int admin_recreate_table(THD *thd, TABLE_LIST *table_list)
+{
+  int result_code;
+  DBUG_ENTER("admin_recreate_table");
+
+  trans_rollback_stmt(thd);
+  trans_rollback(thd);
+  close_thread_tables(thd);
+  thd->mdl_context.release_transactional_locks();
+  DEBUG_SYNC(thd, "ha_admin_try_alter");
+  tmp_disable_binlog(thd); // binlogging is done by caller if wanted
+  result_code= mysql_recreate_table(thd, table_list);
+  reenable_binlog(thd);
+  /* Drop a pushed OK status; keep an error so the caller can report it. */
+  if (thd->stmt_da->is_ok())
+    thd->stmt_da->reset_diagnostics_area();
+  table_list->table= NULL;
+  result_code= result_code ? HA_ADMIN_FAILED : HA_ADMIN_OK;
+  DBUG_RETURN(result_code);
+}
+
+
 static int send_check_errmsg(THD *thd, TABLE_LIST* table,
 			     const char* operator_name, const char* errmsg)
 
@@ -278,7 +307,9 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
   Protocol *protocol= thd->protocol;
   LEX *lex= thd->lex;
   int result_code;
+  bool need_repair_or_alter= 0;
   DBUG_ENTER("mysql_admin_table");
+  DBUG_PRINT("enter", ("extra_open_options: %u", extra_open_options));
 
   field_list.push_back(item = new Item_empty_string("Table", NAME_CHAR_LEN*2));
   item->maybe_null = 1;
@@ -296,15 +327,13 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
 
   for (table= tables; table; table= table->next_local)
   {
-    char table_name[NAME_LEN*2+2];
+    char table_name[SAFE_NAME_LEN*2+2];
     char* db = table->db;
     bool fatal_error=0;
     bool open_error;
 
     DBUG_PRINT("admin", ("table: '%s'.'%s'", table->db, table->table_name));
-    DBUG_PRINT("admin", ("extra_open_options: %u", extra_open_options));
     strxmov(table_name, db, ".", table->table_name, NullS);
-    thd->open_options|= extra_open_options;
     table->lock_type= lock_type;
     /*
       To make code safe for re-execution we need to reset type of MDL
@@ -335,6 +364,13 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
       if (view_operator_func == NULL)
         table->required_type=FRMTYPE_TABLE;
 
+      if (lex->sql_command == SQLCOM_CHECK ||
+          lex->sql_command == SQLCOM_REPAIR ||
+          lex->sql_command == SQLCOM_ANALYZE ||
+          lex->sql_command == SQLCOM_OPTIMIZE)
+	thd->prepare_derived_at_open= TRUE;
+
+      thd->open_options|= extra_open_options;
       if (!thd->locked_tables_mode && repair_table_use_frm)
       {
         /*
@@ -367,10 +403,11 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
 
         open_error= open_and_lock_tables(thd, table, TRUE, 0);
       }
+      thd->open_options&= ~extra_open_options;
+      thd->prepare_derived_at_open= FALSE;
 
       table->next_global= save_next_global;
       table->next_local= save_next_local;
-      thd->open_options&= ~extra_open_options;
 
       /*
         If open_and_lock_tables() failed, close_thread_tables() will close
@@ -576,28 +613,14 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
     if (operator_func == &handler::ha_repair &&
         !(check_opt->sql_flags & TT_USEFRM))
     {
-      if ((table->table->file->check_old_types() == HA_ADMIN_NEEDS_ALTER) ||
-          (table->table->file->ha_check_for_upgrade(check_opt) ==
-           HA_ADMIN_NEEDS_ALTER))
+      handler *file= table->table->file;
+      int check_old_types=   file->check_old_types();
+      int check_for_upgrade= file->ha_check_for_upgrade(check_opt);
+
+      if (check_old_types == HA_ADMIN_NEEDS_ALTER ||
+          check_for_upgrade == HA_ADMIN_NEEDS_ALTER)
       {
-        DBUG_PRINT("admin", ("recreating table"));
-        trans_rollback_stmt(thd);
-        trans_rollback(thd);
-        close_thread_tables(thd);
-        thd->mdl_context.release_transactional_locks();
-        tmp_disable_binlog(thd); // binlogging is done by caller if wanted
-        result_code= mysql_recreate_table(thd, table);
-        reenable_binlog(thd);
-        /*
-          mysql_recreate_table() can push OK or ERROR.
-          Clear 'OK' status. If there is an error, keep it:
-          we will store the error message in a result set row 
-          and then clear.
-        */
-        if (thd->stmt_da->is_ok())
-          thd->stmt_da->reset_diagnostics_area();
-        table->table= NULL;
-        result_code= result_code ? HA_ADMIN_FAILED : HA_ADMIN_OK;
+        result_code= admin_recreate_table(thd, table);
         goto send_result;
       }
     }
@@ -606,6 +629,14 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
     result_code = (table->table->file->*operator_func)(thd, check_opt);
     DBUG_PRINT("admin", ("operator_func returned: %d", result_code));
 
+    if (result_code == HA_ADMIN_NOT_IMPLEMENTED && need_repair_or_alter)
+    {
+      /*
+        repair was not implemented and we need to upgrade the table
+        to a new version so we recreate the table with ALTER TABLE
+      */
+      result_code= admin_recreate_table(thd, table);
+    }
 send_result:
 
     lex->cleanup_after_one_table_open();
@@ -699,11 +730,6 @@ send_result_message:
         reopen the table and do ha_innobase::analyze() on it.
         We have to end the row, so analyze could return more rows.
       */
-      trans_commit_stmt(thd);
-      trans_commit(thd);
-      close_thread_tables(thd);
-      thd->mdl_context.release_transactional_locks();
-      DEBUG_SYNC(thd, "ha_admin_try_alter");
       protocol->store(STRING_WITH_LEN("note"), system_charset_info);
       protocol->store(STRING_WITH_LEN(
           "Table does not support optimize, doing recreate + analyze instead"),
@@ -714,22 +740,13 @@ send_result_message:
       TABLE_LIST *save_next_local= table->next_local,
                  *save_next_global= table->next_global;
       table->next_local= table->next_global= 0;
-      tmp_disable_binlog(thd); // binlogging is done by caller if wanted
-      result_code= mysql_recreate_table(thd, table);
-      reenable_binlog(thd);
-      /*
-        mysql_recreate_table() can push OK or ERROR.
-        Clear 'OK' status. If there is an error, keep it:
-        we will store the error message in a result set row 
-        and then clear.
-      */
-      if (thd->stmt_da->is_ok())
-        thd->stmt_da->reset_diagnostics_area();
+      result_code= admin_recreate_table(thd, table);
+
       trans_commit_stmt(thd);
       trans_commit(thd);
       close_thread_tables(thd);
       thd->mdl_context.release_transactional_locks();
-      table->table= NULL;
+
       if (!result_code) // recreation went ok
       {
         /* Clear the ticket released above. */
diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc
index 6247d581830..c6c02773286 100644
--- a/sql/sql_alter.cc
+++ b/sql/sql_alter.cc
@@ -103,7 +103,7 @@ bool Alter_table_statement::execute(THD *thd)
                             &alter_info,
                             select_lex->order_list.elements,
                             select_lex->order_list.first,
-                            lex->ignore);
+                            lex->ignore, lex->online);
 
   DBUG_RETURN(result);
 }
diff --git a/sql/sql_analyse.cc b/sql/sql_analyse.cc
index a6bd26ba45a..d99337bd210 100644
--- a/sql/sql_analyse.cc
+++ b/sql/sql_analyse.cc
@@ -247,7 +247,8 @@ bool test_if_number(NUM_INFO *info, const char *str, uint str_len)
       }
       DBUG_RETURN(0);
     }
-    for (str++; *(end - 1) == '0'; end--) ; // jump over zeros at the end
+    for (str++; *(end - 1) == '0'; end--)  // jump over zeros at the end
+      ;
     if (str == end)		     // number was something like '123.000'
     {
       char *endpos= (char*) str;
@@ -748,7 +749,7 @@ int analyse::end_of_records()
 	tmp_str.append(STRING_WITH_LEN(" NOT NULL"));
       output_str_length = tmp_str.length();
       func_items[9]->set(tmp_str.ptr(), tmp_str.length(), tmp_str.charset());
-      if (result->send_data(result_fields))
+      if (result->send_data(result_fields) > 0)
 	return -1;
       continue;
     }
@@ -793,7 +794,7 @@ int analyse::end_of_records()
     if (!(*f)->nulls)
       ans.append(STRING_WITH_LEN(" NOT NULL"));
     func_items[9]->set(ans.ptr(), ans.length(), ans.charset());
-    if (result->send_data(result_fields))
+    if (result->send_data(result_fields) > 0)
       return -1;
   }
   return 0;
@@ -1002,9 +1003,9 @@ void field_decimal::get_opt_type(String *answer,
   my_decimal_set_zero(&zero);
   my_bool is_unsigned= (my_decimal_cmp(&zero, &min_arg) >= 0);
 
-  length= my_snprintf(buff, sizeof(buff), "DECIMAL(%d, %d)",
-                      (int) (max_length - (item->decimals ? 1 : 0)),
-                      item->decimals);
+  length= sprintf(buff, "DECIMAL(%d, %d)",
+                  (int) (max_length - (item->decimals ? 1 : 0)),
+                  item->decimals);
   if (is_unsigned)
     length= (uint) (strmov(buff+length, " UNSIGNED")- buff);
   answer->append(buff, length);
diff --git a/sql/sql_analyse.h b/sql/sql_analyse.h
index ed6d10e6eb7..34c8e2da3d4 100644
--- a/sql/sql_analyse.h
+++ b/sql/sql_analyse.h
@@ -76,7 +76,7 @@ class field_info :public Sql_alloc
 protected:
   ulong   treemem, tree_elements, empty, nulls, min_length, max_length;
   uint	  room_in_tree;
-  my_bool found;
+  bool found;
   TREE	  tree;
   Item	  *item;
   analyse *pc;
diff --git a/sql/sql_array.h b/sql/sql_array.h
index 06318c000e2..67f1f1c2ad7 100644
--- a/sql/sql_array.h
+++ b/sql/sql_array.h
@@ -70,4 +70,96 @@ public:
   }
 };
 
+/*
+  Array of pointers to Elem that uses memory from MEM_ROOT
+
+  MEM_ROOT has no realloc() so this is supposed to be used for cases when
+  reallocations are rare: growing the array allocates a new buffer and
+  copies the pointers into it; the old buffer is not freed here.
+*/
+
+template <class Elem> class Array
+{
+  enum {alloc_increment = 16};
+  Elem **buffer;                        /* the stored pointers */
+  uint n_elements, max_element;         /* used / allocated slots in buffer */
+public:
+  /*
+    Allocate room for 'prealloc' pointers on mem_root. If the allocation
+    fails the array starts with zero capacity and append() will try to
+    allocate again.
+  */
+  Array(MEM_ROOT *mem_root, uint prealloc=16)
+  {
+    buffer= (Elem**)alloc_root(mem_root, prealloc * sizeof(Elem*));
+    max_element = buffer? prealloc : 0;
+    n_elements= 0;
+  }
+
+  /* Return the element that the idx-th stored pointer points to */
+  Elem& at(int idx)
+  {
+    return *buffer[idx];
+  }
+
+  /* Address of the first stored pointer */
+  Elem **front()
+  {
+    return buffer;
+  }
+
+  /* Address just past the last stored pointer */
+  Elem **back()
+  {
+    return buffer + n_elements;
+  }
+
+  /*
+    Add a pointer at the end of the array, growing the buffer by
+    alloc_increment slots when it is full.
+
+    RETURN
+      FALSE  ok
+      TRUE   out of memory, the array is unchanged
+  */
+  bool append(MEM_ROOT *mem_root, Elem *el)
+  {
+    if (n_elements == max_element)
+    {
+      uint new_size= n_elements + alloc_increment;
+      Elem **newbuf;
+      if (!(newbuf= (Elem**)alloc_root(mem_root, new_size * sizeof(Elem*))))
+        return TRUE;
+      /* buffer may be NULL here if the constructor's allocation failed */
+      if (n_elements)
+        memcpy(newbuf, buffer, n_elements*sizeof(Elem*));
+      buffer= newbuf;
+      /* Record the new capacity so that the next overflow is detected */
+      max_element= new_size;
+    }
+    buffer[n_elements++]= el;
+    return FALSE;
+  }
+
+  /* Number of pointers currently stored */
+  int elements()
+  {
+    return n_elements;
+  }
+
+  /* Drop all stored pointers; the allocated buffer is kept */
+  void clear()
+  {
+    n_elements= 0;
+  }
+
+  typedef int (*CMP_FUNC)(Elem * const *el1, Elem *const *el2);
+
+  /* Sort the stored pointers in place with my_qsort() and cmp_func */
+  void sort(CMP_FUNC cmp_func)
+  {
+    my_qsort(buffer, n_elements, sizeof(Elem*), (qsort_cmp)cmp_func);
+  }
+};
+
 #endif /* SQL_ARRAY_INCLUDED */
diff --git a/sql/sql_audit.cc b/sql/sql_audit.cc
index d0a9f412a43..131a71c1d6b 100644
--- a/sql/sql_audit.cc
+++ b/sql/sql_audit.cc
@@ -160,7 +160,7 @@ static my_bool acquire_plugins(THD *thd, plugin_ref plugin, void *arg)
   }
   
   /* lock the plugin and add it to the list */
-  plugin= my_plugin_lock(NULL, &plugin);
+  plugin= my_plugin_lock(NULL, plugin);
   insert_dynamic(&thd->audit_class_plugins, (uchar*) &plugin);
 
   return 0;
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 644796ea337..892757d77fe 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2010, 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -38,6 +39,7 @@
                          // mysql_handle_derived,
                          // mysql_derived_filling
 #include "sql_handler.h" // mysql_ha_flush
+#include "sql_test.h"
 #include "sql_partition.h"                      // ALTER_PARTITION_PARAM_TYPE
 #include "log_event.h"                          // Query_log_event
 #include "sql_select.h"
@@ -314,8 +316,12 @@ uint create_table_def_key(THD *thd, char *key,
                           const TABLE_LIST *table_list,
                           bool tmp_table)
 {
-  uint key_length= (uint) (strmov(strmov(key, table_list->db)+1,
-                                  table_list->table_name)-key)+1;
+  char *db_end= strnmov(key, table_list->db, MAX_DBKEY_LENGTH - 2);
+  *db_end++= '\0';
+  char *table_end= strnmov(db_end, table_list->table_name,
+                           key + MAX_DBKEY_LENGTH - 1 - db_end);
+  *table_end++= '\0';
+  uint key_length= (uint) (table_end-key);
   if (tmp_table)
   {
     int4store(key + key_length, thd->server_id);
@@ -674,7 +680,7 @@ get_table_share_with_discover(THD *thd, TABLE_LIST *table_list,
 {
   TABLE_SHARE *share;
   bool exists;
-  DBUG_ENTER("get_table_share_with_create");
+  DBUG_ENTER("get_table_share_with_discover");
 
   share= get_table_share(thd, table_list, key, key_length, db_flags, error,
                          hash_value);
@@ -812,7 +818,7 @@ void release_table_share(TABLE_SHARE *share)
 
 TABLE_SHARE *get_cached_table_share(const char *db, const char *table_name)
 {
-  char key[NAME_LEN*2+2];
+  char key[SAFE_NAME_LEN*2+2];
   TABLE_LIST table_list;
   uint key_length;
   mysql_mutex_assert_owner(&LOCK_open);
@@ -1406,8 +1412,10 @@ close_all_tables_for_name(THD *thd, TABLE_SHARE *share,
       mysql_lock_remove(thd, thd->lock, table);
 
       /* Inform handler that table will be dropped after close */
-      if (table->db_stat) /* Not true for partitioned tables. */
+#ifdef MERGE_FOR_MONTY_TO_FIX
+      if (remove_from_locked_tables && table->db_stat) /* Not true for partitioned tables. */
         table->file->extra(HA_EXTRA_PREPARE_FOR_DROP);
+#endif
       close_thread_table(thd, prev);
     }
     else
@@ -1445,6 +1453,11 @@ void close_thread_tables(THD *thd)
   TABLE *table;
   DBUG_ENTER("close_thread_tables");
 
+#ifdef WITH_ARIA_STORAGE_ENGINE
+  if (!thd->in_sub_stmt)
+    ha_maria::implicit_commit(thd, FALSE);
+#endif
+
 #ifdef EXTRA_DEBUG
   DBUG_PRINT("tcache", ("open tables:"));
   for (table= thd->open_tables; table; table= table->next)
@@ -1573,9 +1586,12 @@ bool close_thread_table(THD *thd, TABLE **table_ptr)
   bool found_old_table= 0;
   TABLE *table= *table_ptr;
   DBUG_ENTER("close_thread_table");
+  DBUG_PRINT("tcache", ("table: '%s'.'%s' 0x%lx", table->s->db.str,
+                        table->s->table_name.str, (long) table));
   DBUG_ASSERT(table->key_read == 0);
   DBUG_ASSERT(!table->file || table->file->inited == handler::NONE);
   mysql_mutex_assert_not_owner(&LOCK_open);
+
   /*
     The metadata lock must be released after giving back
     the table to the table cache.
@@ -1586,6 +1602,12 @@ bool close_thread_table(THD *thd, TABLE **table_ptr)
                                              MDL_SHARED));
   table->mdl_ticket= NULL;
 
+  if (table->file)
+  {
+    table->file->update_global_table_stats();
+    table->file->update_global_index_stats();
+  }
+
   mysql_mutex_lock(&thd->LOCK_thd_data);
   *table_ptr=table->next;
   mysql_mutex_unlock(&thd->LOCK_thd_data);
@@ -1662,12 +1684,11 @@ bool close_temporary_tables(THD *thd)
 
   /* Better add "if exists", in case a RESET MASTER has been done */
   const char stub[]= "DROP /*!40005 TEMPORARY */ TABLE IF EXISTS ";
-  uint stub_len= sizeof(stub) - 1;
-  char buf[256];
-  String s_query= String(buf, sizeof(buf), system_charset_info);
+  char buf[FN_REFLEN];
+  String s_query(buf, sizeof(buf), system_charset_info);
   bool found_user_tables= FALSE;
 
-  memcpy(buf, stub, stub_len);
+  s_query.copy(stub, sizeof(stub)-1, system_charset_info);
 
   /*
     Insertion sort of temp tables by pseudo_thread_id to build ordered list
@@ -1721,19 +1742,25 @@ bool close_temporary_tables(THD *thd)
     {
       bool save_thread_specific_used= thd->thread_specific_used;
       my_thread_id save_pseudo_thread_id= thd->variables.pseudo_thread_id;
+      char db_buf[FN_REFLEN];
+      String db(db_buf, sizeof(db_buf), system_charset_info);
+
       /* Set pseudo_thread_id to be that of the processed table */
       thd->variables.pseudo_thread_id= tmpkeyval(thd, table);
-      String db;
-      db.append(table->s->db.str);
+
+      db.copy(table->s->db.str, table->s->db.length, system_charset_info);
+      /* Reset s_query to the stub if changed by a previous loop iteration */
+      s_query.length(sizeof(stub)-1);
+
       /* Loop forward through all tables that belong to a common database
          within the sublist of common pseudo_thread_id to create single
          DROP query 
       */
-      for (s_query.length(stub_len);
+      for (;
            table && is_user_table(table) &&
              tmpkeyval(thd, table) == thd->variables.pseudo_thread_id &&
              table->s->db.length == db.length() &&
-             strcmp(table->s->db.str, db.ptr()) == 0;
+             memcmp(table->s->db.str, db.ptr(), db.length()) == 0;
            table= next)
       {
         /*
@@ -2139,7 +2166,7 @@ int drop_temporary_table(THD *thd, TABLE_LIST *table_list, bool *is_trans)
   /* Table might be in use by some outer statement. */
   if (table->query_id && table->query_id != thd->query_id)
   {
-    my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias);
+    my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias.c_ptr());
     DBUG_RETURN(-1);
   }
 
@@ -2165,7 +2192,7 @@ void close_temporary_table(THD *thd, TABLE *table,
   DBUG_ENTER("close_temporary_table");
   DBUG_PRINT("tmptable", ("closing table: '%s'.'%s' 0x%lx  alias: '%s'",
                           table->s->db.str, table->s->table_name.str,
-                          (long) table, table->alias));
+                          (long) table, table->alias.c_ptr()));
 
   if (table->prev)
   {
@@ -2212,6 +2239,13 @@ void close_temporary(TABLE *table, bool free_share, bool delete_table)
   DBUG_PRINT("tmptable", ("closing table: '%s'.'%s'",
                           table->s->db.str, table->s->table_name.str));
 
+  /* in_use is not set for replication temporary tables during shutdown */
+  if (table->in_use)
+  {
+    table->file->update_global_table_stats();
+    table->file->update_global_index_stats();
+  }
+
   free_io_cache(table);
   closefrm(table, 0);
   if (delete_table)
@@ -2321,7 +2355,6 @@ void drop_open_table(THD *thd, TABLE *table, const char *db_name,
     DBUG_ASSERT(table == thd->open_tables);
 
     handlerton *table_type= table->s->db_type();
-
     table->file->extra(HA_EXTRA_PREPARE_FOR_DROP);
     close_thread_table(thd, &thd->open_tables);
     /* Remove the table share from the table cache. */
@@ -2707,7 +2740,7 @@ bool open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root,
                      ("query_id: %lu  server_id: %u  pseudo_thread_id: %lu",
                       (ulong) table->query_id, (uint) thd->server_id,
                       (ulong) thd->variables.pseudo_thread_id));
-	  my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias);
+	  my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias.c_ptr());
 	  DBUG_RETURN(TRUE);
 	}
 	table->query_id= thd->query_id;
@@ -2747,7 +2780,7 @@ bool open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root,
       if (table->s->table_cache_key.length == key_length &&
 	  !memcmp(table->s->table_cache_key.str, key, key_length))
       {
-        if (!my_strcasecmp(system_charset_info, table->alias, alias) &&
+        if (!my_strcasecmp(system_charset_info, table->alias.c_ptr(), alias) &&
             table->query_id != thd->query_id && /* skip tables already used */
             (thd->locked_tables_mode == LTM_LOCK_TABLES ||
              table->query_id == 0))
@@ -3132,7 +3165,7 @@ retry_share:
 
  reset:
   /*
-    Check that there is no reference to a condtion from an earlier query
+    Check that there is no reference to a condition from an earlier query
     (cf. Bug#58553). 
   */
   DBUG_ASSERT(table->file->pushed_cond == NULL);
@@ -5286,7 +5319,8 @@ static bool check_lock_and_start_stmt(THD *thd,
   if ((int) lock_type > (int) TL_WRITE_ALLOW_WRITE &&
       (int) table_list->table->reginfo.lock_type <= (int) TL_WRITE_ALLOW_WRITE)
   {
-    my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0), table_list->alias);
+    my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0),
+             table_list->table->alias.c_ptr());
     DBUG_RETURN(1);
   }
   if ((error= table_list->table->file->start_stmt(thd, lock_type)))
@@ -5513,16 +5547,11 @@ bool open_and_lock_tables(THD *thd, TABLE_LIST *tables,
 
   if (derived)
   {
-    if (mysql_handle_derived(thd->lex, &mysql_derived_prepare))
+    if (mysql_handle_derived(thd->lex, DT_INIT))
       goto err;
-    if (thd->fill_derived_tables() &&
-        mysql_handle_derived(thd->lex, &mysql_derived_filling))
-    {
-      mysql_handle_derived(thd->lex, &mysql_derived_cleanup);
+    if (thd->prepare_derived_at_open &&
+        (mysql_handle_derived(thd->lex, DT_PREPARE)))
       goto err;
-    }
-    if (!thd->lex->describe)
-      mysql_handle_derived(thd->lex, &mysql_derived_cleanup);
   }
 
   DBUG_RETURN(FALSE);
@@ -5546,6 +5575,7 @@ err:
     flags       - bitmap of flags to modify how the tables will be open:
                   MYSQL_LOCK_IGNORE_FLUSH - open table even if someone has
                   done a flush on it.
+    dt_phases   - set of flags to pass to the mysql_handle_derived
 
   RETURN
     FALSE - ok
@@ -5556,7 +5586,8 @@ err:
     data from the tables.
 */
 
-bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags)
+bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags,
+                                    uint dt_phases)
 {
   DML_prelocking_strategy prelocking_strategy;
   uint counter;
@@ -5564,7 +5595,7 @@ bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags)
   DBUG_ENTER("open_normal_and_derived_tables");
   DBUG_ASSERT(!thd->fill_derived_tables());
   if (open_tables(thd, &tables, &counter, flags, &prelocking_strategy) ||
-      mysql_handle_derived(thd->lex, &mysql_derived_prepare))
+      mysql_handle_derived(thd->lex, dt_phases))
     goto end;
 
   DBUG_RETURN(0);
@@ -5999,6 +6030,9 @@ static void update_field_dependencies(THD *thd, Field *field, TABLE *table)
     table->covering_keys.intersect(field->part_of_key);
     table->merge_keys.merge(field->part_of_key);
 
+    if (field->vcol_info)
+      table->mark_virtual_col(field);
+
     if (thd->mark_used_columns == MARK_COLUMNS_READ)
       bitmap= table->read_set;
     else
@@ -6068,9 +6102,7 @@ find_field_in_view(THD *thd, TABLE_LIST *table_list,
   Field_iterator_view field_it;
   field_it.set(table_list);
   Query_arena *arena= 0, backup;  
-  
-  DBUG_ASSERT(table_list->schema_table_reformed ||
-              (ref != 0 && table_list->view != 0));
+
   for (; !field_it.end_of_fields(); field_it.next())
   {
     if (!my_strcasecmp(system_charset_info, field_it.name(), name))
@@ -6089,6 +6121,8 @@ find_field_in_view(THD *thd, TABLE_LIST *table_list,
       
       if (!item)
         DBUG_RETURN(0);
+      if (!ref)
+        DBUG_RETURN((Field*) view_ref_found);
       /*
        *ref != NULL means that *ref contains the item that we need to
        replace. If the item was aliased by the user, set the alias to
@@ -6097,10 +6131,15 @@ find_field_in_view(THD *thd, TABLE_LIST *table_list,
       */
       if (*ref && !(*ref)->is_autogenerated_name)
       {
+        if (register_tree_change &&
+            thd->stmt_arena->is_stmt_prepare_or_first_stmt_execute())
+          arena= thd->activate_stmt_arena_if_needed(&backup);
         item->set_name((*ref)->name, (*ref)->name_length,
                        system_charset_info);
         item->real_item()->set_name((*ref)->name, (*ref)->name_length,
                        system_charset_info);
+        if (arena)
+          thd->restore_active_arena(arena, &backup);
       }
       if (register_tree_change)
         thd->change_item_tree(ref, item);
@@ -6226,6 +6265,7 @@ find_field_in_natural_join(THD *thd, TABLE_LIST *table_ref, const char *name,
   {
     /* This is a base table. */
     DBUG_ASSERT(nj_col->view_field == NULL);
+    Item *ref= 0;
     /*
       This fix_fields is not necessary (initially this item is fixed by
       the Item_field constructor; after reopen_tables the Item_func_eq
@@ -6233,12 +6273,13 @@ find_field_in_natural_join(THD *thd, TABLE_LIST *table_ref, const char *name,
       reopening for columns that was dropped by the concurrent connection.
     */
     if (!nj_col->table_field->fixed &&
-        nj_col->table_field->fix_fields(thd, (Item **)&nj_col->table_field))
+        nj_col->table_field->fix_fields(thd, &ref))
     {
       DBUG_PRINT("info", ("column '%s' was dropped by the concurrent connection",
                           nj_col->table_field->name));
       DBUG_RETURN(NULL);
     }
+    DBUG_ASSERT(ref == 0);                      // Should not have changed
     DBUG_ASSERT(nj_col->table_ref->table == nj_col->table_field->field->table);
     found_field= nj_col->table_field->field;
     update_field_dependencies(thd, found_field, nj_col->table_ref->table);
@@ -6277,7 +6318,8 @@ find_field_in_table(THD *thd, TABLE *table, const char *name, uint length,
   Field **field_ptr, *field;
   uint cached_field_index= *cached_field_index_ptr;
   DBUG_ENTER("find_field_in_table");
-  DBUG_PRINT("enter", ("table: '%s', field name: '%s'", table->alias, name));
+  DBUG_PRINT("enter", ("table: '%s', field name: '%s'", table->alias.c_ptr(),
+                       name));
 
   /* We assume here that table->field < NO_CACHED_FIELD_INDEX = UINT_MAX */
   if (cached_field_index < table->s->fields &&
@@ -6492,6 +6534,8 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list,
         Field *field_to_set= NULL;
         if (fld == view_ref_found)
         {
+          if (!ref)
+            DBUG_RETURN(fld);
           Item *it= (*ref)->real_item();
           if (it->type() == Item::FIELD_ITEM)
             field_to_set= ((Item_field*)it)->field;
@@ -6499,6 +6543,8 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list,
           {
             if (thd->mark_used_columns == MARK_COLUMNS_READ)
               it->walk(&Item::register_field_in_read_map, 1, (uchar *) 0);
+            else
+              it->walk(&Item::register_field_in_write_map, 1, (uchar *) 0);
           }
         }
         else
@@ -6608,7 +6654,7 @@ find_field_in_tables(THD *thd, Item_ident *item,
   const char *table_name= item->table_name;
   const char *name= item->field_name;
   uint length=(uint) strlen(name);
-  char name_buff[NAME_LEN+1];
+  char name_buff[SAFE_NAME_LEN+1];
   TABLE_LIST *cur_table= first_table;
   TABLE_LIST *actual_table;
   bool allow_rowid;
@@ -6638,8 +6684,11 @@ find_field_in_tables(THD *thd, Item_ident *item,
       find_field_in_table even in the case of information schema tables
       when table_ref->field_translation != NULL.
       */
-    if (table_ref->table && !table_ref->view)
+    if (table_ref->table && !table_ref->view &&
+        (!table_ref->is_merged_derived() ||
+         (!table_ref->is_multitable() && table_ref->merged_for_insert)))
     {
+
       found= find_field_in_table(thd, table_ref->table, name, length,
                                  TRUE, &(item->cached_field_index));
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
@@ -6664,7 +6713,8 @@ find_field_in_tables(THD *thd, Item_ident *item,
         Only views fields should be marked as dependent, not an underlying
         fields.
       */
-      if (!table_ref->belong_to_view)
+      if (!table_ref->belong_to_view &&
+          !table_ref->belong_to_derived)
       {
         SELECT_LEX *current_sel= thd->lex->current_select;
         SELECT_LEX *last_select= table_ref->select_lex;
@@ -6673,12 +6723,16 @@ find_field_in_tables(THD *thd, Item_ident *item,
           sub query as dependent on the outer query
         */
         if (current_sel != last_select)
+        {
           mark_select_range_as_dependent(thd, last_select, current_sel,
                                          found, *ref, item);
+        }
       }
       return found;
     }
   }
+  else
+    item->can_be_depended= TRUE;
 
   if (db && lower_case_table_names)
   {
@@ -6773,7 +6827,7 @@ find_field_in_tables(THD *thd, Item_ident *item,
       (report_error == REPORT_ALL_ERRORS ||
        report_error == REPORT_EXCEPT_NON_UNIQUE))
   {
-    char buff[NAME_LEN*2 + 2];
+    char buff[SAFE_NAME_LEN*2 + 2];
     if (db && db[0])
     {
       strxnmov(buff,sizeof(buff)-1,db,".",table_name,NullS);
@@ -7250,6 +7304,10 @@ mark_common_columns(THD *thd, TABLE_LIST *table_ref_1, TABLE_LIST *table_ref_2,
     */
     if (nj_col_2 && (!using_fields ||is_using_column_1))
     {
+      /*
+        Create non-fixed fully qualified field and let fix_fields
+        resolve it.
+      */
       Item *item_1=   nj_col_1->create_item(thd);
       Item *item_2=   nj_col_2->create_item(thd);
       Field *field_1= nj_col_1->field();
@@ -7695,7 +7753,11 @@ static bool setup_natural_join_row_types(THD *thd,
   for (left_neighbor= table_ref_it++; left_neighbor ; )
   {
     table_ref= left_neighbor;
-    left_neighbor= table_ref_it++;
+    do
+    {
+      left_neighbor= table_ref_it++;
+    }
+    while (left_neighbor && left_neighbor->sj_subq_pred);
     /* 
       Do not redo work if already done:
       1) for stored procedures,
@@ -7739,13 +7801,11 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields,
 	       List<Item> *sum_func_list,
 	       uint wild_num)
 {
-  if (!wild_num)
-    return(0);
-
   Item *item;
   List_iterator<Item> it(fields);
   Query_arena *arena, backup;
   DBUG_ENTER("setup_wild");
+  DBUG_ASSERT(wild_num != 0);
 
   /*
     Don't use arena if we are not in prepared statements or stored procedures
@@ -7804,6 +7864,9 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields,
     /* make * substituting permanent */
     SELECT_LEX *select_lex= thd->lex->current_select;
     select_lex->with_wild= 0;
+#ifdef HAVE_valgrind
+    if (&select_lex->item_list != &fields)      // Avoid warning
+#endif
     /*   
       The assignment below is translated to memcpy() call (at least on some
       platforms). memcpy() expects that source and destination areas do not
@@ -7831,6 +7894,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array,
   List_iterator<Item> it(fields);
   bool save_is_item_list_lookup;
   DBUG_ENTER("setup_fields");
+  DBUG_PRINT("enter", ("ref_pointer_array: %p", ref_pointer_array));
 
   thd->mark_used_columns= mark_used_columns;
   DBUG_PRINT("info", ("thd->mark_used_columns: %d", thd->mark_used_columns));
@@ -7908,27 +7972,36 @@ bool setup_fields(THD *thd, Item **ref_pointer_array,
     make_leaves_list()
-    list    pointer to pointer on list first element
+    list    list to which the leaf tables are appended
     tables  table list
+    full_table_list whether to include tables from mergeable derived table/view.
+                    we need them for checks for INSERT/UPDATE statements only.
+    boundary        table at which full_table_list gets inverted (can be 0)
 
-  RETURN pointer on pointer to next_leaf of last element
+  RETURN none
 */
 
-TABLE_LIST **make_leaves_list(TABLE_LIST **list, TABLE_LIST *tables)
+void make_leaves_list(List<TABLE_LIST> &list, TABLE_LIST *tables,
+                      bool full_table_list, TABLE_LIST *boundary)
 {
   for (TABLE_LIST *table= tables; table; table= table->next_local)
   {
-    if (table->merge_underlying_list)
+    if (table == boundary)
+      full_table_list= !full_table_list;
+    if (full_table_list && table->is_merged_derived())
     {
-      DBUG_ASSERT(table->view &&
-                  table->effective_algorithm == VIEW_ALGORITHM_MERGE);
-      list= make_leaves_list(list, table->merge_underlying_list);
+      SELECT_LEX *select_lex= table->get_single_select();
+      /*
+        All derived tables/views are expected to be prepared by now, so
+        the table list of the merged select can be walked; its leaves
+        are appended to 'list' in place of the derived table itself.
+      */
+      make_leaves_list(list, select_lex->get_table_list(),
+                       full_table_list, boundary);
     }
     else
     {
-      *list= table;
-      list= &table->next_leaf;
+      list.push_back(table);
     }
   }
-  return list;
 }
 
 /*
@@ -7943,6 +8016,7 @@ TABLE_LIST **make_leaves_list(TABLE_LIST **list, TABLE_LIST *tables)
     leaves        List of join table leaves list (select_lex->leaf_tables)
     refresh       It is onle refresh for subquery
     select_insert It is SELECT ... INSERT command
+    full_table_list a parameter to pass to the make_leaves_list function
 
   NOTE
     Check also that the 'used keys' and 'ignored keys' exists and set up the
@@ -7961,9 +8035,13 @@ TABLE_LIST **make_leaves_list(TABLE_LIST **list, TABLE_LIST *tables)
 
 bool setup_tables(THD *thd, Name_resolution_context *context,
                   List<TABLE_LIST> *from_clause, TABLE_LIST *tables,
-                  TABLE_LIST **leaves, bool select_insert)
+                  List<TABLE_LIST> &leaves, bool select_insert,
+                  bool full_table_list)
 {
   uint tablenr= 0;
+  List_iterator<TABLE_LIST> ti(leaves);
+  TABLE_LIST *table_list;
+
   DBUG_ENTER("setup_tables");
 
   DBUG_ASSERT ((select_insert && !tables->next_name_resolution_table) || !tables || 
@@ -7975,40 +8053,83 @@ bool setup_tables(THD *thd, Name_resolution_context *context,
   TABLE_LIST *first_select_table= (select_insert ?
                                    tables->next_local:
                                    0);
-  if (!(*leaves))
-    make_leaves_list(leaves, tables);
-
-  TABLE_LIST *table_list;
-  for (table_list= *leaves;
-       table_list;
-       table_list= table_list->next_leaf, tablenr++)
+  SELECT_LEX *select_lex= select_insert ? &thd->lex->select_lex :
+                                          thd->lex->current_select;
+  if (select_lex->first_cond_optimization)
   {
-    TABLE *table= table_list->table;
-    table->pos_in_table_list= table_list;
-    if (first_select_table &&
-        table_list->top_table() == first_select_table)
+    leaves.empty();
+    if (!select_lex->is_prep_leaf_list_saved)
     {
-      /* new counting for SELECT of INSERT ... SELECT command */
-      first_select_table= 0;
-      tablenr= 0;
+      make_leaves_list(leaves, tables, full_table_list, first_select_table);
+      select_lex->leaf_tables_exec.empty();
     }
-    setup_table_map(table, table_list, tablenr);
-    if (table_list->process_index_hints(table))
+    else
+    {
+      List_iterator_fast <TABLE_LIST> ti(select_lex->leaf_tables_prep);
+      while ((table_list= ti++))
+        leaves.push_back(table_list);
+    }
+      
+    while ((table_list= ti++))
+    {
+      TABLE *table= table_list->table;
+      table->pos_in_table_list= table_list;
+      if (first_select_table &&
+          table_list->top_table() == first_select_table)
+      {
+        /* new counting for SELECT of INSERT ... SELECT command */
+        first_select_table= 0;
+        thd->lex->select_lex.insert_tables= tablenr;
+        tablenr= 0;
+      }
+      if(table_list->jtbm_subselect)
+      {
+        table_list->jtbm_table_no= tablenr;
+      }
+      else
+      {
+        table->pos_in_table_list= table_list;
+        setup_table_map(table, table_list, tablenr);
+
+        if (table_list->process_index_hints(table))
+          DBUG_RETURN(1);
+      }
+      tablenr++;
+    }
+    if (tablenr > MAX_TABLES)
+    {
+      my_error(ER_TOO_MANY_TABLES,MYF(0), static_cast<int>(MAX_TABLES));
       DBUG_RETURN(1);
+    }
   }
-  if (tablenr > MAX_TABLES)
-  {
-    my_error(ER_TOO_MANY_TABLES,MYF(0), static_cast<int>(MAX_TABLES));
-    DBUG_RETURN(1);
-  }
+  else
+  { 
+    List_iterator_fast <TABLE_LIST> ti(select_lex->leaf_tables_exec);
+    select_lex->leaf_tables.empty();
+    while ((table_list= ti++))
+    {
+      if(table_list->jtbm_subselect)
+      {
+        table_list->jtbm_table_no= table_list->tablenr_exec;
+      }
+      else
+      {
+        table_list->table->tablenr= table_list->tablenr_exec;
+        table_list->table->map= table_list->map_exec;
+        table_list->table->maybe_null= table_list->maybe_null_exec;
+        table_list->table->pos_in_table_list= table_list;
+      }
+      select_lex->leaf_tables.push_back(table_list);
+    }
+  }    
+
   for (table_list= tables;
        table_list;
        table_list= table_list->next_local)
   {
     if (table_list->merge_underlying_list)
     {
-      DBUG_ASSERT(table_list->view &&
-                  table_list->effective_algorithm == VIEW_ALGORITHM_MERGE);
+      DBUG_ASSERT(table_list->is_merged_derived());
       Query_arena *arena= thd->stmt_arena, backup;
       bool res;
       if (arena->is_conventional())
@@ -8021,6 +8142,17 @@ bool setup_tables(THD *thd, Name_resolution_context *context,
       if (res)
         DBUG_RETURN(1);
     }
+
+    if (table_list->jtbm_subselect)
+    {
+      Item *item= table_list->jtbm_subselect->optimizer;
+      if (table_list->jtbm_subselect->optimizer->fix_fields(thd, &item))
+      {
+        my_error(ER_TOO_MANY_TABLES,MYF(0),MAX_TABLES); /* psergey-todo: WHY ER_TOO_MANY_TABLES ???*/
+        DBUG_RETURN(1);
+      }
+      DBUG_ASSERT(item == table_list->jtbm_subselect->optimizer);
+    }
   }
 
   /* Precompute and store the row types of NATURAL/USING joins. */
@@ -8035,7 +8167,7 @@ bool setup_tables(THD *thd, Name_resolution_context *context,
   prepare tables and check access for the view tables
 
   SYNOPSIS
-    setup_tables_and_check_view_access()
+    setup_tables_and_check_access()
     thd		  Thread handler
     context       name resolution contest to setup table list there
     from_clause   Top-level list of table references in the FROM clause
@@ -8045,6 +8177,7 @@ bool setup_tables(THD *thd, Name_resolution_context *context,
     refresh       It is onle refresh for subquery
     select_insert It is SELECT ... INSERT command
     want_access   what access is needed
+    full_table_list a parameter to pass to the make_leaves_list function
 
   NOTE
     a wrapper for check_tables that will also check the resulting
@@ -8058,33 +8191,33 @@ bool setup_tables_and_check_access(THD *thd,
                                    Name_resolution_context *context,
                                    List<TABLE_LIST> *from_clause,
                                    TABLE_LIST *tables,
-                                   TABLE_LIST **leaves,
+                                   List<TABLE_LIST> &leaves,
                                    bool select_insert,
                                    ulong want_access_first,
-                                   ulong want_access)
+                                   ulong want_access,
+                                   bool full_table_list)
 {
-  TABLE_LIST *leaves_tmp= NULL;
   bool first_table= true;
+  DBUG_ENTER("setup_tables_and_check_access");
 
   if (setup_tables(thd, context, from_clause, tables,
-                   &leaves_tmp, select_insert))
-    return TRUE;
-
-  if (leaves)
-    *leaves= leaves_tmp;
+                   leaves, select_insert, full_table_list))
+    DBUG_RETURN(TRUE);
 
-  for (; leaves_tmp; leaves_tmp= leaves_tmp->next_leaf)
+  List_iterator<TABLE_LIST> ti(leaves);
+  TABLE_LIST *table_list;
+  while((table_list= ti++))
   {
-    if (leaves_tmp->belong_to_view && 
+    if (table_list->belong_to_view && !table_list->view && 
         check_single_table_access(thd, first_table ? want_access_first :
-                                  want_access, leaves_tmp, FALSE))
+                                  want_access, table_list, FALSE))
     {
       tables->hide_view_error(thd);
-      return TRUE;
+      DBUG_RETURN(TRUE);
     }
     first_table= 0;
   }
-  return FALSE;
+  DBUG_RETURN(FALSE);
 }
 
 
@@ -8153,7 +8286,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
 {
   Field_iterator_table_ref field_iterator;
   bool found;
-  char name_buff[NAME_LEN+1];
+  char name_buff[SAFE_NAME_LEN+1];
   DBUG_ENTER("insert_fields");
   DBUG_PRINT("arena", ("stmt arena: 0x%lx", (ulong)thd->stmt_arena));
 
@@ -8189,7 +8322,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
     DBUG_ASSERT(tables->is_leaf_for_name_resolution());
 
     if ((table_name && my_strcasecmp(table_alias_charset, table_name,
-                                    tables->alias)) ||
+                                     tables->alias)) ||
         (db_name && strcmp(tables->db,db_name)))
       continue;
 
@@ -8220,8 +8353,10 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
        information_schema table, or a nested table reference. See the comment
        for TABLE_LIST.
     */
-    if (!((table && !tables->view && (table->grant.privilege & SELECT_ACL)) ||
-          (tables->view && (tables->grant.privilege & SELECT_ACL))) &&
+    if (!((table && tables->is_non_derived() &&
+          (table->grant.privilege & SELECT_ACL)) ||
+	  ((!tables->is_non_derived() && 
+	    (tables->grant.privilege & SELECT_ACL)))) &&
         !any_privileges)
     {
       field_iterator.set(tables);
@@ -8251,7 +8386,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
 
       if (!(item= field_iterator.create_item(thd)))
         DBUG_RETURN(TRUE);
-      DBUG_ASSERT(item->fixed);
+//      DBUG_ASSERT(item->fixed);
       /* cache the table for the Item_fields inserted by expanding stars */
       if (item->type() == Item::FIELD_ITEM && tables->cacheable_table)
         ((Item_field *)item)->cached_table= tables;
@@ -8301,6 +8436,12 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
       {
         /* Mark fields as used to allow storage engine to optimze access */
         bitmap_set_bit(field->table->read_set, field->field_index);
+        /*
+          Mark virtual fields for write and others that the virtual fields
+          depend on for read.
+        */
+        if (field->vcol_info)
+          field->table->mark_virtual_col(field);
         if (table)
         {
           table->covering_keys.intersect(field->part_of_key);
@@ -8357,6 +8498,29 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
 }
 
 
+/**
+  Wrap an Item_ident into an Item_direct_ref_to_ident, replacing *conds in place
+
+  @param thd             thread handle (its stmt_arena is activated unless conventional)
+  @param conds           in/out: pointer to the condition which should be wrapped
+*/
+
+void wrap_ident(THD *thd, Item **conds)
+{
+  Item_direct_ref_to_ident *wrapper;
+  DBUG_ASSERT((*conds)->type() == Item::FIELD_ITEM || (*conds)->type() == Item::REF_ITEM);
+  Query_arena *arena= thd->stmt_arena, backup;
+  if (arena->is_conventional())
+    arena= 0;                                   // no arena to restore below
+  else
+    thd->set_n_backup_active_arena(arena, &backup);
+  if ((wrapper= new Item_direct_ref_to_ident((Item_ident *)(*conds))))
+    (*conds)= (Item*) wrapper;                  // *conds is left as is if 'new' failed
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+}
+
+
 /*
   Fix all conditions and outer join expressions.
 
@@ -8375,11 +8539,13 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
     FALSE if all is OK
 */
 
-int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
+int setup_conds(THD *thd, TABLE_LIST *tables, List<TABLE_LIST> &leaves,
                 COND **conds)
 {
   SELECT_LEX *select_lex= thd->lex->current_select;
   TABLE_LIST *table= NULL;	// For HP compilers
+  TABLE_LIST *save_emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
+  List_iterator<TABLE_LIST> ti(leaves);
   /*
     it_is_update set to TRUE when tables of primary SELECT_LEX (SELECT_LEX
     which belong to LEX, i.e. most up SELECT) will be updated by
@@ -8391,9 +8557,15 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
   bool it_is_update= (select_lex == &thd->lex->select_lex) &&
     thd->lex->which_check_option_applicable();
   bool save_is_item_list_lookup= select_lex->is_item_list_lookup;
-  select_lex->is_item_list_lookup= 0;
+  TABLE_LIST *derived= select_lex->master_unit()->derived;
   DBUG_ENTER("setup_conds");
 
+  /* Do not fix conditions for the derived tables that have been merged */
+  if (derived && derived->merged)
+    DBUG_RETURN(0);
+
+  select_lex->is_item_list_lookup= 0;
+
   thd->mark_used_columns= MARK_COLUMNS_READ;
   DBUG_PRINT("info", ("thd->mark_used_columns: %d", thd->mark_used_columns));
   select_lex->cond_count= 0;
@@ -8402,23 +8574,38 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
 
   for (table= tables; table; table= table->next_local)
   {
-    if (table->prepare_where(thd, conds, FALSE))
+    if (select_lex == &thd->lex->select_lex &&
+        select_lex->first_cond_optimization &&
+        table->merged_for_insert &&
+        table->prepare_where(thd, conds, FALSE))
       goto err_no_arena;
   }
 
+  thd->thd_marker.emb_on_expr_nest= NO_JOIN_NEST;
   if (*conds)
   {
     thd->where="where clause";
+    DBUG_EXECUTE("where",
+                 print_where(*conds,
+                             "WHERE in setup_conds",
+                             QT_ORDINARY););
+    /*
+      Wrap a lone field in WHERE: it may be the outer field of a subquery, which
+      needs a persistent pointer to it, but the optimizer may change conds.
+    */
+    if ((*conds)->type() == Item::FIELD_ITEM && !derived)
+      wrap_ident(thd, conds);
     if ((!(*conds)->fixed && (*conds)->fix_fields(thd, conds)) ||
 	(*conds)->check_cols(1))
       goto err_no_arena;
   }
+  thd->thd_marker.emb_on_expr_nest= save_emb_on_expr_nest;
 
   /*
     Apply fix_fields() to all ON clauses at all levels of nesting,
     including the ones inside view definitions.
   */
-  for (table= leaves; table; table= table->next_leaf)
+  while ((table= ti++))
   {
     TABLE_LIST *embedded; /* The table at the current level of nesting. */
     TABLE_LIST *embedding= table; /* The parent nested table reference. */
@@ -8428,9 +8615,10 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
       if (embedded->on_expr)
       {
         /* Make a join an a expression */
+        thd->thd_marker.emb_on_expr_nest= embedded;
         thd->where="on clause";
         if ((!embedded->on_expr->fixed &&
-            embedded->on_expr->fix_fields(thd, &embedded->on_expr)) ||
+             embedded->on_expr->fix_fields(thd, &embedded->on_expr)) ||
 	    embedded->on_expr->check_cols(1))
 	  goto err_no_arena;
         select_lex->cond_count++;
@@ -8452,6 +8640,7 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
       }
     }
   }
+  thd->thd_marker.emb_on_expr_nest= save_emb_on_expr_nest;
 
   if (!thd->stmt_arena->is_conventional())
   {
@@ -8505,7 +8694,8 @@ fill_record(THD * thd, List<Item> &fields, List<Item> &values,
   List_iterator_fast<Item> f(fields),v(values);
   Item *value, *fld;
   Item_field *field;
-  TABLE *table= 0;
+  TABLE *table= 0, *vcol_table= 0;
+  bool abort_on_warning_saved= thd->abort_on_warning;
   DBUG_ENTER("fill_record");
 
   /*
@@ -8528,6 +8718,8 @@ fill_record(THD * thd, List<Item> &fields, List<Item> &values,
     table->auto_increment_field_not_null= FALSE;
     f.rewind();
   }
+  else if (thd->lex->unit.insert_table_with_stored_vcol)
+    vcol_table= thd->lex->unit.insert_table_with_stored_vcol;
   while ((fld= f++))
   {
     if (!(field= fld->filed_for_view_update()))
@@ -8540,14 +8732,38 @@ fill_record(THD * thd, List<Item> &fields, List<Item> &values,
     table= rfield->table;
     if (rfield == table->next_number_field)
       table->auto_increment_field_not_null= TRUE;
+    if (rfield->vcol_info && 
+        value->type() != Item::DEFAULT_VALUE_ITEM && 
+        value->type() != Item::NULL_ITEM &&
+        table->s->table_category != TABLE_CATEGORY_TEMPORARY)
+    {
+      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN,
+                          ER(ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN),
+                          rfield->field_name, table->s->table_name.str);
+    }
     if ((value->save_in_field(rfield, 0) < 0) && !ignore_errors)
     {
       my_message(ER_UNKNOWN_ERROR, ER(ER_UNKNOWN_ERROR), MYF(0));
       goto err;
     }
+    DBUG_ASSERT(vcol_table == 0 || vcol_table == table);
+    vcol_table= table;
   }
+  /* Update virtual fields*/
+  thd->abort_on_warning= FALSE;
+  if (vcol_table)
+  {
+    if (vcol_table->vfield)
+    {
+      if (update_virtual_fields(thd, vcol_table, TRUE))
+        goto err;
+    }
+  }
+  thd->abort_on_warning= abort_on_warning_saved;
   DBUG_RETURN(thd->is_error());
 err:
+  thd->abort_on_warning= abort_on_warning_saved;
   if (table)
     table->auto_increment_field_not_null= FALSE;
   DBUG_RETURN(TRUE);
@@ -8583,9 +8799,31 @@ fill_record_n_invoke_before_triggers(THD *thd, List<Item> &fields,
                                      Table_triggers_list *triggers,
                                      enum trg_event_type event)
 {
-  return (fill_record(thd, fields, values, ignore_errors) ||
-          (triggers && triggers->process_triggers(thd, event,
-                                                 TRG_ACTION_BEFORE, TRUE)));
+  bool result;
+  result= (fill_record(thd, fields, values, ignore_errors) ||
+           (triggers && triggers->process_triggers(thd, event,
+                                                   TRG_ACTION_BEFORE, TRUE)));
+  /*
+    Re-calculate virtual fields to cater for cases when base columns are
+    updated by the triggers.
+  */
+  if (!result && triggers)
+  {
+    TABLE *table= 0;
+    List_iterator_fast<Item> f(fields);
+    Item *fld;
+    Item_field *item_field;
+    if (fields.elements)
+    {
+      fld= (Item_field*)f++;
+      item_field= fld->filed_for_view_update();
+      if (item_field && item_field->field &&
+          (table= item_field->field->table) &&
+        table->vfield)
+        result= update_virtual_fields(thd, table, TRUE);
+    }
+  }
+  return result;
 }
 
 
@@ -8598,6 +8836,7 @@ fill_record_n_invoke_before_triggers(THD *thd, List<Item> &fields,
     ptr           pointer on pointer to record
     values        list of fields
     ignore_errors TRUE if we should ignore errors
+    use_value     forces usage of value of the items instead of result
 
   NOTE
     fill_record() may set table->auto_increment_field_not_null and a
@@ -8610,41 +8849,69 @@ fill_record_n_invoke_before_triggers(THD *thd, List<Item> &fields,
 */
 
 bool
-fill_record(THD *thd, Field **ptr, List<Item> &values, bool ignore_errors)
+fill_record(THD *thd, Field **ptr, List<Item> &values, bool ignore_errors,
+            bool use_value)
 {
   List_iterator_fast<Item> v(values);
+  List<TABLE> tbl_list;
   Item *value;
   TABLE *table= 0;
+  Field *field;
+  bool abort_on_warning_saved= thd->abort_on_warning;
   DBUG_ENTER("fill_record");
 
-  Field *field;
+  if (!*ptr)
+  {
+    /* No fields to update, quite strange!*/
+    DBUG_RETURN(0);
+  }
+
+  /*
+    On INSERT or UPDATE fields are checked to be from the same table,
+    thus we safely can take table from the first field.
+  */
+  table= (*ptr)->table;
+
   /*
     Reset the table->auto_increment_field_not_null as it is valid for
     only one row.
   */
-  if (*ptr)
-  {
-    /*
-      On INSERT or UPDATE fields are checked to be from the same table,
-      thus we safely can take table from the first field.
-    */
-    table= (*ptr)->table;
-    table->auto_increment_field_not_null= FALSE;
-  }
+  table->auto_increment_field_not_null= FALSE;
   while ((field = *ptr++) && ! thd->is_error())
   {
+    /* Ensure that all fields are from the same table */
+    DBUG_ASSERT(field->table == table);
+
     value=v++;
-    table= field->table;
     if (field == table->next_number_field)
       table->auto_increment_field_not_null= TRUE;
-    if (value->save_in_field(field, 0) < 0)
-      goto err;
+    if (field->vcol_info && 
+        value->type() != Item::DEFAULT_VALUE_ITEM && 
+        value->type() != Item::NULL_ITEM &&
+        table->s->table_category != TABLE_CATEGORY_TEMPORARY)
+    {
+      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN,
+                          ER(ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN),
+                          field->field_name, table->s->table_name.str);
+    }
+
+    if (use_value)
+      value->save_val(field);
+    else
+      if (value->save_in_field(field, 0) < 0)
+        goto err;
   }
+  /* Update virtual fields*/
+  thd->abort_on_warning= FALSE;
+  if (table->vfield && update_virtual_fields(thd, table, TRUE))
+    goto err;
+  thd->abort_on_warning= abort_on_warning_saved;
   DBUG_RETURN(thd->is_error());
 
 err:
-  if (table)
-    table->auto_increment_field_not_null= FALSE;
+  thd->abort_on_warning= abort_on_warning_saved;
+  table->auto_increment_field_not_null= FALSE;
   DBUG_RETURN(TRUE);
 }
 
@@ -8678,9 +8945,22 @@ fill_record_n_invoke_before_triggers(THD *thd, Field **ptr,
                                      Table_triggers_list *triggers,
                                      enum trg_event_type event)
 {
-  return (fill_record(thd, ptr, values, ignore_errors) ||
-          (triggers && triggers->process_triggers(thd, event,
-                                                 TRG_ACTION_BEFORE, TRUE)));
+  bool result;
+  result= (fill_record(thd, ptr, values, ignore_errors, FALSE) ||
+           (triggers && triggers->process_triggers(thd, event,
+                                                   TRG_ACTION_BEFORE, TRUE)));
+  /*
+    Re-calculate virtual fields to cater for cases when base columns are
+    updated by the triggers.
+  */
+  if (!result && triggers && *ptr)
+  {
+    TABLE *table= (*ptr)->table;
+    if (table->vfield)
+      result= update_virtual_fields(thd, table, TRUE);
+  }
+  return result;
+
 }
 
 
@@ -8725,7 +9005,7 @@ my_bool mysql_rm_tmp_tables(void)
         uint filePath_len= my_snprintf(filePath, sizeof(filePath),
                                        "%s%c%s", tmpdir, FN_LIBCHAR,
                                        file->name);
-        if (!memcmp(reg_ext, ext, ext_len))
+        if (!strcmp(reg_ext, ext))
         {
           handler *handler_file= 0;
           /* We should cut file extention before deleting of table */
@@ -8757,7 +9037,6 @@ my_bool mysql_rm_tmp_tables(void)
 }
 
 
-
 /*****************************************************************************
 	unireg support functions
 *****************************************************************************/
@@ -9169,12 +9448,6 @@ close_system_tables(THD *thd, Open_tables_backup *backup)
   held by the connection due to a preceding implicit
   commit.
 
-  This function assumes that there is no
-  statement transaction started for the operation
-  itself, since mysql.* tables are not transactional
-  and when they are used the binlog is off (DDL
-  binlogging is always statement-based.
-
   We need this function since we'd like to not
   just close the system table, but also release
   the metadata lock on it.
@@ -9188,8 +9461,8 @@ close_system_tables(THD *thd, Open_tables_backup *backup)
 void
 close_mysql_tables(THD *thd)
 {
-  /* No need to commit/rollback statement transaction, it's not started. */
-  DBUG_ASSERT(thd->transaction.stmt.is_empty());
+  if (! thd->in_sub_stmt)
+    trans_commit_stmt(thd);
   close_thread_tables(thd);
   thd->mdl_context.release_transactional_locks();
 }
@@ -9283,6 +9556,61 @@ void close_log_table(THD *thd, Open_tables_backup *backup)
   close_system_tables(thd, backup);
 }
 
+
+/**
+  @brief
+  Remove 'fixed' flag from items in a list
+
+  @param fields list of items to un-fix
+
+  @details
+  This function sets to 0 the 'fixed' flag for items in the 'fields' list.
+  It's needed to force correct marking of views' fields for INSERT/UPDATE
+  statements.
+*/
+
+void unfix_fields(List<Item> &fields)
+{
+  List_iterator<Item> li(fields);
+  Item *item;
+  while ((item= li++))
+    item->fixed= 0;
+}
+
+
+/**
+  Check result of dynamic column function and issue error if it is needed
+
+  @param rc              The result code of dynamic column function
+
+  @return the result code which was passed in as the argument
+*/
+
+int dynamic_column_error_message(enum_dyncol_func_result rc)
+{
+  switch (rc) {
+  case ER_DYNCOL_YES:
+  case ER_DYNCOL_OK:
+    break; // it is not an error
+  case ER_DYNCOL_FORMAT:
+    my_error(ER_DYN_COL_WRONG_FORMAT, MYF(0));
+    break;
+  case ER_DYNCOL_LIMIT:
+    my_error(ER_DYN_COL_IMPLEMENTATION_LIMIT, MYF(0));
+    break;
+  case ER_DYNCOL_RESOURCE:
+    my_error(ER_OUT_OF_RESOURCES, MYF(0));
+    break;
+  case ER_DYNCOL_DATA:
+    my_error(ER_DYN_COL_DATA, MYF(0));
+    break;
+  case ER_DYNCOL_UNKNOWN_CHARSET:
+    my_error(ER_DYN_COL_WRONG_CHARSET, MYF(0));
+    break;
+  }
+  return rc;
+}
+
 /**
   @} (end of group Data_Dictionary)
 */
diff --git a/sql/sql_base.h b/sql/sql_base.h
index dc8320687fc..e602aba98a9 100644
--- a/sql/sql_base.h
+++ b/sql/sql_base.h
@@ -127,6 +127,7 @@ TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type update,
   be open do not acquire global and schema-scope IX locks.
 */
 #define MYSQL_OPEN_SKIP_SCOPED_MDL_LOCK         0x1000
+#define MYSQL_LOCK_NOT_TEMPORARY		0x2000
 
 /** Please refer to the internals manual. */
 #define MYSQL_OPEN_REOPEN  (MYSQL_OPEN_IGNORE_FLUSH |\
@@ -182,13 +183,16 @@ bool fill_record_n_invoke_before_triggers(THD *thd, Field **field,
 bool insert_fields(THD *thd, Name_resolution_context *context,
 		   const char *db_name, const char *table_name,
                    List_iterator<Item> *it, bool any_privileges);
+void make_leaves_list(List<TABLE_LIST> &list, TABLE_LIST *tables,
+                      bool full_table_list, TABLE_LIST *boundary);
 int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields,
 	       List<Item> *sum_func_list, uint wild_num);
 bool setup_fields(THD *thd, Item** ref_pointer_array,
                   List<Item> &item, enum_mark_columns mark_used_columns,
                   List<Item> *sum_func_list, bool allow_sum_func);
+void unfix_fields(List<Item> &items);
 bool fill_record(THD *thd, Field **field, List<Item> &values,
-                 bool ignore_errors);
+                 bool ignore_errors, bool use_value);
 
 Field *
 find_field_in_tables(THD *thd, Item_ident *item,
@@ -213,15 +217,17 @@ Item ** find_item_in_list(Item *item, List<Item> &items, uint *counter,
                           enum_resolution_type *resolution);
 bool setup_tables(THD *thd, Name_resolution_context *context,
                   List<TABLE_LIST> *from_clause, TABLE_LIST *tables,
-                  TABLE_LIST **leaves, bool select_insert);
+                  List<TABLE_LIST> &leaves, bool select_insert,
+                  bool full_table_list);
 bool setup_tables_and_check_access(THD *thd,
                                    Name_resolution_context *context,
                                    List<TABLE_LIST> *from_clause,
                                    TABLE_LIST *tables,
-                                   TABLE_LIST **leaves,
+                                   List<TABLE_LIST> &leaves, 
                                    bool select_insert,
                                    ulong want_access_first,
-                                   ulong want_access);
+                                   ulong want_access,
+                                   bool full_table_list);
 bool wait_while_table_is_used(THD *thd, TABLE *table,
                               enum ha_extra_function function);
 
@@ -230,8 +236,9 @@ void drop_open_table(THD *thd, TABLE *table, const char *db_name,
 void update_non_unique_table_error(TABLE_LIST *update,
                                    const char *operation,
                                    TABLE_LIST *duplicate);
-int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
+int setup_conds(THD *thd, TABLE_LIST *tables, List<TABLE_LIST> &leaves,
 		COND **conds);
+void wrap_ident(THD *thd, Item **conds);
 int setup_ftfuncs(SELECT_LEX* select);
 int init_ftfuncs(THD *thd, SELECT_LEX* select, bool no_order);
 bool lock_table_names(THD *thd, TABLE_LIST *table_list,
@@ -247,7 +254,8 @@ bool open_and_lock_tables(THD *thd, TABLE_LIST *tables,
 TABLE *open_n_lock_single_table(THD *thd, TABLE_LIST *table_l,
                                 thr_lock_type lock_type, uint flags,
                                 Prelocking_strategy *prelocking_strategy);
-bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags);
+bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags,
+                                    uint dt_phases);
 bool lock_tables(THD *thd, TABLE_LIST *tables, uint counter, uint flags);
 int decide_logging_format(THD *thd, TABLE_LIST *tables);
 void free_io_cache(TABLE *entry);
@@ -295,6 +303,8 @@ TABLE *find_table_for_mdl_upgrade(THD *thd, const char *db,
                                   bool no_error);
 void mark_tmp_table_for_reuse(TABLE *table);
 bool check_if_table_exists(THD *thd, TABLE_LIST *table, bool *exists);
+int update_virtual_fields(THD *thd, TABLE *table, bool ignore_stored= FALSE);
+int dynamic_column_error_message(enum_dyncol_func_result rc);
 
 extern TABLE *unused_tables;
 extern Item **not_found_item;
@@ -314,7 +324,7 @@ extern HASH table_def_cache;
 inline void setup_table_map(TABLE *table, TABLE_LIST *table_list, uint tablenr)
 {
   table->used_fields= 0;
-  table->const_table= 0;
+  table_list->reset_const_table();
   table->null_row= 0;
   table->status= STATUS_NO_RECORD;
   table->maybe_null= table_list->outer_join;
@@ -330,6 +340,14 @@ inline void setup_table_map(TABLE *table, TABLE_LIST *table_list, uint tablenr)
   table->force_index_order= table->force_index_group= 0;
   table->covering_keys= table->s->keys_for_keyread;
   table->merge_keys.clear_all();
+  TABLE_LIST *orig= table_list->select_lex ?
+    table_list->select_lex->master_unit()->derived : 0;
+  if (!orig || !orig->is_merged_derived())
+  {
+    /* Tables merged from derived were set up already.*/
+    table->covering_keys= table->s->keys_for_keyread;
+    table->merge_keys.clear_all();
+  }
 }
 
 inline TABLE_LIST *find_table_in_global_list(TABLE_LIST *table,
diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc
index 8e7ee4f01c7..5ddff949ce3 100644
--- a/sql/sql_binlog.cc
+++ b/sql/sql_binlog.cc
@@ -84,7 +84,7 @@ void mysql_client_binlog_statement(THD* thd)
   if (!rli)
   {
     rli= thd->rli_fake= new Relay_log_info(FALSE);
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
     rli->is_fake= TRUE;
 #endif
     have_fd_event= FALSE;
@@ -120,7 +120,7 @@ void mysql_client_binlog_statement(THD* thd)
     char const *endptr= 0;
     int bytes_decoded= base64_decode(strptr, coded_len, buf, &endptr);
 
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
       /*
         This debug printout should not be used for valgrind builds
         since it will read from unassigned memory.
@@ -185,7 +185,7 @@ void mysql_client_binlog_statement(THD* thd)
       */
       if (!have_fd_event)
       {
-        int type = bufptr[EVENT_TYPE_OFFSET];
+        int type = (uchar)bufptr[EVENT_TYPE_OFFSET];
         if (type == FORMAT_DESCRIPTION_EVENT || type == START_EVENT_V3)
           have_fd_event= TRUE;
         else
@@ -197,7 +197,8 @@ void mysql_client_binlog_statement(THD* thd)
       }
 
       ev= Log_event::read_log_event(bufptr, event_len, &error,
-                                    rli->relay_log.description_event_for_exec);
+                                    rli->relay_log.description_event_for_exec,
+                                    0);
 
       DBUG_PRINT("info",("binlog base64 err=%s", error));
       if (!ev)
diff --git a/sql/sql_bitmap.h b/sql/sql_bitmap.h
index f08a6a07bce..db4c7110ac7 100644
--- a/sql/sql_bitmap.h
+++ b/sql/sql_bitmap.h
@@ -64,13 +64,13 @@ public:
   }
   void subtract(Bitmap& map2) { bitmap_subtract(&map, &map2.map); }
   void merge(Bitmap& map2) { bitmap_union(&map, &map2.map); }
-  my_bool is_set(uint n) const { return bitmap_is_set(&map, n); }
-  my_bool is_prefix(uint n) const { return bitmap_is_prefix(&map, n); }
-  my_bool is_clear_all() const { return bitmap_is_clear_all(&map); }
-  my_bool is_set_all() const { return bitmap_is_set_all(&map); }
-  my_bool is_subset(const Bitmap& map2) const { return bitmap_is_subset(&map, &map2.map); }
-  my_bool is_overlapping(const Bitmap& map2) const { return bitmap_is_overlapping(&map, &map2.map); }
-  my_bool operator==(const Bitmap& map2) const { return bitmap_cmp(&map, &map2.map); }
+  bool is_set(uint n) const { return bitmap_is_set(&map, n); }
+  bool is_prefix(uint n) const { return bitmap_is_prefix(&map, n); }
+  bool is_clear_all() const { return bitmap_is_clear_all(&map); }
+  bool is_set_all() const { return bitmap_is_set_all(&map); }
+  bool is_subset(const Bitmap& map2) const { return bitmap_is_subset(&map, &map2.map); }
+  bool is_overlapping(const Bitmap& map2) const { return bitmap_is_overlapping(&map, &map2.map); }
+  bool operator==(const Bitmap& map2) const { return bitmap_cmp(&map, &map2.map); }
   char *print(char *buf) const
   {
     char *s=buf;
@@ -95,6 +95,38 @@ public:
     DBUG_ASSERT(sizeof(buffer) >= 4);
     return (ulonglong) uint4korr(buffer);
   }
+  uint bits_set()
+  {
+    return bitmap_bits_set(&map);
+  }
+};
+
+/* An iterator to quickly walk over the set bits of a ulonglong bitmap. */
+class Table_map_iterator
+{
+  ulonglong bmp;
+  uint no;
+public:
+  Table_map_iterator(ulonglong t) : bmp(t), no(0) {}
+  int next_bit()
+  {
+    static const char last_bit[16]= {32, 0, 1, 0, 
+                                      2, 0, 1, 0, 
+                                      3, 0, 1, 0,
+                                      2, 0, 1, 0};
+    uint bit;
+    while ((bit= last_bit[bmp & 0xF]) == 32)    // 32 marks an empty low nibble
+    {
+      no += 4;
+      bmp= bmp >> 4;
+      if (!bmp)
+        return BITMAP_END;
+    }
+    bmp &= ~(1LL << bit);                       // clear the bit being returned
+    return no + bit;
+  }
+  int operator++(int) { return next_bit(); }
+  enum { BITMAP_END= 64 };                      // returned when no bits are left
+};
 
 template <> class Bitmap<64>
@@ -122,15 +154,31 @@ public:
   void intersect_extended(ulonglong map2) { map&= map2; }
   void subtract(Bitmap<64>& map2) { map&= ~map2.map; }
   void merge(Bitmap<64>& map2) { map|= map2.map; }
-  my_bool is_set(uint n) const { return test(map & (((ulonglong)1) << n)); }
-  my_bool is_prefix(uint n) const { return map == (((ulonglong)1) << n)-1; }
-  my_bool is_clear_all() const { return map == (ulonglong)0; }
-  my_bool is_set_all() const { return map == ~(ulonglong)0; }
-  my_bool is_subset(const Bitmap<64>& map2) const { return !(map & ~map2.map); }
-  my_bool is_overlapping(const Bitmap<64>& map2) const { return (map & map2.map)!= 0; }
-  my_bool operator==(const Bitmap<64>& map2) const { return map == map2.map; }
+  bool is_set(uint n) const { return test(map & (((ulonglong)1) << n)); }
+  bool is_prefix(uint n) const { return map == (((ulonglong)1) << n)-1; }
+  bool is_clear_all() const { return map == (ulonglong)0; }
+  bool is_set_all() const { return map == ~(ulonglong)0; }
+  bool is_subset(const Bitmap<64>& map2) const { return !(map & ~map2.map); }
+  bool is_overlapping(const Bitmap<64>& map2) const { return (map & map2.map)!= 0; }
+  bool operator==(const Bitmap<64>& map2) const { return map == map2.map; }
   char *print(char *buf) const { longlong2str(map,buf,16); return buf; }
   ulonglong to_ulonglong() const { return map; }
+  class Iterator : public Table_map_iterator
+  {
+  public:
+    Iterator(Bitmap<64> &bmp) : Table_map_iterator(bmp.map) {}
+  };
+  uint bits_set()
+  {
+    // Returns the number of set bits in map. TODO: use my_count_bits()
+    uint res= 0, i= 0;
+    for (; i < 64 ; i++)
+    {
+      if (map & ((ulonglong)1<<i))
+        res++;
+    }
+    return res;
+  }
 };
 
 
diff --git a/sql/sql_builtin.cc.in b/sql/sql_builtin.cc.in
index cf7006fc7ce..63850650ac9 100644
--- a/sql/sql_builtin.cc.in
+++ b/sql/sql_builtin.cc.in
@@ -13,24 +13,27 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
 
+#include <my_global.h>
 #include <mysql/plugin.h>
 
-typedef struct st_mysql_plugin builtin_plugin[];
+typedef struct st_maria_plugin builtin_maria_plugin[];
 
 #ifdef _MSC_VER
 extern "C"
 #else
 extern
 #endif
-builtin_plugin 
-  @mysql_mandatory_plugins@ @mysql_optional_plugins@ builtin_binlog_plugin, builtin_mysql_password_plugin;
+builtin_maria_plugin 
+  @mysql_mandatory_plugins@ @mysql_optional_plugins@
+  builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin;
 
-struct st_mysql_plugin *mysql_optional_plugins[]=
+struct st_maria_plugin *mysql_optional_plugins[]=
 {
   @mysql_optional_plugins@ 0
 };
 
-struct st_mysql_plugin *mysql_mandatory_plugins[]=
+struct st_maria_plugin *mysql_mandatory_plugins[]=
 {
-  builtin_binlog_plugin, builtin_mysql_password_plugin, @mysql_mandatory_plugins@ 0
+  builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin,
+  @mysql_mandatory_plugins@ 0
 };
diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc
index 47628f1c590..db0da4bdae6 100644
--- a/sql/sql_cache.cc
+++ b/sql/sql_cache.cc
@@ -342,8 +342,11 @@ TODO list:
 #include "../storage/myisammrg/ha_myisammrg.h"
 #include "../storage/myisammrg/myrg_def.h"
 #include "probes_mysql.h"
+#include "log_slow.h"
 #include "transaction.h"
 
+const uchar *query_state_map;
+
 #ifdef EMBEDDED_LIBRARY
 #include "emb_qcache.h"
 #endif
@@ -429,6 +432,136 @@ struct Query_cache_wait_state
 };
 
 
+/*
+  Tell whether query_state_map classifies c as MY_LEX_SKIP (white space).
+*/
+
+inline bool is_white_space(char c)
+{
+  return MY_LEX_SKIP == query_state_map[(uchar) c];
+}
+
+
+/**
+  Build the normalized "base" form of a query: comments are dropped and
+  every run of white space collapses to one space; quoted text is kept.
+
+  @param new_query	    Receives the normalized query
+  @param query		    Original query, 0-terminated at query[query_length]
+  @param query_length	    Length of original query
+  @param additional_length  Extra bytes to reserve in new_query's buffer
+  			    beyond the query text.
+
+  @note If new_query cannot be allocated, it is pointed at the original
+    query.  '/*!' comments are kept as text; \-escapes are not parsed.
+*/
+
+static void make_base_query(String *new_query,
+                            const char *query, size_t query_length,
+                            size_t additional_length)
+{
+  char *buffer;
+  const char *query_end, *last_space;
+
+  /* The following is guaranteed by the query_cache interface */
+  DBUG_ASSERT(query[query_length] == 0);
+  DBUG_ASSERT(!is_white_space(query[0]));
+
+  if (new_query->realloc(query_length + additional_length))
+  {
+    /*
+      We could not allocate the query.  Use original query for
+      the query cache;  Better than nothing....
+    */
+    new_query->set(query, query_length, system_charset_info);
+    return;
+  }
+
+  buffer= (char*) new_query->ptr();             // Store base query here
+  query_end= query + query_length;
+  last_space= 0;                                // No space found yet
+
+  while (query < query_end)
+  {
+    char current = *(query++);
+    switch (current) {
+    case '\'':
+    case '`':
+    case '"':
+      *(buffer++)= current;                     // copy first quote
+      while (query < query_end)
+      {
+        *(buffer++)= *query;
+        if (*(query++) == current)              // found pair quote
+          break;
+      }
+      continue;                                 // Continue with next symbol
+    case '/':                                   // Start of comment ?
+      /*
+        Comments of format /#!number #/ must be kept, not stripped.
+        These may include '"' and other comments, but it should
+        be safe to parse the content as a normal string.
+      */
+      if (query[0] != '*' || query[1] == '!')
+        break;
+
+      query++;                               // skip "*"
+      for (; query < query_end; query++)     // scan from 1st comment char
+      {
+        if (query[0] == '*' && query[1] == '/')
+        {
+          query+= 2;
+          goto insert_space;
+        }
+      }
+      continue;                                 // Will end outer loop
+    case '-':
+      if (*query != '-' || !is_white_space(query[1])) // Not a comment
+        break;
+      query++;                 // skip second "-", and go to search of "\n"
+      /* fall through */
+    case '#':
+      while (query < query_end)
+      {
+        if (*(query++) == '\n')
+          goto insert_space;
+      }
+      continue;                                 // Will end outer loop
+    default:
+      if (is_white_space(current))
+        goto insert_space;
+      break;
+    }
+    *(buffer++)= current;
+    continue;
+
+insert_space:
+    if (buffer != last_space)                   // Collapse adjacent spaces
+    {
+      *(buffer++)= ' ';
+      last_space= buffer;
+    }
+  }
+  if (buffer == last_space)
+    buffer--;                                   // Remove the last space
+  *buffer= 0;
+  new_query->length((size_t) (buffer - new_query->ptr()));
+}
+
+
+/**
+  Force the session's query_cache_type to 0 when the global one is 0
+
+  @param thd             thread handle
+*/
+
+inline void fix_local_query_cache_mode(THD *thd)
+{
+  if (!global_system_variables.query_cache_type)
+    thd->variables.query_cache_type= 0;
+}
+
+
 /**
   Serialize access to the query cache.
   If the lock cannot be granted the thread hangs in a conditional wait which
@@ -438,32 +571,42 @@ struct Query_cache_wait_state
   effect by another thread. This enables a quick path in execution to skip waits
   when the outcome is known.
 
-  @param use_timeout TRUE if the lock can abort because of a timeout.
+  @param mode TIMEOUT the lock can abort because of a timeout
+              TRY the lock can abort because it is locked now
+              WAIT wait for lock (default)
 
-  @note use_timeout is optional and default value is FALSE.
+  @note mode is optional and default value is WAIT.
 
   @return
    @retval FALSE An exclusive lock was taken
    @retval TRUE The locking attempt failed
 */
 
-bool Query_cache::try_lock(bool use_timeout)
+bool Query_cache::try_lock(THD *thd, Cache_try_lock_mode mode)
 {
-  bool interrupt= FALSE;
-  THD *thd= current_thd;
+  bool interrupt= TRUE;
   Query_cache_wait_state wait_state(thd, __func__, __FILE__, __LINE__);
   DBUG_ENTER("Query_cache::try_lock");
 
   mysql_mutex_lock(&structure_guard_mutex);
+  DBUG_EXECUTE_IF("status_wait_query_cache_mutex_sleep", { sleep(5); });
+  if (m_cache_status == DISABLED)
+  {
+    mysql_mutex_unlock(&structure_guard_mutex);
+    DBUG_RETURN(TRUE);
+  }
+  m_requests_in_progress++;
+  fix_local_query_cache_mode(thd);
+
   while (1)
   {
     if (m_cache_lock_status == Query_cache::UNLOCKED)
     {
       m_cache_lock_status= Query_cache::LOCKED;
 #ifndef DBUG_OFF
-      if (thd)
-        m_cache_lock_thread_id= thd->thread_id;
+      m_cache_lock_thread_id= thd->thread_id;
 #endif
+      interrupt= FALSE;
       break;
     }
     else if (m_cache_lock_status == Query_cache::LOCKED_NO_WAIT)
@@ -472,7 +615,6 @@ bool Query_cache::try_lock(bool use_timeout)
         If query cache is protected by a LOCKED_NO_WAIT lock this thread
         should avoid using the query cache as it is being evicted.
       */
-      interrupt= TRUE;
       break;
     }
     else
@@ -482,24 +624,34 @@ bool Query_cache::try_lock(bool use_timeout)
         To prevent send_result_to_client() and query_cache_insert() from
         blocking execution for too long a timeout is put on the lock.
       */
-      if (use_timeout)
+      if (mode == WAIT)
+      {
+        mysql_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
+      }
+      else if (mode == TIMEOUT)
       {
         struct timespec waittime;
         set_timespec_nsec(waittime,(ulong)(50000000L));  /* Wait for 50 msec */
         int res= mysql_cond_timedwait(&COND_cache_status_changed,
                                       &structure_guard_mutex, &waittime);
         if (res == ETIMEDOUT)
-        {
-          interrupt= TRUE;
           break;
-        }
       }
       else
       {
-        mysql_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
+        /**
+          If we are here, then mode is == TRY and there was someone else using
+          the query cache. (m_cache_lock_status != Query_cache::UNLOCKED).
+          Signal that we didn't get a lock.
+        */
+        DBUG_ASSERT(m_requests_in_progress > 1);
+        DBUG_ASSERT(mode == TRY);
+        break;
       }
     }
   }
+  if (interrupt)
+    m_requests_in_progress--;
   mysql_mutex_unlock(&structure_guard_mutex);
 
   DBUG_RETURN(interrupt);
@@ -524,10 +676,12 @@ void Query_cache::lock_and_suspend(void)
   DBUG_ENTER("Query_cache::lock_and_suspend");
 
   mysql_mutex_lock(&structure_guard_mutex);
+  m_requests_in_progress++;
   while (m_cache_lock_status != Query_cache::UNLOCKED)
     mysql_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
   m_cache_lock_status= Query_cache::LOCKED_NO_WAIT;
 #ifndef DBUG_OFF
+  /* Here thd may not be set during shutdown */
   if (thd)
     m_cache_lock_thread_id= thd->thread_id;
 #endif
@@ -546,19 +700,19 @@ void Query_cache::lock_and_suspend(void)
   It is used by all methods which invalidates one or more tables.
  */
 
-void Query_cache::lock(void)
+void Query_cache::lock(THD *thd)
 {
-  THD *thd= current_thd;
   Query_cache_wait_state wait_state(thd, __func__, __FILE__, __LINE__);
   DBUG_ENTER("Query_cache::lock");
 
   mysql_mutex_lock(&structure_guard_mutex);
+  m_requests_in_progress++;
+  fix_local_query_cache_mode(thd);
   while (m_cache_lock_status != Query_cache::UNLOCKED)
     mysql_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
   m_cache_lock_status= Query_cache::LOCKED;
 #ifndef DBUG_OFF
-  if (thd)
-    m_cache_lock_thread_id= thd->thread_id;
+  m_cache_lock_thread_id= thd->thread_id;
 #endif
   mysql_mutex_unlock(&structure_guard_mutex);
 
@@ -575,6 +729,7 @@ void Query_cache::unlock(void)
   DBUG_ENTER("Query_cache::unlock");
   mysql_mutex_lock(&structure_guard_mutex);
 #ifndef DBUG_OFF
+  /* Thd may not be set in resize() at mysqld start */
   THD *thd= current_thd;
   if (thd)
     DBUG_ASSERT(m_cache_lock_thread_id == thd->thread_id);
@@ -584,6 +739,14 @@ void Query_cache::unlock(void)
   m_cache_lock_status= Query_cache::UNLOCKED;
   DBUG_PRINT("Query_cache",("Sending signal"));
   mysql_cond_signal(&COND_cache_status_changed);
+  DBUG_ASSERT(m_requests_in_progress > 0);
+  m_requests_in_progress--;
+  if (m_requests_in_progress == 0 && m_cache_status == DISABLE_REQUEST)
+  {
+    /* No clients => just free query cache */
+    free_cache();
+    m_cache_status= DISABLED;
+  }
   mysql_mutex_unlock(&structure_guard_mutex);
   DBUG_VOID_RETURN;
 }
@@ -600,25 +763,24 @@ void Query_cache::unlock(void)
    @retval FALSE No directive found.
 */
  
-static bool has_no_cache_directive(char *sql)
+static bool has_no_cache_directive(const char *sql)
 {
-  int i=0;
-  while (sql[i] == ' ')
-    ++i;
+  while (is_white_space(*sql))
+    sql++;
     
-  if (my_toupper(system_charset_info, sql[i])    == 'S' &&
-      my_toupper(system_charset_info, sql[i+1])  == 'Q' &&
-      my_toupper(system_charset_info, sql[i+2])  == 'L' &&
-      my_toupper(system_charset_info, sql[i+3])  == '_' &&
-      my_toupper(system_charset_info, sql[i+4])  == 'N' &&
-      my_toupper(system_charset_info, sql[i+5])  == 'O' &&
-      my_toupper(system_charset_info, sql[i+6])  == '_' &&
-      my_toupper(system_charset_info, sql[i+7])  == 'C' &&
-      my_toupper(system_charset_info, sql[i+8])  == 'A' &&
-      my_toupper(system_charset_info, sql[i+9])  == 'C' &&
-      my_toupper(system_charset_info, sql[i+10]) == 'H' &&
-      my_toupper(system_charset_info, sql[i+11]) == 'E' &&
-      my_toupper(system_charset_info, sql[i+12]) == ' ')
+  if (my_toupper(system_charset_info, sql[0])  == 'S' &&
+      my_toupper(system_charset_info, sql[1])  == 'Q' &&
+      my_toupper(system_charset_info, sql[2])  == 'L' &&
+      my_toupper(system_charset_info, sql[3])  == '_' &&
+      my_toupper(system_charset_info, sql[4])  == 'N' &&
+      my_toupper(system_charset_info, sql[5])  == 'O' &&
+      my_toupper(system_charset_info, sql[6])  == '_' &&
+      my_toupper(system_charset_info, sql[7])  == 'C' &&
+      my_toupper(system_charset_info, sql[8])  == 'A' &&
+      my_toupper(system_charset_info, sql[9])  == 'C' &&
+      my_toupper(system_charset_info, sql[10]) == 'H' &&
+      my_toupper(system_charset_info, sql[11]) == 'E' &&
+      my_isspace(system_charset_info, sql[12]))
     return TRUE;
   
   return FALSE;       
@@ -750,7 +912,7 @@ inline void Query_cache_query::lock_writing()
   remove it.
 */
 
-my_bool Query_cache_query::try_lock_writing()
+bool Query_cache_query::try_lock_writing()
 {
   DBUG_ENTER("Query_cache_block::try_lock_writing");
   if (mysql_rwlock_trywrlock(&lock) != 0)
@@ -892,13 +1054,19 @@ Query_cache::insert(Query_cache_tls *query_cache_tls,
 {
   DBUG_ENTER("Query_cache::insert");
 
-  /* See the comment on double-check locking usage above. */
+  /* First we check if query cache is disabled without doing a mutex lock */
   if (is_disabled() || query_cache_tls->first_query_block == NULL)
     DBUG_VOID_RETURN;
 
+  DBUG_ASSERT(current_thd);
+
   QC_DEBUG_SYNC("wait_in_query_cache_insert");
 
-  if (try_lock())
+  /*
+    Lock the cache with try_lock(). try_lock() will fail if
+    cache was disabled between the above test and lock.
+  */
+  if (try_lock(current_thd, Query_cache::WAIT))
     DBUG_VOID_RETURN;
 
   Query_cache_block *query_block = query_cache_tls->first_query_block;
@@ -949,14 +1117,14 @@ Query_cache::insert(Query_cache_tls *query_cache_tls,
 void
 Query_cache::abort(Query_cache_tls *query_cache_tls)
 {
+  THD *thd;
   DBUG_ENTER("query_cache_abort");
-  THD *thd= current_thd;
 
   /* See the comment on double-check locking usage above. */
   if (is_disabled() || query_cache_tls->first_query_block == NULL)
     DBUG_VOID_RETURN;
 
-  if (try_lock())
+  if (try_lock(current_thd, Query_cache::WAIT))
     DBUG_VOID_RETURN;
 
   /*
@@ -966,6 +1134,7 @@ Query_cache::abort(Query_cache_tls *query_cache_tls)
   Query_cache_block *query_block= query_cache_tls->first_query_block;
   if (query_block)
   {
+    thd= current_thd;
     thd_proc_info(thd, "storing result in query cache");
     DUMP(this);
     BLOCK_LOCK_WR(query_block);
@@ -1006,7 +1175,7 @@ void Query_cache::end_of_result(THD *thd)
                      emb_count_querycache_size(thd), 0);
 #endif
 
-  if (try_lock())
+  if (try_lock(thd, Query_cache::WAIT))
     DBUG_VOID_RETURN;
 
   query_block= query_cache_tls->first_query_block;
@@ -1090,7 +1259,8 @@ Query_cache::Query_cache(ulong query_cache_limit_arg,
   :query_cache_size(0),
    query_cache_limit(query_cache_limit_arg),
    queries_in_cache(0), hits(0), inserts(0), refused(0),
-   total_blocks(0), lowmem_prunes(0), m_query_cache_is_disabled(FALSE),
+   total_blocks(0), lowmem_prunes(0),
+   m_cache_status(OK),
    min_allocation_unit(ALIGN_SIZE(min_allocation_unit_arg)),
    min_result_data_size(ALIGN_SIZE(min_result_data_size_arg)),
    def_query_hash_size(ALIGN_SIZE(def_query_hash_size_arg)),
@@ -1114,6 +1284,13 @@ ulong Query_cache::resize(ulong query_cache_size_arg)
 			query_cache_size_arg));
   DBUG_ASSERT(initialized);
 
+  if (global_system_variables.query_cache_type == 0)
+  {
+    if (query_cache_size_arg != 0)
+      my_error(ER_QUERY_CACHE_IS_DISABLED, MYF(0));
+    DBUG_RETURN(0);
+  }
+
   lock_and_suspend();
 
   /*
@@ -1146,8 +1323,17 @@ ulong Query_cache::resize(ulong query_cache_size_arg)
   query_cache_size= query_cache_size_arg;
   new_query_cache_size= init_cache();
 
+  /*
+    m_cache_status is internal query cache switch so switching it on/off
+    will not be reflected on global_system_variables.query_cache_type
+  */
   if (new_query_cache_size)
+  {
     DBUG_EXECUTE("check_querycache",check_integrity(1););
+    m_cache_status= OK;                         // size > 0 => enable cache
+  }
+  else
+    m_cache_status= DISABLED;                   // size 0 means the cache disabled
 
   unlock();
   DBUG_RETURN(new_query_cache_size);
@@ -1166,6 +1352,9 @@ void Query_cache::store_query(THD *thd, TABLE_LIST *tables_used)
 {
   TABLE_COUNTER_TYPE local_tables;
   ulong tot_length;
+  const char *query;
+  size_t query_length;
+  uint8 tables_type;
   DBUG_ENTER("Query_cache::store_query");
   /*
     Testing 'query_cache_size' without a lock here is safe: the thing
@@ -1175,12 +1364,23 @@ void Query_cache::store_query(THD *thd, TABLE_LIST *tables_used)
 
     See also a note on double-check locking usage above.
   */
-  if (thd->locked_tables_mode || query_cache_size == 0)
+  if (!thd->query_cache_is_applicable || query_cache_size == 0)
+  {
+    DBUG_PRINT("qcache", ("Query cache not ready"));
     DBUG_VOID_RETURN;
-  uint8 tables_type= 0;
+  }
+  if (thd->lex->sql_command != SQLCOM_SELECT)
+  {
+    DBUG_PRINT("qcache", ("Ignoring not SELECT command"));
+    DBUG_VOID_RETURN;
+  }
+
+  /* The following assert fails if we haven't called send_result_to_client */
+  DBUG_ASSERT(thd->base_query.is_alloced() ||
+              thd->base_query.ptr() == thd->query());
 
-  if ((local_tables= is_cacheable(thd, thd->query_length(),
-				  thd->query(), thd->lex, tables_used,
+  tables_type= 0;
+  if ((local_tables= is_cacheable(thd, thd->lex, tables_used,
 				  &tables_type)))
   {
     NET *net= &thd->net;
@@ -1259,7 +1459,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
       The 'TRUE' parameter indicate that the lock is allowed to timeout
 
     */
-    if (try_lock(TRUE))
+    if (try_lock(thd, Query_cache::WAIT))
       DBUG_VOID_RETURN;
     if (query_cache_size == 0)
     {
@@ -1275,11 +1475,13 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
       DBUG_VOID_RETURN;
     }
 
+    query=        thd->base_query.ptr();
+    query_length= thd->base_query.length();
+
     /* Key is query + database + flag */
     if (thd->db_length)
     {
-      memcpy(thd->query() + thd->query_length() + 1, thd->db, 
-        thd->db_length);
+      memcpy((char*) (query + query_length + 1), thd->db, thd->db_length);
       DBUG_PRINT("qcache", ("database: %s  length: %u",
 			    thd->db, (unsigned) thd->db_length)); 
     }
@@ -1287,24 +1489,24 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
     {
       DBUG_PRINT("qcache", ("No active database"));
     }
-    tot_length= thd->query_length() + thd->db_length + 1 +
+    tot_length= query_length + thd->db_length + 1 +
       QUERY_CACHE_FLAGS_SIZE;
     /*
       We should only copy structure (don't use it location directly)
       because of alignment issue
     */
-    memcpy((void*) (thd->query() + (tot_length - QUERY_CACHE_FLAGS_SIZE)),
+    memcpy((void*) (query + (tot_length - QUERY_CACHE_FLAGS_SIZE)),
 	   &flags, QUERY_CACHE_FLAGS_SIZE);
 
     /* Check if another thread is processing the same query? */
     Query_cache_block *competitor = (Query_cache_block *)
-      my_hash_search(&queries, (uchar*) thd->query(), tot_length);
+      my_hash_search(&queries, (uchar*) query, tot_length);
     DBUG_PRINT("qcache", ("competitor 0x%lx", (ulong) competitor));
     if (competitor == 0)
     {
       /* Query is not in cache and no one is working with it; Store it */
       Query_cache_block *query_block;
-      query_block= write_block_data(tot_length, (uchar*) thd->query(),
+      query_block= write_block_data(tot_length, (uchar*) query,
 				    ALIGN_SIZE(sizeof(Query_cache_query)),
 				    Query_cache_block::QUERY, local_tables);
       if (query_block != 0)
@@ -1361,7 +1563,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
       DBUG_PRINT("qcache", ("Another thread process same query"));
     }
   }
-  else if (thd->lex->sql_command == SQLCOM_SELECT)
+  else
     statistic_increment(refused, &structure_guard_mutex);
 
 end:
@@ -1425,7 +1627,7 @@ send_data_in_chunks(NET *net, const uchar *packet, ulong len)
   to the user.
 
   @param thd Pointer to the thread handler
-  @param sql A pointer to the sql statement *
+  @param org_sql A pointer to the sql statement *
   @param query_length Length of the statement in characters
 
   @return status code
@@ -1440,7 +1642,7 @@ send_data_in_chunks(NET *net, const uchar *packet, ulong len)
 */
 
 int
-Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length)
+Query_cache::send_result_to_client(THD *thd, char *org_sql, uint query_length)
 {
   ulonglong engine_data;
   Query_cache_query *query;
@@ -1451,6 +1653,7 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length)
   Query_cache_block_table *block_table, *block_table_end;
   ulong tot_length;
   Query_cache_query_flags flags;
+  const char *sql, *sql_end;
   DBUG_ENTER("Query_cache::send_result_to_client");
 
   /*
@@ -1461,50 +1664,95 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length)
     See also a note on double-check locking usage above.
   */
   if (is_disabled() || thd->locked_tables_mode ||
-      thd->variables.query_cache_type == 0 || query_cache_size == 0)
+      thd->variables.query_cache_type == 0)
     goto err;
+  DBUG_ASSERT(query_cache_size != 0);           // otherwise cache would be disabled
+
+  thd->query_cache_is_applicable= 1;
+  sql= org_sql; sql_end= sql + query_length;
 
-  if (!thd->lex->safe_to_cache_query)
+  /*
+    Skip all comments at start of query. The following test is false for
+    all normal queries.
+  */
+  if (!my_isalpha(system_charset_info, *sql))
   {
-    DBUG_PRINT("qcache", ("SELECT is non-cacheable"));
+    while (sql < sql_end)
+    {
+      char current= *sql;
+      switch (current) {
+      case '/':
+        if (sql[1] != '*')
+          break;
+        sql+= 2;                              // Skip '/*'
+        if (*sql == '!')
+        {
+          /*
+            Found / *!number comment; Skip number to see if sql
+            starts with 'select'
+          */
+          sql++;
+          while (my_isdigit(system_charset_info, *sql))
+            sql++;
+        }
+        else
+        {
+          while (sql++ < sql_end)
+          {
+            if (sql[-1] == '*' && *sql == '/')
+            {
+              sql++;
+              break;
+            }
+          }
+        }
+        continue;
+      case '-':
+        if (sql[1] != '-' || !is_white_space(sql[2])) // Not a comment
+          break;
+        sql++;                               // Skip first '-'
+        /* Fall through */
+      case '#':
+        while (++sql < sql_end)
+        {
+          if (*sql == '\n')
+          {
+            sql++;                            // Skip '\n'
+            break;
+          }
+        }
+        /* Continue with analyzing current symbol */
+        continue;
+      case '\r':
+      case '\n':
+      case '\t':
+      case ' ':
+      case '(':    // To handle (select a from t1) union (select a from t1);
+        sql++;
+        continue;
+      default:
+        break;
+      }
+      /* We only come here when we found the first word of the sql */
+      break;
+    }
+  }
+  if ((my_toupper(system_charset_info, sql[0]) != 'S' ||
+       my_toupper(system_charset_info, sql[1]) != 'E' ||
+       my_toupper(system_charset_info, sql[2]) != 'L'))
+  {
+    DBUG_PRINT("qcache", ("The statement is not a SELECT; Not cached"));
     goto err;
   }
 
+  if ((sql_end - sql) > 20 && has_no_cache_directive(sql+6))
   {
-    uint i= 0;
-    /*
-      Skip '(' characters in queries like following:
-      (select a from t1) union (select a from t1);
-    */
-    while (sql[i]=='(')
-      i++;
-
     /*
-      Test if the query is a SELECT
-      (pre-space is removed in dispatch_command).
-
-      First '/' looks like comment before command it is not
-      frequently appeared in real life, consequently we can
-      check all such queries, too.
+      We do not increase 'refused' statistics here since it will be done
+      later when the query is parsed.
     */
-    if ((my_toupper(system_charset_info, sql[i])     != 'S' ||
-         my_toupper(system_charset_info, sql[i + 1]) != 'E' ||
-         my_toupper(system_charset_info, sql[i + 2]) != 'L') &&
-        sql[i] != '/')
-    {
-      DBUG_PRINT("qcache", ("The statement is not a SELECT; Not cached"));
-      goto err;
-    }
-    
-    if (query_length > 20 && has_no_cache_directive(&sql[i+6]))
-    {
-      /*
-        We do not increase 'refused' statistics here since it will be done
-        later when the query is parsed.
-      */
-      DBUG_PRINT("qcache", ("The statement has a SQL_NO_CACHE directive"));
-      goto err;
-    }
+    DBUG_PRINT("qcache", ("The statement has a SQL_NO_CACHE directive"));
+    goto err;
   }
 
   /*
@@ -1512,20 +1760,32 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length)
     disabled or if a full cache flush is in progress, the attempt to
     get the lock is aborted.
 
-    The 'TRUE' parameter indicate that the lock is allowed to timeout
+    The TIMEOUT parameter indicates that the lock is allowed to time out.
   */
-  if (try_lock(TRUE))
+  if (try_lock(thd, Query_cache::TIMEOUT))
     goto err;
 
   if (query_cache_size == 0)
     goto err_unlock;
 
   Query_cache_block *query_block;
+  if (opt_query_cache_strip_comments)
+  {
+    make_base_query(&thd->base_query, sql, (size_t) (sql_end - sql),
+                    thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE);
+    sql=          thd->base_query.ptr();
+    query_length= thd->base_query.length();
+  }
+  else
+  {
+    sql= org_sql;
+    thd->base_query.set(sql, query_length, system_charset_info);
+  }
 
   tot_length= query_length + thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE;
   if (thd->db_length)
   {
-    memcpy(sql+query_length+1, thd->db, thd->db_length);
+    memcpy((char*) (sql+query_length+1), thd->db, thd->db_length);
     DBUG_PRINT("qcache", ("database: '%s'  length: %u",
 			  thd->db, (unsigned)thd->db_length));
   }
@@ -1656,7 +1916,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
           temporary tables => assign following variable to make check
           faster.
         */
-        thd->lex->safe_to_cache_query=0;
+        thd->query_cache_is_applicable= 0;      // Query can't be cached
         BLOCK_UNLOCK_RD(query_block);
         DBUG_RETURN(-1);
       }
@@ -1672,7 +1932,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
 		 ("probably no SELECT access to %s.%s =>  return to normal processing",
 		  table_list.db, table_list.alias));
       unlock();
-      thd->lex->safe_to_cache_query=0;		// Don't try to cache this
+      thd->query_cache_is_applicable= 0;        // Query can't be cached
       BLOCK_UNLOCK_RD(query_block);
       DBUG_RETURN(-1);				// Privilege error
     }
@@ -1681,7 +1941,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
       DBUG_PRINT("qcache", ("Need to check column privileges for %s.%s",
 			    table_list.db, table_list.alias));
       BLOCK_UNLOCK_RD(query_block);
-      thd->lex->safe_to_cache_query= 0;		// Don't try to cache this
+      thd->query_cache_is_applicable= 0;        // Query can't be cached
       goto err_unlock;				// Parse query
     }
 #endif /*!NO_EMBEDDED_ACCESS_CHECKS*/
@@ -1705,7 +1965,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
                                   table->key_length());
       }
       else
-        thd->lex->safe_to_cache_query= 0;       // Don't try to cache this
+        thd->query_cache_is_applicable= 0;      // Query can't be cached
       /* End the statement transaction potentially started by engine. */
       trans_rollback_stmt(thd);
       goto err_unlock;				// Parse query
@@ -1749,6 +2009,8 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
 
   thd->limit_found_rows = query->found_rows();
   thd->status_var.last_query_cost= 0.0;
+  thd->query_plan_flags= (thd->query_plan_flags & ~QPLAN_QC_NO) | QPLAN_QC;
+
   /*
     End the statement transaction potentially started by an
     engine callback. We ignore the return value for now,
@@ -1765,8 +2027,15 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
 
 err_unlock:
   unlock();
-err:
   MYSQL_QUERY_CACHE_MISS(thd->query());
+  /*
+    query_plan_flags doesn't have to be changed here as it contains
+    QPLAN_QC_NO by default
+  */
+  DBUG_RETURN(0);				// Query was not cached
+
+err:
+  thd->query_cache_is_applicable= 0;            // Query can't be cached
   DBUG_RETURN(0);				// Query was not cached
 }
 
@@ -1806,13 +2075,12 @@ void Query_cache::invalidate(THD *thd, TABLE_LIST *tables_used,
   DBUG_VOID_RETURN;
 }
 
-void Query_cache::invalidate(CHANGED_TABLE_LIST *tables_used)
+void Query_cache::invalidate(THD *thd, CHANGED_TABLE_LIST *tables_used)
 {
   DBUG_ENTER("Query_cache::invalidate (changed table list)");
   if (is_disabled())
     DBUG_VOID_RETURN;
 
-  THD *thd= current_thd;
   for (; tables_used; tables_used= tables_used->next)
   {
     thd_proc_info(thd, "invalidating query cache entries (table list)");
@@ -1835,13 +2103,13 @@ void Query_cache::invalidate(CHANGED_TABLE_LIST *tables_used)
   NOTE
     can be used only for opened tables
 */
-void Query_cache::invalidate_locked_for_write(TABLE_LIST *tables_used)
+void Query_cache::invalidate_locked_for_write(THD *thd,
+                                              TABLE_LIST *tables_used)
 {
   DBUG_ENTER("Query_cache::invalidate_locked_for_write");
   if (is_disabled())
     DBUG_VOID_RETURN;
 
-  THD *thd= current_thd;
   for (; tables_used; tables_used= tables_used->next_local)
   {
     thd_proc_info(thd, "invalidating query cache entries (table)");
@@ -1897,9 +2165,8 @@ void Query_cache::invalidate(THD *thd, const char *key, uint32  key_length,
    Remove all cached queries that uses the given database.
 */
 
-void Query_cache::invalidate(char *db)
+void Query_cache::invalidate(THD *thd, char *db)
 {
-  
   DBUG_ENTER("Query_cache::invalidate (db)");
   if (is_disabled())
     DBUG_VOID_RETURN;
@@ -1909,9 +2176,7 @@ void Query_cache::invalidate(char *db)
     Lock the query cache and queue all invalidation attempts to avoid
     the risk of a race between invalidation, cache inserts and flushes.
   */
-  lock();
-
-  THD *thd= current_thd;
+  lock(thd);
 
   if (query_cache_size > 0)
   {
@@ -2018,7 +2283,7 @@ void Query_cache::flush()
 
 */
 
-void Query_cache::pack(ulong join_limit, uint iteration_limit)
+void Query_cache::pack(THD *thd, ulong join_limit, uint iteration_limit)
 {
   DBUG_ENTER("Query_cache::pack");
 
@@ -2029,7 +2294,7 @@ void Query_cache::pack(ulong join_limit, uint iteration_limit)
     If the entire qc is being invalidated we can bail out early
     instead of waiting for the lock.
   */
-  if (try_lock())
+  if (try_lock(thd, Query_cache::WAIT))
     DBUG_VOID_RETURN;
 
   if (query_cache_size == 0)
@@ -2066,11 +2331,25 @@ void Query_cache::destroy()
     mysql_cond_destroy(&COND_cache_status_changed);
     mysql_mutex_destroy(&structure_guard_mutex);
     initialized = 0;
+    DBUG_ASSERT(m_requests_in_progress == 0);
   }
   DBUG_VOID_RETURN;
 }
 
 
+void Query_cache::disable_query_cache(THD *thd)
+{
+  /* Record the request; unlock() frees the cache once no request is */
+  /* in progress and the status is DISABLE_REQUEST. */
+  m_cache_status= DISABLE_REQUEST;
+  if (m_requests_in_progress != 0)
+    return;                                     // cache still in use
+  /* try_lock(TRY) exits immediately if there is a lock. */
+  if (!try_lock(thd, TRY))
+    unlock();
+}
+
+
 /*****************************************************************************
   init/destroy
 *****************************************************************************/
@@ -2083,16 +2362,21 @@ void Query_cache::init()
   mysql_cond_init(key_COND_cache_status_changed,
                   &COND_cache_status_changed, NULL);
   m_cache_lock_status= Query_cache::UNLOCKED;
+  m_cache_status= Query_cache::OK;
+  m_requests_in_progress= 0;
   initialized = 1;
+  query_state_map= default_charset_info->state_map;
   /*
-    If we explicitly turn off query cache from the command line query cache will
-    be disabled for the reminder of the server life time. This is because we
-    want to avoid locking the QC specific mutex if query cache isn't going to
-    be used.
+    If we explicitly turn off query cache from the command line query
+    cache will be disabled for the remainder of the server
+    lifetime. This is because we want to avoid locking the QC specific
+    mutex if query cache isn't going to be used.
   */
   if (global_system_variables.query_cache_type == 0)
-    query_cache.disable_query_cache();
-
+  {
+    free_cache();
+    m_cache_status= DISABLED;
+  }
   DBUG_VOID_RETURN;
 }
 
@@ -2305,6 +2589,18 @@ void Query_cache::free_cache()
 {
   DBUG_ENTER("Query_cache::free_cache");
 
+  /* Destroy locks */
+  Query_cache_block *block= queries_blocks;
+  if (block)
+  {
+    do
+    {
+      Query_cache_query *query= block->query();
+      mysql_rwlock_destroy(&query->lock);
+      block= block->next;
+    } while (block != queries_blocks);
+  }
+
   my_free(cache);
   make_disabled();
   my_hash_free(&queries);
@@ -2784,7 +3080,7 @@ void Query_cache::invalidate_table(THD *thd, uchar * key, uint32  key_length)
     Lock the query cache and queue all invalidation attempts to avoid
     the risk of a race between invalidation, cache inserts and flushes.
   */
-  lock();
+  lock(thd);
 
   DEBUG_SYNC(thd, "wait_in_query_cache_invalidate2");
 
@@ -3562,7 +3858,7 @@ Query_cache::process_and_count_tables(THD *thd, TABLE_LIST *tables_used,
     {
       DBUG_PRINT("qcache", ("Don't cache statement as it refers to "
                             "tables with column privileges."));
-      thd->lex->safe_to_cache_query= 0;
+      thd->query_cache_is_applicable= 0;        // Query can't be cached
       DBUG_RETURN(0);
     }
 #endif
@@ -3575,16 +3871,17 @@ Query_cache::process_and_count_tables(THD *thd, TABLE_LIST *tables_used,
     }
     else
     {
-      DBUG_PRINT("qcache", ("table: %s  db:  %s  type: %u",
-                            tables_used->table->s->table_name.str,
-                            tables_used->table->s->db.str,
-                            tables_used->table->s->db_type()->db_type));
       if (tables_used->derived)
       {
+        DBUG_PRINT("qcache", ("table: %s", tables_used->alias));
         table_count--;
         DBUG_PRINT("qcache", ("derived table skipped"));
         continue;
       }
+      DBUG_PRINT("qcache", ("table: %s  db:  %s  type: %u",
+                            tables_used->table->s->table_name.str,
+                            tables_used->table->s->db.str,
+                            tables_used->table->s->db_type()->db_type));
       *tables_type|= tables_used->table->file->table_cache_type();
 
       /*
@@ -3627,14 +3924,13 @@ Query_cache::process_and_count_tables(THD *thd, TABLE_LIST *tables_used,
 */
 
 TABLE_COUNTER_TYPE
-Query_cache::is_cacheable(THD *thd, size_t query_len, const char *query,
-                          LEX *lex,
+Query_cache::is_cacheable(THD *thd, LEX *lex,
                           TABLE_LIST *tables_used, uint8 *tables_type)
 {
   TABLE_COUNTER_TYPE table_count;
   DBUG_ENTER("Query_cache::is_cacheable");
 
-  if (query_cache_is_cacheable_query(lex) &&
+  if (thd->lex->safe_to_cache_query &&
       (thd->variables.query_cache_type == 1 ||
        (thd->variables.query_cache_type == 2 && (lex->select_lex.options &
 						 OPTION_TO_QUERY_CACHE))))
@@ -3699,7 +3995,7 @@ my_bool Query_cache::ask_handler_allowance(THD *thd,
     {
       DBUG_PRINT("qcache", ("Handler does not allow caching for %s.%s",
 			    tables_used->db, tables_used->alias));
-      thd->lex->safe_to_cache_query= 0;          // Don't try to cache this
+      thd->query_cache_is_applicable= 0;        // Query can't be cached
       DBUG_RETURN(1);
     }
   }
diff --git a/sql/sql_cache.h b/sql/sql_cache.h
index 6360b732620..36a05b49fa9 100644
--- a/sql/sql_cache.h
+++ b/sql/sql_cache.h
@@ -138,7 +138,7 @@ struct Query_cache_block
   block_type type;
   TABLE_COUNTER_TYPE n_tables;			// number of tables in query
 
-  inline my_bool is_free(void) { return type == FREE; }
+  inline bool is_free(void) { return type == FREE; }
   void init(ulong length);
   void destroy();
   inline uint headers_len();
@@ -179,7 +179,7 @@ struct Query_cache_query
   }
   void lock_writing();
   void lock_reading();
-  my_bool try_lock_writing();
+  bool try_lock_writing();
   void unlock_writing();
   void unlock_reading();
 };
@@ -293,14 +293,14 @@ private:
   my_thread_id m_cache_lock_thread_id;
 #endif
   mysql_cond_t COND_cache_status_changed;
+  uint m_requests_in_progress;
   enum Cache_lock_status { UNLOCKED, LOCKED_NO_WAIT, LOCKED };
   Cache_lock_status m_cache_lock_status;
-
-  bool m_query_cache_is_disabled;
+  enum Cache_staus {OK, DISABLE_REQUEST, DISABLED};
+  Cache_staus m_cache_status;
 
   void free_query_internal(Query_cache_block *point);
   void invalidate_table_internal(THD *thd, uchar *key, uint32 key_length);
-  void disable_query_cache(void) { m_query_cache_is_disabled= TRUE; }
 
 protected:
   /*
@@ -312,7 +312,7 @@ protected:
       2. query block (for operation inside query (query block/results))
 
     Thread doing cache flush releases the mutex once it sets
-    m_cache_status flag, so other threads may bypass the cache as
+    m_cache_lock_status flag, so other threads may bypass the cache as
     if it is disabled, not waiting for reset to finish.  The exception
     is other threads that were going to do cache flush---they'll wait
     till the end of a flush operation.
@@ -332,7 +332,7 @@ protected:
   
   uint mem_bin_num, mem_bin_steps;		// See at init_cache & find_bin
 
-  my_bool initialized;
+  bool initialized;
 
   /* Exclude/include from cyclic double linked list */
   static void double_linked_list_exclude(Query_cache_block *point,
@@ -427,8 +427,7 @@ protected:
     If query is cacheable return number tables in query
     (query without tables not cached)
   */
-  TABLE_COUNTER_TYPE is_cacheable(THD *thd, size_t query_len,
-                                  const char *query,
+  TABLE_COUNTER_TYPE is_cacheable(THD *thd,
                                   LEX *lex, TABLE_LIST *tables_used,
                                   uint8 *tables_type);
   TABLE_COUNTER_TYPE process_and_count_tables(THD *thd,
@@ -444,7 +443,9 @@ protected:
 	      uint def_query_hash_size = QUERY_CACHE_DEF_QUERY_HASH_SIZE,
 	      uint def_table_hash_size = QUERY_CACHE_DEF_TABLE_HASH_SIZE);
 
-  bool is_disabled(void) { return m_query_cache_is_disabled; }
+  bool is_disabled(void) { return m_cache_status != OK; }
+  bool is_disable_in_progress(void)
+  { return m_cache_status == DISABLE_REQUEST; }
 
   /* initialize cache (mutex) */
   void init();
@@ -465,22 +466,23 @@ protected:
   int send_result_to_client(THD *thd, char *query, uint query_length);
 
   /* Remove all queries that uses any of the listed following tables */
-  void invalidate(THD* thd, TABLE_LIST *tables_used,
+  void invalidate(THD *thd, TABLE_LIST *tables_used,
 		  my_bool using_transactions);
-  void invalidate(CHANGED_TABLE_LIST *tables_used);
-  void invalidate_locked_for_write(TABLE_LIST *tables_used);
-  void invalidate(THD* thd, TABLE *table, my_bool using_transactions);
+  void invalidate(THD *thd, CHANGED_TABLE_LIST *tables_used);
+  void invalidate_locked_for_write(THD *thd, TABLE_LIST *tables_used);
+  void invalidate(THD *thd, TABLE *table, my_bool using_transactions);
   void invalidate(THD *thd, const char *key, uint32  key_length,
 		  my_bool using_transactions);
 
   /* Remove all queries that uses any of the tables in following database */
-  void invalidate(char *db);
+  void invalidate(THD *thd, char *db);
 
   /* Remove all queries that uses any of the listed following table */
   void invalidate_by_MyISAM_filename(const char *filename);
 
   void flush();
-  void pack(ulong join_limit = QUERY_CACHE_PACK_LIMIT,
+  void pack(THD *thd,
+            ulong join_limit = QUERY_CACHE_PACK_LIMIT,
 	    uint iteration_limit = QUERY_CACHE_PACK_ITERATION);
 
   void destroy();
@@ -511,10 +513,13 @@ protected:
 			const char *name);
   my_bool in_blocks(Query_cache_block * point);
 
-  bool try_lock(bool use_timeout= FALSE);
-  void lock(void);
+  enum Cache_try_lock_mode {WAIT, TIMEOUT, TRY};
+  bool try_lock(THD *thd, Cache_try_lock_mode mode= WAIT);
+  void lock(THD *thd);
   void lock_and_suspend(void);
   void unlock(void);
+
+  void disable_query_cache(THD *thd);
 };
 
 #ifdef HAVE_QUERY_CACHE
@@ -550,7 +555,7 @@ struct Query_cache_query_flags
 #define query_cache_resize(A) query_cache.resize(A)
 #define query_cache_set_min_res_unit(A) query_cache.set_min_res_unit(A)
 #define query_cache_invalidate3(A, B, C) query_cache.invalidate(A, B, C)
-#define query_cache_invalidate1(A) query_cache.invalidate(A)
+#define query_cache_invalidate1(A, B) query_cache.invalidate(A, B)
 #define query_cache_send_result_to_client(A, B, C) \
   query_cache.send_result_to_client(A, B, C)
 #define query_cache_invalidate_by_MyISAM_filename_ref \
@@ -562,20 +567,19 @@ struct Query_cache_query_flags
   (((L)->sql_command == SQLCOM_SELECT) && (L)->safe_to_cache_query)
 #else
 #define QUERY_CACHE_FLAGS_SIZE 0
-#define query_cache_store_query(A, B)
-#define query_cache_destroy()
-#define query_cache_result_size_limit(A)
-#define query_cache_init()
-#define query_cache_resize(A)
-#define query_cache_set_min_res_unit(A)
-#define query_cache_invalidate3(A, B, C)
-#define query_cache_invalidate1(A)
+#define query_cache_store_query(A, B)     do { } while(0)
+#define query_cache_destroy()             do { } while(0)
+#define query_cache_result_size_limit(A)  do { } while(0)
+#define query_cache_init()                do { } while(0)
+#define query_cache_resize(A)             do { } while(0)
+#define query_cache_set_min_res_unit(A)   do { } while(0)
+#define query_cache_invalidate3(A, B, C)  do { } while(0)
+#define query_cache_invalidate1(A,B)      do { } while(0)
 #define query_cache_send_result_to_client(A, B, C) 0
 #define query_cache_invalidate_by_MyISAM_filename_ref NULL
 
-#define query_cache_abort(A)
-#define query_cache_end_of_result(A)
-#define query_cache_invalidate_by_MyISAM_filename_ref NULL
+#define query_cache_abort(A)              do { } while(0)
+#define query_cache_end_of_result(A)      do { } while(0)
 #define query_cache_maybe_disabled(T) 1
 #define query_cache_is_cacheable_query(L) 0
 #endif /*HAVE_QUERY_CACHE*/
diff --git a/sql/sql_callback.h b/sql/sql_callback.h
index 32020c6f2da..316f94a0213 100644
--- a/sql/sql_callback.h
+++ b/sql/sql_callback.h
@@ -39,5 +39,4 @@
 #define MYSQL_CALLBACK_ELSE(OBJ, FUNC, PARAMS, ELSE)    \
   (((OBJ) && ((OBJ)->FUNC)) ? (OBJ)->FUNC PARAMS : (ELSE))
 
-
 #endif /* SQL_CALLBACK_INCLUDED */
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 9b5772d3d07..e1ff29e0b24 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -57,9 +57,11 @@
 #include "sp_rcontext.h"
 #include "sp_cache.h"
 #include "transaction.h"
+#include "sql_select.h" /* declares create_tmp_table() */
 #include "debug_sync.h"
 #include "sql_parse.h"                          // is_update_query
 #include "sql_callback.h"
+#include "sql_connect.h"
 
 /*
   The following is used to initialise Table_ident with a internal
@@ -125,6 +127,7 @@ Key::Key(const Key &rhs, MEM_ROOT *mem_root)
   key_create_info(rhs.key_create_info),
   columns(rhs.columns, mem_root),
   name(rhs.name),
+  option_list(rhs.option_list),
   generated(rhs.generated)
 {
   list_copy_and_replace_each_value(columns, mem_root);
@@ -214,58 +217,65 @@ bool foreign_key_prefix(Key *a, Key *b)
 #endif
 }
 
+/**
+  Check that every key column exists in the table and that the foreign
+  key options are compatible with the columns the key is created on.
+
+  @param table_fields  fields of the table, matched by name ignoring case
+
+  @retval FALSE  all key columns exist and the options are compatible
+  @retval TRUE   a column is missing, or a virtual column is combined with
+                 SET NULL or ON UPDATE CASCADE; my_error() has been called
+*/
+bool Foreign_key::validate(List<Create_field> &table_fields)
+{
+  Create_field  *sql_field;
+  Key_part_spec *column;
+  List_iterator<Key_part_spec> cols(columns);
+  List_iterator<Create_field> it(table_fields);
+  DBUG_ENTER("Foreign_key::validate");
+  while ((column= cols++))
+  {
+    it.rewind();
+    while ((sql_field= it++) &&
+           my_strcasecmp(system_charset_info,
+                         column->field_name.str,
+                         sql_field->field_name)) {}
+    if (!sql_field)
+    {
+      my_error(ER_KEY_COLUMN_DOES_NOT_EXITS, MYF(0), column->field_name.str);
+      DBUG_RETURN(TRUE);
+    }
+    if (type == Key::FOREIGN_KEY && sql_field->vcol_info)
+    {
+      if (delete_opt == FK_OPTION_SET_NULL)
+      {
+        my_error(ER_WRONG_FK_OPTION_FOR_VIRTUAL_COLUMN, MYF(0),
+                 "ON DELETE SET NULL");
+        DBUG_RETURN(TRUE);
+      }
+      if (update_opt == FK_OPTION_SET_NULL)
+      {
+        my_error(ER_WRONG_FK_OPTION_FOR_VIRTUAL_COLUMN, MYF(0),
+                 "ON UPDATE SET NULL");
+        DBUG_RETURN(TRUE);
+      }
+      if (update_opt == FK_OPTION_CASCADE)
+      {
+        my_error(ER_WRONG_FK_OPTION_FOR_VIRTUAL_COLUMN, MYF(0),
+                 "ON UPDATE CASCADE");
+        DBUG_RETURN(TRUE);
+      }
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
 
 /****************************************************************************
 ** Thread specific functions
 ****************************************************************************/
 
 /**
-  Get reference to scheduler data object
-
-  @param thd            THD object
-
-  @retval               Scheduler data object on THD
-*/
-void *thd_get_scheduler_data(THD *thd)
-{
-  return thd->scheduler.data;
-}
-
-/**
-  Set reference to Scheduler data object for THD object
-
-  @param thd            THD object
-  @param psi            Scheduler data object to set on THD
-*/
-void thd_set_scheduler_data(THD *thd, void *data)
-{
-  thd->scheduler.data= data;
-}
-
-/**
-  Get reference to Performance Schema object for THD object
-
-  @param thd            THD object
-
-  @retval               Performance schema object for thread on THD
-*/
-PSI_thread *thd_get_psi(THD *thd)
-{
-  return thd->scheduler.m_psi;
-}
-
-/**
-  Set reference to Performance Schema object for THD object
-
-  @param thd            THD object
-  @param psi            Performance schema object for thread
-*/
-void thd_set_psi(THD *thd, PSI_thread *psi)
-{
-  thd->scheduler.m_psi= psi;
-}
-
-/**
   Set the state on connection to killed
 
   @param thd               THD object
@@ -342,33 +352,6 @@ THD *thd_get_current_thd()
 }
 
 /**
-  Set up various THD data for a new connection
-
-  thd_new_connection_setup
-
-  @param              thd            THD object
-  @param              stack_start    Start of stack for connection
-*/
-void thd_new_connection_setup(THD *thd, char *stack_start)
-{
-#ifdef HAVE_PSI_INTERFACE
-  if (PSI_server)
-    thd_set_psi(thd,
-                PSI_server->new_thread(key_thread_one_connection,
-                                       thd,
-                                       thd_get_thread_id((MYSQL_THD)thd)));
-#endif
-  thd->set_time();
-  thd->prior_thr_create_utime= thd->thr_create_utime= thd->start_utime=
-    my_micro_time();
-  threads.append(thd);
-  thd_unlock_thread_count(thd);
-  DBUG_PRINT("info", ("init new connection. thd: 0x%lx fd: %d",
-          (ulong)thd, thd->net.vio->sd));
-  thd_set_thread_stack(thd, stack_start);
-}
-
-/**
   Lock data that needs protection in THD object
 
   @param thd                   THD object
@@ -509,13 +492,11 @@ int thd_tablespace_op(const THD *thd)
 
 
 extern "C"
-const char *set_thd_proc_info(void *thd_arg, const char *info,
+const char *set_thd_proc_info(THD *thd, const char *info,
                               const char *calling_function,
                               const char *calling_file,
                               const unsigned int calling_line)
 {
-  THD *thd= (THD *) thd_arg;
-
   if (!thd)
     thd= current_thd;
 
@@ -745,7 +726,7 @@ THD::THD()
    :Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION,
               /* statement id */ 0),
    rli_fake(0),
-   user_time(0), in_sub_stmt(0),
+   in_sub_stmt(0),
    binlog_unsafe_warning_flags(0),
    binlog_table_maps(0),
    table_map_for_update(0),
@@ -757,6 +738,7 @@ THD::THD()
    examined_row_count(0),
    warning_info(&main_warning_info),
    stmt_da(&main_da),
+   global_disable_checkpoint(0),
    is_fatal_error(0),
    transaction_rollback_request(0),
    is_fatal_sub_stmt_error(0),
@@ -783,12 +765,14 @@ THD::THD()
   init_sql_alloc(&main_mem_root, ALLOC_ROOT_MIN_BLOCK_SIZE, 0);
   stmt_arena= this;
   thread_stack= 0;
+  scheduler= thread_scheduler;                 // Will be fixed later
+  extra_port= 0;
   catalog= (char*)"std"; // the only catalog we have for now
   main_security_ctx.init();
   security_ctx= &main_security_ctx;
   no_errors= 0;
   password= 0;
-  query_start_used= 0;
+  query_start_used= query_start_sec_part_used= 0;
   count_cuted_fields= CHECK_FIELD_IGNORE;
   killed= NOT_KILLED;
   col_access=0;
@@ -802,9 +786,12 @@ THD::THD()
   statement_id_counter= 0UL;
   // Must be reset to handle error with THD's created for init of mysqld
   lex->current_select= 0;
-  start_time=(time_t) 0;
+  user_time.val= start_time= start_time_sec_part= 0;
   start_utime= prior_thr_create_utime= 0L;
   utime_after_lock= 0L;
+  progress.arena= 0;
+  progress.report_to_client= 0;
+  progress.max_counter= 0;
   current_linfo =  0;
   slave_thread = 0;
   bzero(&variables, sizeof(variables));
@@ -818,6 +805,7 @@ THD::THD()
   mysys_var=0;
   binlog_evt_union.do_union= FALSE;
   enable_slow_log= 0;
+
 #ifndef DBUG_OFF
   dbug_sentry=THD_SENTRY_MAGIC;
 #endif
@@ -832,10 +820,21 @@ THD::THD()
   peer_port= 0;					// For SHOW PROCESSLIST
   transaction.m_pending_rows_event= 0;
   transaction.on= 1;
+  wt_thd_lazy_init(&transaction.wt, &variables.wt_deadlock_search_depth_short,
+                                    &variables.wt_timeout_short,
+                                    &variables.wt_deadlock_search_depth_long,
+                                    &variables.wt_timeout_long);
 #ifdef SIGNAL_WITH_VIO_CLOSE
   active_vio = 0;
 #endif
   mysql_mutex_init(key_LOCK_thd_data, &LOCK_thd_data, MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST);
+  mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0);
+  /*
+    LOCK_thread_count goes before LOCK_thd_data - the former is called around
+    'delete thd', the latter - in THD::~THD
+  */
+  mysql_mutex_record_order(&LOCK_thread_count, &LOCK_thd_data);
 
   /* Variables with default values */
   proc_info="login";
@@ -874,19 +873,24 @@ THD::THD()
 
   tablespace_op=FALSE;
   tmp= sql_rnd_with_mutex();
-  randominit(&rand, tmp + (ulong) &rand, tmp + (ulong) ::global_query_id);
+  my_rnd_init(&rand, tmp + (ulong) &rand, tmp + (ulong) ::global_query_id);
   substitute_null_with_insert_id = FALSE;
   thr_lock_info_init(&lock_info); /* safety: will be reset after start */
 
   m_internal_handler= NULL;
   m_binlog_invoker= FALSE;
+  arena_for_cached_items= 0;
   memset(&invoker_user, 0, sizeof(invoker_user));
   memset(&invoker_host, 0, sizeof(invoker_host));
+  prepare_derived_at_open= FALSE;
+  create_tmp_table_for_derived= FALSE;
+  save_prep_leaf_list= FALSE;
 }
 
 
 void THD::push_internal_handler(Internal_error_handler *handler)
 {
+  DBUG_ENTER("THD::push_internal_handler");
   if (m_internal_handler)
   {
     handler->m_prev_internal_handler= m_internal_handler;
@@ -896,6 +900,7 @@ void THD::push_internal_handler(Internal_error_handler *handler)
   {
     m_internal_handler= handler;
   }
+  DBUG_VOID_RETURN;
 }
 
 bool THD::handle_condition(uint sql_errno,
@@ -920,17 +925,17 @@ bool THD::handle_condition(uint sql_errno,
       return TRUE;
     }
   }
-
   return FALSE;
 }
 
 
 Internal_error_handler *THD::pop_internal_handler()
 {
+  DBUG_ENTER("THD::pop_internal_handler");
   DBUG_ASSERT(m_internal_handler != NULL);
   Internal_error_handler *popped_handler= m_internal_handler;
   m_internal_handler= m_internal_handler->m_prev_internal_handler;
-  return popped_handler;
+  DBUG_RETURN(popped_handler);
 }
 
 
@@ -1188,18 +1193,70 @@ void THD::init(void)
   update_charset();
   reset_current_stmt_binlog_format_row();
   bzero((char *) &status_var, sizeof(status_var));
+  bzero((char *) &org_status_var, sizeof(org_status_var));
 
   if (variables.sql_log_bin)
     variables.option_bits|= OPTION_BIN_LOG;
   else
     variables.option_bits&= ~OPTION_BIN_LOG;
 
+  select_commands= update_commands= other_commands= 0;
+  /* Set to handle counting of aborted connections */
+  userstat_running= opt_userstat_running;
+  last_global_update_time= current_connect_time= time(NULL);
 #if defined(ENABLED_DEBUG_SYNC)
   /* Initialize the Debug Sync Facility. See debug_sync.cc. */
   debug_sync_init_thread(this);
 #endif /* defined(ENABLED_DEBUG_SYNC) */
 }
 
+ 
+/* Count the current statement in select_commands, update_commands or
+   other_commands; the counters are meant for update_global_user_stats */
+void THD::update_stats(void)
+{
+  /* sql_command == SQLCOM_END in case of parse errors or quit */
+  if (lex->sql_command != SQLCOM_END)
+  {
+    /* A SQL query. */
+    if (lex->sql_command == SQLCOM_SELECT)
+      select_commands++;
+    else if (sql_command_flags[lex->sql_command] & CF_STATUS_COMMAND)
+    {
+      /* Ignore 'SHOW ' commands */
+    }
+    else if (is_update_query(lex->sql_command))
+      update_commands++;
+    else
+      other_commands++;
+  }
+}
+
+
+void THD::update_all_stats()
+{
+  ulonglong end_cpu_time, end_utime;
+  double busy_time, cpu_time;
+
+  /* This is set at start of query if opt_userstat_running was set */
+  if (!userstat_running)
+    return;
+  /* Convert to seconds (presumably utime is in us, cputime in 100 ns) */
+  end_cpu_time= my_getcputime();
+  end_utime=    microsecond_interval_timer();
+  busy_time= (end_utime - start_utime) / 1000000.0;
+  cpu_time=  (end_cpu_time - start_cpu_time) / 10000000.0;
+  /* In case there are bad values, 2629743 is the #seconds in a month. */
+  if (cpu_time > 2629743.0)
+    cpu_time= 0;
+  status_var_add(status_var.cpu_time, cpu_time);
+  status_var_add(status_var.busy_time, busy_time);
+
+  update_global_user_stats(this, TRUE, my_time(0));
+  // Has to be updated after update_global_user_stats()
+  userstat_running= 0;
+}
+
 
 /*
   Init THD for query processing.
@@ -1290,6 +1347,12 @@ void THD::cleanup(void)
 
   /* All metadata locks must have been released by now. */
   DBUG_ASSERT(!mdl_context.has_locks());
+  if (user_connect)
+  {
+    decrease_user_connections(user_connect);
+    user_connect= 0;                            // Safety
+  }
+  wt_thd_destroy(&transaction.wt);
 
 #if defined(ENABLED_DEBUG_SYNC)
   /* End the Debug Sync Facility. See debug_sync.cc. */
@@ -1347,6 +1410,8 @@ THD::~THD()
   my_free(db);
   db= NULL;
   free_root(&transaction.mem_root,MYF(0));
+  mysql_cond_destroy(&COND_wakeup_ready);
+  mysql_mutex_destroy(&LOCK_wakeup_ready);
   mysql_mutex_destroy(&LOCK_thd_data);
 #ifndef DBUG_OFF
   dbug_sentry= THD_SENTRY_GONE;
@@ -1375,9 +1440,8 @@ THD::~THD()
    from_var     from this array
 
   NOTES
-    This function assumes that all variables are long/ulong.
-    If this assumption will change, then we have to explictely add
-    the other variables after the while loop
+    This function assumes that all variables at start are long/ulong and
+    other types are handled explicitly
 */
 
 void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var)
@@ -1390,8 +1454,15 @@ void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var)
   while (to != end)
     *(to++)+= *(from++);
 
-  to_var->bytes_received+= from_var->bytes_received;
-  to_var->bytes_sent+= from_var->bytes_sent;
+  /* Handle the not ulong variables. See end of system_status_var */
+  to_var->bytes_received+=      from_var->bytes_received;
+  to_var->bytes_sent+=          from_var->bytes_sent;
+  to_var->rows_read+=           from_var->rows_read;
+  to_var->rows_sent+=           from_var->rows_sent;
+  to_var->rows_tmp_read+=       from_var->rows_tmp_read;
+  to_var->binlog_bytes_written+= from_var->binlog_bytes_written;
+  to_var->cpu_time+=            from_var->cpu_time;
+  to_var->busy_time+=           from_var->busy_time;
 }
 
 /*
@@ -1404,7 +1475,8 @@ void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var)
     dec_var      minus this array
   
   NOTE
-    This function assumes that all variables are long/ulong.
+    This function assumes that all variables at start are long/ulong and
+    other types are handled explicitly
 */
 
 void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var,
@@ -1418,9 +1490,25 @@ void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var,
   while (to != end)
     *(to++)+= *(from++) - *(dec++);
 
-  to_var->bytes_received+= from_var->bytes_received - dec_var->bytes_received;;
-  to_var->bytes_sent+= from_var->bytes_sent - dec_var->bytes_sent;
-}
+  to_var->bytes_received+=       from_var->bytes_received -
+                                 dec_var->bytes_received;
+  to_var->bytes_sent+=           from_var->bytes_sent - dec_var->bytes_sent;
+  to_var->rows_read+=            from_var->rows_read - dec_var->rows_read;
+  to_var->rows_sent+=            from_var->rows_sent - dec_var->rows_sent;
+  to_var->rows_tmp_read+=        from_var->rows_tmp_read - dec_var->rows_tmp_read;
+  to_var->binlog_bytes_written+= from_var->binlog_bytes_written -
+                                 dec_var->binlog_bytes_written;
+  to_var->cpu_time+=             from_var->cpu_time - dec_var->cpu_time;
+  to_var->busy_time+=            from_var->busy_time - dec_var->busy_time;
+}
+
+#define SECONDS_TO_WAIT_FOR_KILL 2
+#if !defined(__WIN__) && defined(HAVE_SELECT)
+/* my_sleep() can wait for sub second times */
+#define WAIT_FOR_KILL_TRY_TIMES 20
+#else
+#define WAIT_FOR_KILL_TRY_TIMES 2
+#endif
 
 
 /**
@@ -1483,7 +1571,7 @@ void THD::awake(THD::killed_state state_to_set)
 
     /* Send an event to the scheduler that a thread should be killed. */
     if (!slave_thread)
-      MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (this));
+      MYSQL_CALLBACK(scheduler, post_kill_notification, (this));
   }
 
   /* Broadcast a condition to kick the target if it is waiting on it. */
@@ -1515,12 +1603,35 @@ void THD::awake(THD::killed_state state_to_set)
       enter_cond(). This should make the signaling as safe as possible.
       However, there is still a small chance of failure on platforms with
       instruction or memory write reordering.
+
+      We have to do the loop with trylock, because if we would use
+      pthread_mutex_lock(), we can cause a deadlock as we are here locking
+      the mysys_var->mutex and mysys_var->current_mutex in a different order
+      than in the thread we are trying to kill.
+      We only sleep for 2 seconds as we don't want to have LOCK_thd_data
+      locked for too long.
+
+      There is a small chance we may not succeed in aborting a thread that
+      is not yet waiting for a mutex, but as this happens only for a
+      thread that was doing something else when the kill was issued and
+      which should detect the kill flag before it starts to wait, this
+      should be good enough.
     */
     if (mysys_var->current_cond && mysys_var->current_mutex)
     {
-      mysql_mutex_lock(mysys_var->current_mutex);
-      mysql_cond_broadcast(mysys_var->current_cond);
-      mysql_mutex_unlock(mysys_var->current_mutex);
+      uint i;
+      for (i= 0; i < WAIT_FOR_KILL_TRY_TIMES * SECONDS_TO_WAIT_FOR_KILL; i++)
+      {
+        int ret= mysql_mutex_trylock(mysys_var->current_mutex);
+        mysql_cond_broadcast(mysys_var->current_cond);
+        if (!ret)
+        {
+          /* Signal is sure to get through */
+          mysql_mutex_unlock(mysys_var->current_mutex);
+          break;
+        }
+        my_sleep(1000000L / WAIT_FOR_KILL_TRY_TIMES); // mutex busy: back off
+      }
     }
     mysql_mutex_unlock(&mysys_var->mutex);
   }
@@ -1593,35 +1704,36 @@ bool THD::store_globals()
   */
   mysys_var->id= thread_id;
   real_id= pthread_self();                      // For debugging
+  mysys_var->stack_ends_here= thread_stack +    // for consistency, see libevent_thread_proc
+                              STACK_DIRECTION * (long)my_thread_stack_size;
 
   /*
     We have to call thr_lock_info_init() again here as THD may have been
     created in another thread
   */
   thr_lock_info_init(&lock_info);
+
   return 0;
 }
 
-/*
-  Remove the thread specific info (THD and mem_root pointer) stored during
-  store_global call for this thread.
+/**
+   Untie THD from current thread
+
+   Used when using --thread-handling=pool-of-threads
 */
-bool THD::restore_globals()
+
+void THD::reset_globals()
 {
-  /*
-    Assert that thread_stack is initialized: it's necessary to be able
-    to track stack overrun.
-  */
-  DBUG_ASSERT(thread_stack);
-  
+  mysql_mutex_lock(&LOCK_thd_data);
+  mysys_var= 0;
+  mysql_mutex_unlock(&LOCK_thd_data);
+
   /* Undocking the thread specific data. */
   my_pthread_setspecific_ptr(THR_THD, NULL);
   my_pthread_setspecific_ptr(THR_MALLOC, NULL);
   
-  return 0;
 }
 
-
 /*
   Cleanup after query.
 
@@ -1640,6 +1752,10 @@ bool THD::restore_globals()
 
 void THD::cleanup_after_query()
 {
+  DBUG_ENTER("THD::cleanup_after_query");
+
+  thd_progress_end(this);
+
   /*
     Reset rand_used so that detection of calls to rand() will save random 
     seeds if needed by the slave.
@@ -1673,23 +1789,10 @@ void THD::cleanup_after_query()
   /* reset table map for multi-table update */
   table_map_for_update= 0;
   m_binlog_invoker= FALSE;
+  DBUG_VOID_RETURN;
 }
 
 
-LEX_STRING *
-make_lex_string_root(MEM_ROOT *mem_root,
-                     LEX_STRING *lex_str, const char* str, uint length,
-                     bool allocate_lex_string)
-{
-  if (allocate_lex_string)
-    if (!(lex_str= (LEX_STRING *)alloc_root(mem_root, sizeof(LEX_STRING))))
-      return 0;
-  if (!(lex_str->str= strmake_root(mem_root, str, length)))
-    return 0;
-  lex_str->length= length;
-  return lex_str;
-}
-
 /**
   Create a LEX_STRING in this connection.
 
@@ -1704,8 +1807,13 @@ LEX_STRING *THD::make_lex_string(LEX_STRING *lex_str,
                                  const char* str, uint length,
                                  bool allocate_lex_string)
 {
-  return make_lex_string_root (mem_root, lex_str, str,
-                               length, allocate_lex_string);
+  if (allocate_lex_string)
+    if (!(lex_str= (LEX_STRING *)alloc_root(mem_root, sizeof(LEX_STRING))))
+      return 0;
+  if (!(lex_str->str= strmake_root(mem_root, str, length)))
+    return 0;
+  lex_str->length= length;
+  return lex_str;
 }
 
 
@@ -1898,7 +2006,8 @@ int THD::send_explain_fields(select_result *result)
   List<Item> field_list;
   Item *item;
   CHARSET_INFO *cs= system_charset_info;
-  field_list.push_back(new Item_return_int("id",3, MYSQL_TYPE_LONGLONG));
+  field_list.push_back(item= new Item_return_int("id",3, MYSQL_TYPE_LONGLONG));
+  item->maybe_null= 1;
   field_list.push_back(new Item_empty_string("select_type", 19, cs));
   field_list.push_back(item= new Item_empty_string("table", NAME_CHAR_LEN, cs));
   item->maybe_null= 1;
@@ -1995,6 +2104,36 @@ void THD::nocheck_register_item_tree_change(Item **place, Item *old_value,
   change_list.append(change);
 }
 
+/**
+  Check and register item change if needed
+
+  @param place            place where we should assign new value
+  @param new_value        place of the new value
+  @param runtime_memroot  passed on to nocheck_register_item_tree_change()
+
+  @details
+    Let C be a reference to an item that replaced reference A at
+    location (occurrence) L1, and let this change have been registered.
+    If C is also substituted for A at another location L2, that change
+    is registered with the old value recorded for the first change, so
+    that rollback restores the same reference at both L1 and L2.
+*/
+
+void THD::check_and_register_item_tree_change(Item **place, Item **new_value,
+                                              MEM_ROOT *runtime_memroot)
+{
+  Item_change_record *change;
+  I_List_iterator<Item_change_record> it(change_list);
+  while ((change= it++))
+  {
+    if (change->place == new_value)
+      break; // we need only very first value
+  }
+  if (change)
+    nocheck_register_item_tree_change(place, change->old_value,
+                                      runtime_memroot);
+}
+
 
 void THD::rollback_item_tree_changes()
 {
@@ -2103,7 +2242,7 @@ void select_send::cleanup()
 
 /* Send data to client. Returns 0 if ok */
 
-bool select_send::send_data(List<Item> &items)
+int select_send::send_data(List<Item> &items)
 {
   Protocol *protocol= thd->protocol;
   DBUG_ENTER("select_send::send_data");
@@ -2390,7 +2529,7 @@ select_export::prepare(List<Item> &list, SELECT_LEX_UNIT *u)
                           (int) (uchar) (x) == line_sep_char  || \
                           !(x))
 
-bool select_export::send_data(List<Item> &items)
+int select_export::send_data(List<Item> &items)
 {
 
   DBUG_ENTER("select_export::send_data");
@@ -2596,7 +2735,6 @@ bool select_export::send_data(List<Item> &items)
     {						// Fill with space
       if (item->max_length > used_length)
       {
-	/* QQ:  Fix by adding a my_b_fill() function */
 	if (!space_inited)
 	{
 	  space_inited=1;
@@ -2648,7 +2786,7 @@ select_dump::prepare(List<Item> &list __attribute__((unused)),
 }
 
 
-bool select_dump::send_data(List<Item> &items)
+int select_dump::send_data(List<Item> &items)
 {
   List_iterator_fast<Item> li(items);
   char buff[MAX_FIELD_WIDTH];
@@ -2693,7 +2831,7 @@ select_subselect::select_subselect(Item_subselect *item_arg)
 }
 
 
-bool select_singlerow_subselect::send_data(List<Item> &items)
+int select_singlerow_subselect::send_data(List<Item> &items)
 {
   DBUG_ENTER("select_singlerow_subselect::send_data");
   Item_singlerow_subselect *it= (Item_singlerow_subselect *)item;
@@ -2724,7 +2862,7 @@ void select_max_min_finder_subselect::cleanup()
 }
 
 
-bool select_max_min_finder_subselect::send_data(List<Item> &items)
+int select_max_min_finder_subselect::send_data(List<Item> &items)
 {
   DBUG_ENTER("select_max_min_finder_subselect::send_data");
   Item_maxmin_subselect *it= (Item_maxmin_subselect *)item;
@@ -2742,8 +2880,7 @@ bool select_max_min_finder_subselect::send_data(List<Item> &items)
     if (!cache)
     {
       cache= Item_cache::get_cache(val_item);
-      switch (val_item->result_type())
-      {
+      switch (val_item->result_type()) {
       case REAL_RESULT:
 	op= &select_max_min_finder_subselect::cmp_real;
 	break;
@@ -2757,6 +2894,8 @@ bool select_max_min_finder_subselect::send_data(List<Item> &items)
         op= &select_max_min_finder_subselect::cmp_decimal;
         break;
       case ROW_RESULT:
+      case TIME_RESULT:
+      case IMPOSSIBLE_RESULT:
         // This case should never be choosen
 	DBUG_ASSERT(0);
 	op= 0;
@@ -2828,7 +2967,7 @@ bool select_max_min_finder_subselect::cmp_str()
      sortcmp(val1, val2, cache->collation.collation) < 0);
 }
 
-bool select_exists_subselect::send_data(List<Item> &items)
+int select_exists_subselect::send_data(List<Item> &items)
 {
   DBUG_ENTER("select_exists_subselect::send_data");
   Item_exists_subselect *it= (Item_exists_subselect *)item;
@@ -2889,6 +3028,7 @@ void Query_arena::free_items()
   for (; free_list; free_list= next)
   {
     next= free_list->next;
+    DBUG_ASSERT(free_list != next);
     free_list->delete_self();
   }
   /* Postcondition: free_list is 0 */
@@ -2962,6 +3102,7 @@ void Statement::restore_backup_statement(Statement *stmt, Statement *backup)
 
 void THD::end_statement()
 {
+  DBUG_ENTER("THD::end_statement");
   /* Cleanup SQL processing state to reuse this statement in next query. */
   lex_end(lex);
   delete lex->result;
@@ -2972,6 +3113,7 @@ void THD::end_statement()
     Don't free mem_root, as mem_root is freed in the end of dispatch_command
     (once for any command).
   */
+  DBUG_VOID_RETURN;
 }
 
 
@@ -3167,7 +3309,7 @@ Statement_map::~Statement_map()
   my_hash_free(&st_hash);
 }
 
-bool select_dumpvar::send_data(List<Item> &items)
+int select_dumpvar::send_data(List<Item> &items)
 {
   List_iterator_fast<my_var> var_li(var_list);
   List_iterator<Item> it(items);
@@ -3221,6 +3363,94 @@ bool select_dumpvar::send_eof()
   return 0;
 }
 
+/* Create the tmp result table and allocate its per-column statistics. */
+bool
+select_materialize_with_stats::
+create_result_table(THD *thd_arg, List<Item> *column_types,
+                    bool is_union_distinct, ulonglong options,
+                    const char *table_alias, bool bit_fields_as_long,
+                    bool create_table)      /* create_table is not used here */
+{
+  DBUG_ASSERT(table == 0);
+  tmp_table_param.field_count= column_types->elements;
+  tmp_table_param.bit_fields_as_long= bit_fields_as_long;
+
+  if (! (table= create_tmp_table(thd_arg, &tmp_table_param, *column_types,
+                                 (ORDER*) 0, is_union_distinct, 1,
+                                 options, HA_POS_ERROR, (char*) table_alias)))
+    return TRUE;
+
+  /* One Column_statistics element per field of the new table */
+  col_stat= (Column_statistics*) table->in_use->alloc(table->s->fields *
+                                                      sizeof(Column_statistics));
+  if (!col_stat)
+    return TRUE;
+
+  reset();                              // zero the freshly allocated statistics
+  table->file->extra(HA_EXTRA_WRITE_CACHE);
+  table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+  return FALSE;
+}
+
+/* Zero the per-column statistics and both row-level counters. */
+void select_materialize_with_stats::reset()
+{
+  memset(col_stat, 0, table->s->fields * sizeof(Column_statistics));
+  max_nulls_in_row= 0;
+  count_rows= 0;
+}
+
+/* Clear the collected statistics, then run select_union::cleanup(). */
+void select_materialize_with_stats::cleanup()
+{
+  reset();
+  select_union::cleanup();
+}
+
+
+/**
+  Override select_union::send_data to analyze each non-duplicate row for
+  NULLs and update the per-column NULL statistics after the row is sent.
+
+  @return non-zero error from select_union::send_data() on failure
+  @return 0 on success (including rows skipped as duplicates)
+*/
+
+int select_materialize_with_stats::send_data(List<Item> &items)
+{
+  List_iterator_fast<Item> item_it(items);
+  Item *cur_item;
+  Column_statistics *cur_col_stat= col_stat;
+  uint nulls_in_row= 0;
+  int res;
+
+  if ((res= select_union::send_data(items)))
+    return res;
+  /* Skip duplicate rows. */
+  if (write_err == HA_ERR_FOUND_DUPP_KEY ||
+      write_err == HA_ERR_FOUND_DUPP_UNIQUE)
+    return 0;
+
+  ++count_rows;                         // row numbers therefore start at 1
+
+  while ((cur_item= item_it++))
+  {
+    if (cur_item->is_null_result())
+    {
+      ++cur_col_stat->null_count;
+      cur_col_stat->max_null_row= count_rows;
+      if (!cur_col_stat->min_null_row)  // still 0: first NULL in this column
+        cur_col_stat->min_null_row= count_rows;
+      ++nulls_in_row;
+    }
+    ++cur_col_stat;                     // statistics are indexed like the items
+  }
+  if (nulls_in_row > max_nulls_in_row)
+    max_nulls_in_row= nulls_in_row;
+
+  return 0;
+}
+
+
 /****************************************************************************
   TMP_TABLE_PARAM
 ****************************************************************************/
@@ -3234,6 +3464,9 @@ void TMP_TABLE_PARAM::init()
   quick_group= 1;
   table_charset= 0;
   precomputed_group_by= 0;
+  bit_fields_as_long= 0;
+  materialized_subquery= 0;
+  skip_create_table= 0;
   DBUG_VOID_RETURN;
 }
 
@@ -3242,7 +3475,8 @@ void thd_increment_bytes_sent(ulong length)
 {
   THD *thd=current_thd;
   if (likely(thd != 0))
-  { /* current_thd==0 when close_connection() calls net_send_error() */
+  {
+    /* current_thd == 0 when close_connection() calls net_send_error() */
     thd->status_var.bytes_sent+= length;
   }
 }
@@ -3464,11 +3698,134 @@ void THD::restore_backup_open_tables_state(Open_tables_backup *backup)
   @retval 0 the user thread is active
   @retval 1 the user thread has been killed
 */
+
 extern "C" int thd_killed(const MYSQL_THD thd)
 {
   return(thd->killed);
 }
 
+
+/**
+   Send an out-of-band progress report to the client
+
+   The report is sent every 'thd->...progress_report_time' seconds,
+   however not more often than global.progress_report_time.
+   If either value is 0, no progress report is sent; if both are 0, the
+   values are checked again after one second.
+*/
+
+static void thd_send_progress(THD *thd)
+{
+  /* Check if we should send the client a progress report */
+  ulonglong report_time= my_interval_timer();
+  if (report_time > thd->progress.next_report_time)
+  {
+    uint seconds_to_next= max(thd->variables.progress_report_time,
+                              global_system_variables.progress_report_time);
+    if (seconds_to_next == 0)             // Turned off
+      seconds_to_next= 1;                 // Check again after 1 second
+
+    /* Seconds are scaled by 1e9; presumably the timer counts nanoseconds */
+    thd->progress.next_report_time= (report_time +
+                                     seconds_to_next * 1000000000ULL);
+    if (global_system_variables.progress_report_time &&
+        thd->variables.progress_report_time)
+      net_send_progress_packet(thd);
+  }
+}
+
+
+/** Initialize progress report handling **/
+
+extern "C" void thd_progress_init(MYSQL_THD thd, uint max_stage)
+{
+  DBUG_ASSERT(thd->stmt_arena != thd->progress.arena);
+  if (thd->progress.arena)
+    return; // already initialized
+  /*
+    Send progress reports to clients that support it, if the command
+    is a high level command (like ALTER TABLE) and we are not inside
+    a sub-statement (in_sub_stmt is 0)
+  */
+  thd->progress.report= ((thd->client_capabilities & CLIENT_PROGRESS) &&
+                         thd->progress.report_to_client &&
+                         !thd->in_sub_stmt);
+  thd->progress.next_report_time= 0;
+  thd->progress.stage= 0;
+  thd->progress.counter= thd->progress.max_counter= 0;
+  thd->progress.max_stage= max_stage;
+  thd->progress.arena= thd->stmt_arena;   // marks progress as initialized
+}
+
+
+/* Inform processlist and the client that some progress has been made */
+
+extern "C" void thd_progress_report(MYSQL_THD thd,
+                                    ulonglong progress, ulonglong max_progress)
+{
+  if (thd->stmt_arena != thd->progress.arena)
+    return;                             // not the arena progress was set up for
+  if (thd->progress.max_counter != max_progress)        // Simple optimization
+  {
+    mysql_mutex_lock(&thd->LOCK_thd_data);
+    thd->progress.counter= progress;
+    thd->progress.max_counter= max_progress;
+    mysql_mutex_unlock(&thd->LOCK_thd_data);
+  }
+  else
+    thd->progress.counter= progress;    // only counter changes: no mutex taken
+
+  if (thd->progress.report)
+    thd_send_progress(thd);
+}
+
+/**
+  Move to next stage in process list handling
+
+  The stage number is incremented and the counter restarts from 0; the timer
+  is reset to ensure the progress is sent if client reports are activated.
+*/
+
+extern "C" void thd_progress_next_stage(MYSQL_THD thd)
+{
+  if (thd->stmt_arena != thd->progress.arena)
+    return;                             // not the arena progress was set up for
+  mysql_mutex_lock(&thd->LOCK_thd_data);
+  thd->progress.stage++;
+  thd->progress.counter= 0;
+  DBUG_ASSERT(thd->progress.stage < thd->progress.max_stage);
+  mysql_mutex_unlock(&thd->LOCK_thd_data);
+  if (thd->progress.report)
+  {
+    thd->progress.next_report_time= 0;          // Send new stage info
+    thd_send_progress(thd);
+  }
+}
+
+/**
+  Disable reporting of progress in process list.
+
+  @note
+  This function is safe to call even if one has not called thd_progress_init.
+
+  This function should be called by all parts that do progress
+  reporting to ensure that the process list doesn't contain 100 % done
+  forever.
+*/
+
+
+extern "C" void thd_progress_end(MYSQL_THD thd)
+{
+  if (thd->stmt_arena != thd->progress.arena)
+    return;
+  /*
+    It's enough to reset max_counter to disable the progress indicator
+    in processlist.
+  */
+  thd->progress.max_counter= 0;
+  thd->progress.arena= 0;               // lets thd_progress_init() run again
+}
+
+
 /**
   Return the thread id of a user thread
   @param thd user thread
@@ -3481,7 +3838,7 @@ extern "C" unsigned long thd_get_thread_id(const MYSQL_THD thd)
 
 
 #ifdef INNODB_COMPATIBILITY_HOOKS
-extern "C" struct charset_info_st *thd_charset(MYSQL_THD thd)
+extern "C" const struct charset_info_st *thd_charset(MYSQL_THD thd)
 {
   return(thd->charset());
 }
@@ -3539,7 +3896,7 @@ extern "C" bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd)
   return sqlcom_can_generate_row_events(thd);
 }
 
-#ifndef EMBEDDED_LIBRARY
+#ifdef NOT_USED /* we'll do it correctly instead */
 extern "C" void thd_pool_wait_begin(MYSQL_THD thd, int wait_type);
 extern "C" void thd_pool_wait_end(MYSQL_THD thd);
 
@@ -3566,7 +3923,7 @@ extern "C" void thd_pool_wait_end(MYSQL_THD thd);
 */
 extern "C" void thd_wait_begin(MYSQL_THD thd, int wait_type)
 {
-  MYSQL_CALLBACK(thread_scheduler, thd_wait_begin, (thd, wait_type));
+  MYSQL_CALLBACK(thd->scheduler, thd_wait_begin, (thd, wait_type));
 }
 
 /**
@@ -3577,7 +3934,7 @@ extern "C" void thd_wait_begin(MYSQL_THD thd, int wait_type)
 */
 extern "C" void thd_wait_end(MYSQL_THD thd)
 {
-  MYSQL_CALLBACK(thread_scheduler, thd_wait_end, (thd));
+  MYSQL_CALLBACK(thd->scheduler, thd_wait_end, (thd));
 }
 #else
 extern "C" void thd_wait_begin(MYSQL_THD thd, int wait_type)
@@ -3642,6 +3999,7 @@ void THD::reset_sub_statement_state(Sub_statement_state *backup,
   backup->count_cuted_fields= count_cuted_fields;
   backup->in_sub_stmt=     in_sub_stmt;
   backup->enable_slow_log= enable_slow_log;
+  backup->query_plan_flags= query_plan_flags;
   backup->limit_found_rows= limit_found_rows;
   backup->examined_row_count= examined_row_count;
   backup->sent_row_count=   sent_row_count;
@@ -3709,6 +4067,7 @@ void THD::restore_sub_statement_state(Sub_statement_state *backup)
   variables.option_bits= backup->option_bits;
   in_sub_stmt=      backup->in_sub_stmt;
   enable_slow_log=  backup->enable_slow_log;
+  query_plan_flags= backup->query_plan_flags;
   first_successful_insert_id_in_prev_stmt= 
     backup->first_successful_insert_id_in_prev_stmt;
   first_successful_insert_id_in_cur_stmt= 
@@ -3819,12 +4178,9 @@ void THD::get_definer(LEX_USER *definer)
   {
     definer->user = invoker_user;
     definer->host= invoker_host;
-    definer->password.str= NULL;
-    definer->password.length= 0;
-    definer->plugin.str= (char *) "";
-    definer->plugin.length= 0;
-    definer->auth.str=  (char *) "";
-    definer->auth.length= 0;
+    definer->password= null_lex_str;
+    definer->plugin= empty_lex_str;
+    definer->auth= empty_lex_str;
   }
   else
 #endif
@@ -4472,7 +4828,6 @@ THD::binlog_prepare_pending_rows_event(TABLE*, uint32, MY_BITMAP const*,
 
 /* Declare in unnamed namespace. */
 CPP_UNNAMED_NS_START
-
   /**
      Class to handle temporary allocation of memory for row data.
 
@@ -4649,7 +5004,7 @@ int THD::binlog_update_row(TABLE* table, bool is_trans,
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
   DBUG_DUMP("before_record", before_record, table->s->reclength);
   DBUG_DUMP("after_record",  after_record, table->s->reclength);
   DBUG_DUMP("before_row",    before_row, before_size);
@@ -4841,8 +5196,8 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
                       bool suppress_use, int errcode)
 {
   DBUG_ENTER("THD::binlog_query");
-  DBUG_PRINT("enter", ("qtype: %s  query: '%s'",
-                       show_query_type(qtype), query_arg));
+  DBUG_PRINT("enter", ("qtype: %s  query: '%-.*s'",
+                       show_query_type(qtype), (int) query_len, query_arg));
   DBUG_ASSERT(query_arg && mysql_bin_log.is_open());
 
   /*
@@ -4885,7 +5240,6 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
       spcont == NULL && !binlog_evt_union.do_union)
     issue_unsafe_warnings();
 
-
   switch (qtype) {
     /*
       ROW_QUERY_TYPE means that the statement may be logged either in
@@ -4930,15 +5284,33 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
       binlog_table_maps= 0;
       DBUG_RETURN(error);
     }
-    break;
 
   case THD::QUERY_TYPE_COUNT:
   default:
-    DBUG_ASSERT(0 <= qtype && qtype < QUERY_TYPE_COUNT);
+    DBUG_ASSERT(qtype < QUERY_TYPE_COUNT);
   }
   DBUG_RETURN(0);
 }
 
+void
+THD::wait_for_wakeup_ready()            /* Block until wakeup_ready is set */
+{
+  mysql_mutex_lock(&LOCK_wakeup_ready);
+  while (!wakeup_ready)                 // flag is re-tested after every wakeup
+    mysql_cond_wait(&COND_wakeup_ready, &LOCK_wakeup_ready);
+  mysql_mutex_unlock(&LOCK_wakeup_ready);
+}
+
+void
+THD::signal_wakeup_ready()              /* Set wakeup_ready and signal waiter */
+{
+  mysql_mutex_lock(&LOCK_wakeup_ready);
+  wakeup_ready= true;                   // changed only while holding the mutex
+  mysql_mutex_unlock(&LOCK_wakeup_ready);
+  mysql_cond_signal(&COND_wakeup_ready); // signalled after the mutex is released
+}
+
+
 bool Discrete_intervals_list::append(ulonglong start, ulonglong val,
                                  ulonglong incr)
 {
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 9f13908d31c..284a9d9e42e 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates.
+   2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -27,11 +28,13 @@
 #ifdef MYSQL_SERVER
 #include "unireg.h"                    // REQUIRED: for other includes
 #endif
+#include <waiting_threads.h>
 #include "sql_const.h"
 #include <mysql/plugin_audit.h>
 #include "log.h"
 #include "rpl_tblmap.h"
 #include "mdl.h"
+#include "probes_mysql.h"
 #include "sql_locale.h"                         /* my_locale_st */
 #include "sql_profile.h"                   /* PROFILING */
 #include "scheduler.h"                     /* thd_scheduler */
@@ -49,6 +52,7 @@ class Load_log_event;
 class Slave_log_event;
 class sp_rcontext;
 class sp_cache;
+class Lex_input_stream;
 class Parser_state;
 class Rows_log_event;
 class Sroutine_hash_entry;
@@ -74,7 +78,7 @@ enum enum_filetype { FILETYPE_CSV, FILETYPE_XML };
 #define MODE_PIPES_AS_CONCAT            2
 #define MODE_ANSI_QUOTES                4
 #define MODE_IGNORE_SPACE               8
-#define MODE_NOT_USED                   16
+#define MODE_IGNORE_BAD_TABLE_OPTIONS   16
 #define MODE_ONLY_FULL_GROUP_BY         32
 #define MODE_NO_UNSIGNED_SUBTRACTION    64
 #define MODE_NO_DIR_IN_CREATE           128
@@ -163,7 +167,7 @@ typedef struct st_user_var_events
 
 #define RP_LOCK_LOG_IS_ALREADY_LOCKED 1
 #define RP_FORCE_ROTATE               2
-
+#define RP_BINLOG_CHECKSUM_ALG_CHANGE 4
 /*
   The COPY_INFO structure is used by INSERT/REPLACE code.
   The schema of the row counting by the INSERT/INSERT ... ON DUPLICATE KEY
@@ -257,19 +261,22 @@ public:
   KEY_CREATE_INFO key_create_info;
   List<Key_part_spec> columns;
   LEX_STRING name;
+  engine_option_value *option_list;
   bool generated;
 
   Key(enum Keytype type_par, const LEX_STRING &name_arg,
       KEY_CREATE_INFO *key_info_arg,
-      bool generated_arg, List<Key_part_spec> &cols)
+      bool generated_arg, List<Key_part_spec> &cols,
+      engine_option_value *create_opt)
     :type(type_par), key_create_info(*key_info_arg), columns(cols),
-    name(name_arg), generated(generated_arg)
+    name(name_arg), option_list(create_opt), generated(generated_arg)
   {}
   Key(enum Keytype type_par, const char *name_arg, size_t name_len_arg,
       KEY_CREATE_INFO *key_info_arg, bool generated_arg,
-      List<Key_part_spec> &cols)
+      List<Key_part_spec> &cols,
+      engine_option_value *create_opt)
     :type(type_par), key_create_info(*key_info_arg), columns(cols),
-    generated(generated_arg)
+    option_list(create_opt), generated(generated_arg)
   {
     name.str= (char *)name_arg;
     name.length= name_len_arg;
@@ -301,7 +308,7 @@ public:
   Foreign_key(const LEX_STRING &name_arg, List<Key_part_spec> &cols,
 	      Table_ident *table,   List<Key_part_spec> &ref_cols,
 	      uint delete_opt_arg, uint update_opt_arg, uint match_opt_arg)
-    :Key(FOREIGN_KEY, name_arg, &default_key_create_info, 0, cols),
+    :Key(FOREIGN_KEY, name_arg, &default_key_create_info, 0, cols, NULL),
     ref_table(table), ref_columns(ref_cols),
     delete_opt(delete_opt_arg), update_opt(update_opt_arg),
     match_opt(match_opt_arg)
@@ -313,6 +320,8 @@ public:
   */
   virtual Key *clone(MEM_ROOT *mem_root) const
   { return new (mem_root) Foreign_key(*this, mem_root); }
+  /* Used to validate foreign key options */
+  bool validate(List<Create_field> &table_fields);
 };
 
 typedef struct st_mysql_lock
@@ -404,13 +413,16 @@ typedef struct system_variables
 {
   /*
     How dynamically allocated system variables are handled:
-    
+
     The global_system_variables and max_system_variables are "authoritative"
     They both should have the same 'version' and 'size'.
     When attempting to access a dynamic variable, if the session version
     is out of date, then the session version is updated and realloced if
     neccessary and bytes copied from global to make up for missing data.
-  */ 
+
+    Note that one should use my_bool instead of bool here, as the variables
+    are used with my_getopt.c
+  */
   ulong dynamic_variables_version;
   char* dynamic_variables_ptr;
   uint dynamic_variables_head;  /* largest valid variable offset */
@@ -422,12 +434,16 @@ typedef struct system_variables
   ulonglong optimizer_switch;
   ulonglong sql_mode; ///< which non-standard SQL behaviour should be enabled
   ulonglong option_bits; ///< OPTION_xxx constants, e.g. OPTION_PROFILING
+  ulonglong join_buff_space_limit;
+  ulonglong log_slow_filter; 
+  ulonglong log_slow_verbosity; 
   ha_rows select_limit;
   ha_rows max_join_size;
   ulong auto_increment_increment, auto_increment_offset;
   ulong bulk_insert_buff_size;
   ulong join_buff_size;
   ulong lock_wait_timeout;
+  ulong join_cache_level;
   ulong max_allowed_packet;
   ulong max_error_count;
   ulong max_length_for_sort_data;
@@ -448,8 +464,11 @@ typedef struct system_variables
   ulong profiling_history_size;
   ulong read_buff_size;
   ulong read_rnd_buff_size;
+  ulong mrr_buff_size;
   ulong div_precincrement;
   ulong sortbuff_size;
+  /* Total size of all buffers used by the subselect_rowid_merge_engine. */
+  ulong rowid_merge_buff_size;
   ulong max_sp_recursion_depth;
   ulong default_week_format;
   ulong max_seeks_for_key;
@@ -460,8 +479,11 @@ typedef struct system_variables
   ulong trans_prealloc_size;
   ulong log_warnings;
   ulong group_concat_max_len;
-
+  /* Flags for slow log filtering */
+  ulong log_slow_rate_limit; 
   ulong binlog_format; ///< binlog format for this thd (see enum_binlog_format)
+  ulong progress_report_time;
+  my_bool binlog_annotate_rows_events;
   my_bool binlog_direct_non_trans_update;
   my_bool sql_log_bin;
   ulong completion_type;
@@ -476,11 +498,11 @@ typedef struct system_variables
   my_thread_id pseudo_thread_id;
 
   my_bool low_priority_updates;
-  my_bool new_mode;
   my_bool query_cache_wlock_invalidate;
   my_bool engine_condition_pushdown;
   my_bool keep_files_on_create;
 
+  my_bool old_mode;
   my_bool old_alter_table;
   my_bool old_passwords;
   my_bool big_tables;
@@ -506,11 +528,13 @@ typedef struct system_variables
 
   my_bool sysdate_is_now;
 
-  double long_query_time_double;
+  /* deadlock detection */
+  ulong wt_timeout_short, wt_deadlock_search_depth_short;
+  ulong wt_timeout_long, wt_deadlock_search_depth_long;
 
+  double long_query_time_double;
 } SV;
 
-
 /**
   Per thread status variables.
   Must be long/ulong up to last_system_status_var so that
@@ -532,14 +556,25 @@ typedef struct system_status_var
   ulong ha_read_prev_count;
   ulong ha_read_rnd_count;
   ulong ha_read_rnd_next_count;
+  /*
+    This number doesn't include calls to the default implementation and
+    calls made by range access. The intent is to count only calls made by
+    BatchedKeyAccess.
+  */
+  ulong ha_multi_range_read_init_count;
+
   ulong ha_rollback_count;
   ulong ha_update_count;
   ulong ha_write_count;
+  /* The following are for internal temporary tables */
+  ulong ha_tmp_update_count;
+  ulong ha_tmp_write_count;
   ulong ha_prepare_count;
   ulong ha_discover_count;
   ulong ha_savepoint_count;
   ulong ha_savepoint_rollback_count;
 
+#if 0
   /* KEY_CACHE parts. These are copies of the original */
   ulong key_blocks_changed;
   ulong key_blocks_used;
@@ -548,6 +583,7 @@ typedef struct system_status_var
   ulong key_cache_w_requests;
   ulong key_cache_write;
   /* END OF KEY_CACHE parts */
+#endif
 
   ulong net_big_packet_count;
   ulong opened_tables;
@@ -570,25 +606,33 @@ typedef struct system_status_var
   ulong com_stmt_fetch;
   ulong com_stmt_reset;
   ulong com_stmt_close;
+
+  ulong empty_queries;
+  ulong access_denied_errors;
+  ulong lost_connections;
   /*
     Number of statements sent from the client
   */
   ulong questions;
-
-  ulonglong bytes_received;
-  ulonglong bytes_sent;
   /*
     IMPORTANT!
     SEE last_system_status_var DEFINITION BELOW.
     Below 'last_system_status_var' are all variables that cannot be handled
     automatically by add_to_status()/add_diff_to_status().
   */
+  ulonglong bytes_received;
+  ulonglong bytes_sent;
+  ulonglong rows_read;
+  ulonglong rows_sent;
+  ulonglong rows_tmp_read;
+  ulonglong binlog_bytes_written;
   double last_query_cost;
+  double cpu_time, busy_time;
 } STATUS_VAR;
 
 /*
   This is used for 'SHOW STATUS'. It must be updated to the last ulong
-  variable in system_status_var which is makes sens to add to the global
+  variable in system_status_var which it makes sense to add to the global
   counter
 */
 
@@ -659,6 +703,8 @@ public:
   { return (int)state < (int)STMT_PREPARED; }
   inline bool is_stmt_prepare_or_first_stmt_execute() const
   { return (int)state <= (int)STMT_PREPARED; }
+  inline bool is_stmt_execute() const
+  { return state == STMT_PREPARED || state == STMT_EXECUTED; }
   inline bool is_conventional() const
   { return state == STMT_CONVENTIONAL_EXECUTION; }
 
@@ -703,7 +749,7 @@ class Server_side_cursor;
    - prepared, that is, contain placeholders,
    - opened as cursors. We maintain 1 to 1 relationship between
      statement and cursor - if user wants to create another cursor for his
-     query, we create another statement for it. 
+     query, we create another statement for it.
   To perform some action with statement we reset THD part to the state  of
   that statement, do the action, and then save back modified state from THD
   to the statement. It will be changed in near future, and Statement will
@@ -753,6 +799,12 @@ public:
     ENGINE INNODB STATUS.
   */
   CSET_STRING query_string;
+  /*
+    If opt_query_cache_strip_comments is set, this contains query without
+    comments. If not set, it contains pointer to query_string.
+  */
+  String base_query;
+
 
   inline char *query() const { return query_string.str(); }
   inline uint32 query_length() const { return query_string.length(); }
@@ -786,7 +838,8 @@ public:
   char *db;
   size_t db_length;
 
-public:
+  /* This is set to 1 if last call to send_result_to_client() was ok */
+  my_bool query_cache_is_applicable;
 
   /* This constructor is called for backup statements */
   Statement() {}
@@ -923,7 +976,7 @@ public:
   {
     return (*priv_host ? priv_host : (char *)"%");
   }
-  
+
   bool set_user(char *user_arg);
 
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
@@ -1130,6 +1183,7 @@ public:
   ulonglong limit_found_rows;
   ha_rows    cuted_fields, sent_row_count, examined_row_count;
   ulong client_capabilities;
+  ulong query_plan_flags; 
   uint in_sub_stmt;
   bool enable_slow_log;
   bool last_insert_id_used;
@@ -1240,6 +1294,7 @@ public:
     /* Ignore error */
     return TRUE;
   }
+  Dummy_error_handler() {}                    /* Remove gcc warning */
 };
 
 
@@ -1448,7 +1503,7 @@ public:
   /* Used to execute base64 coded binlog events in MySQL server */
   Relay_log_info* rli_fake;
 
-  void reset_for_next_command();
+  void reset_for_next_command(bool calculate_userstat);
   /*
     Constant for THD::where initialization in the beginning of every query.
 
@@ -1480,15 +1535,17 @@ public:
   Query_cache_tls query_cache_tls;
 #endif
   NET	  net;				// client connection descriptor
+  scheduler_functions *scheduler;       // Scheduler for this connection
   Protocol *protocol;			// Current protocol
   Protocol_text   protocol_text;	// Normal protocol
   Protocol_binary protocol_binary;	// Binary protocol
   HASH    user_vars;			// hash for user variables
   String  packet;			// dynamic buffer for network I/O
   String  convert_buffer;               // buffer for charset conversions
-  struct  rand_struct rand;		// used for authentication
+  struct  my_rnd_struct rand;		// used for authentication
   struct  system_variables variables;	// Changeable local variables
   struct  system_status_var status_var; // Per thread statistic vars
+  struct  system_status_var org_status_var; // For user statistics
   struct  system_status_var *initial_status_var; /* used by show status */
   THR_LOCK_INFO lock_info;              // Locking info of this thread
   /**
@@ -1557,7 +1614,7 @@ public:
   /*
     One thread can hold up to one named user-level lock. This variable
     points to a lock object if the lock is present. See item_func.cc and
-    chapter 'Miscellaneous functions', for functions GET_LOCK, RELEASE_LOCK. 
+    chapter 'Miscellaneous functions', for functions GET_LOCK, RELEASE_LOCK.
   */
   User_level_lock *ull;
 #ifndef DBUG_OFF
@@ -1572,21 +1629,64 @@ public:
   uint32     server_id;
   uint32     file_id;			// for LOAD DATA INFILE
   /* remote (peer) port */
-  uint16 peer_port;
-  time_t     start_time, user_time;
+  uint16     peer_port;
+  my_time_t  start_time;             // start_time and its sec_part 
+  ulong      start_time_sec_part;    // are almost always used separately
+  my_hrtime_t user_time;
   // track down slow pthread_create
   ulonglong  prior_thr_create_utime, thr_create_utime;
   ulonglong  start_utime, utime_after_lock;
 
+  // Progress indicator
+  struct {
+    /*
+      true, if the currently running command can send progress report
+      packets to a client. Set by mysql_execute_command() for safe commands
+      See CF_REPORT_PROGRESS
+    */
+    bool       report_to_client;
+    /*
+      true, if we will send progress report packets to a client
+      (client has requested them, see CLIENT_PROGRESS; report_to_client
+      is true; not in sub-statement)
+    */
+    bool       report;
+    uint       stage, max_stage;
+    ulonglong  counter, max_counter;
+    ulonglong  next_report_time;
+    Query_arena *arena;
+  } progress;
+
   thr_lock_type update_lock_default;
   Delayed_insert *di;
 
   /* <> 0 if we are inside of trigger or stored function. */
   uint in_sub_stmt;
+  /* True when opt_userstat_running is set at start of query */
+  bool userstat_running;
 
   /* container for handler's private per-connection data */
   Ha_data ha_data[MAX_HA];
 
+  /* Place to store various things */
+  union 
+  { 
+    /*
+      Used by subquery optimizations, see Item_in_subselect::emb_on_expr_nest.
+    */
+    TABLE_LIST *emb_on_expr_nest;
+  } thd_marker;
+
+  bool prepare_derived_at_open;
+
+  /* 
+    To signal that the tmp table to be created is created for materialized
+    derived table or a view.
+  */ 
+  bool create_tmp_table_for_derived;
+
+  bool save_prep_leaf_list;
+
 #ifndef MYSQL_CLIENT
   int binlog_setup_trx_data();
 
@@ -1595,7 +1695,8 @@ public:
   */
   void binlog_start_trans_and_stmt();
   void binlog_set_stmt_begin();
-  int binlog_write_table_map(TABLE *table, bool is_transactional);
+  int binlog_write_table_map(TABLE *table, bool is_transactional,
+                             my_bool *with_annotate= 0);
   int binlog_write_row(TABLE* table, bool is_transactional,
                        MY_BITMAP const* cols, size_t colcnt,
                        const uchar *buf);
@@ -1687,6 +1788,7 @@ public:
     THD_TRANS stmt;			// Trans for current statement
     bool on;                            // see ha_enable_transaction()
     XID_STATE xid_state;
+    WT_THD wt;                          ///< for deadlock detection
     Rows_log_event *m_pending_rows_event;
 
     /*
@@ -1732,7 +1834,7 @@ public:
   /*
     This is to track items changed during execution of a prepared
     statement/stored procedure. It's created by
-    register_item_tree_change() in memory root of THD, and freed in
+    nocheck_register_item_tree_change() in memory root of THD, and freed in
     rollback_item_tree_changes(). For conventional execution it's always
     empty.
   */
@@ -1982,9 +2084,11 @@ public:
   /* Statement id is thread-wide. This counter is used to generate ids */
   ulong      statement_id_counter;
   ulong	     rand_saved_seed1, rand_saved_seed2;
+  ulong      query_plan_flags; 
+  ulong      query_plan_fsort_passes; 
   pthread_t  real_id;                           /* For debugging */
   my_thread_id  thread_id;
-  uint	     tmp_table;
+  uint	     tmp_table, global_disable_checkpoint;
   uint	     server_status,open_options;
   enum enum_thread_type system_thread;
   uint       select_number;             //number of select (used for EXPLAIN)
@@ -2038,8 +2142,11 @@ public:
   char	     scramble[SCRAMBLE_LENGTH+1];
 
   bool       slave_thread, one_shot_set;
+  bool       extra_port;                        /* If extra connection */
+
   bool	     no_errors;
-  uchar      password;
+  uint8      password;
+
   /**
     Set to TRUE if execution of the current compound statement
     can not continue. In particular, disables activation of
@@ -2067,6 +2174,7 @@ public:
   */
   bool       is_fatal_sub_stmt_error;
   bool	     query_start_used, rand_used, time_zone_used;
+  bool       query_start_sec_part_used;
   /* for IS NULL => = last_insert_id() fix in remove_eq_conds() */
   bool       substitute_null_with_insert_id;
   bool	     in_lock_tables;
@@ -2078,7 +2186,7 @@ public:
   */
   bool       is_slave_error;
   bool       bootstrap, cleanup_done;
-  
+
   /**  is set if some thread specific value(s) used in a statement. */
   bool       thread_specific_used;
   /**  
@@ -2092,7 +2200,7 @@ public:
   bool 	     got_warning;       /* Set on call to push_warning() */
   /* set during loop of derived table processing */
   bool       derived_tables_processing;
-  my_bool    tablespace_op;	/* This is TRUE in DISCARD/IMPORT TABLESPACE */
+  bool       tablespace_op;	/* This is TRUE in DISCARD/IMPORT TABLESPACE */
 
   sp_rcontext *spcont;		// SP runtime context
   sp_cache   *sp_proc_cache;
@@ -2108,6 +2216,21 @@ public:
   */
   LOG_INFO*  current_linfo;
   NET*       slave_net;			// network connection from slave -> m.
+
+  /*
+    Used to update global user stats.  The global user stats are updated
+    occasionally with the 'diff' variables.  After the update, the 'diff'
+    variables are reset to 0.
+  */
+  /* Time when the current thread connected to MySQL. */
+  time_t current_connect_time;
+  /* Last time when THD stats were updated in global_user_stats. */
+  time_t last_global_update_time;
+  /* Number of commands not reflected in global_user_stats yet. */
+  uint select_commands, update_commands, other_commands;
+  ulonglong start_cpu_time;
+  ulonglong start_bytes_received;
+
   /* Used by the sys_var class to store temporary values */
   union
   {
@@ -2115,11 +2238,12 @@ public:
     long      long_value;
     ulong     ulong_value;
     ulonglong ulonglong_value;
+    double    double_value;
   } sys_var_tmp;
-  
+
   struct {
-    /* 
-      If true, mysql_bin_log::write(Log_event) call will not write events to 
+    /*
+      If true, mysql_bin_log::write(Log_event) call will not write events to
       binlog, and maintain 2 below variables instead (use
       mysql_bin_log.start_union_events to turn this on)
     */
@@ -2130,13 +2254,13 @@ public:
     */
     bool unioned_events;
     /*
-      If TRUE, at least one mysql_bin_log::write(Log_event e), where 
-      e.cache_stmt == TRUE call has been made after last 
+      If TRUE, at least one mysql_bin_log::write(Log_event e), where
+      e.cache_stmt == TRUE call has been made after last
       mysql_bin_log.start_union_events() call.
     */
     bool unioned_events_trans;
-    
-    /* 
+
+    /*
       'queries' (actually SP statements) that run under inside this binlog
       union have thd->query_id >= first_query_id.
     */
@@ -2185,14 +2309,16 @@ public:
     killing mysqld) where it's vital to not allocate excessive and not used
     memory. Note, that we still don't return error from init_for_queries():
     if preallocation fails, we should notice that at the first call to
-    alloc_root. 
+    alloc_root.
   */
   void init_for_queries();
+  void update_all_stats();
+  void update_stats(void);
   void change_user(void);
   void cleanup(void);
   void cleanup_after_query();
   bool store_globals();
-  bool restore_globals();
+  void reset_globals();
 #ifdef SIGNAL_WITH_VIO_CLOSE
   inline void set_active_vio(Vio* vio)
   {
@@ -2223,7 +2349,7 @@ public:
     
     QUERY_TYPE_COUNT
   };
-  
+
   int binlog_query(enum_binlog_query_type qtype,
                    char const *query, ulong query_len, bool is_trans,
                    bool direct, bool suppress_use,
@@ -2261,33 +2387,45 @@ public:
     mysql_mutex_unlock(&mysys_var->mutex);
     return;
   }
-  inline time_t query_start() { query_start_used=1; return start_time; }
-  inline void set_time()
+  inline my_time_t query_start() { query_start_used=1; return start_time; }
+  inline ulong query_start_sec_part()
+  { query_start_sec_part_used=1; return start_time_sec_part; }
+  inline void set_current_time()
   {
-    if (user_time)
+    my_hrtime_t hrtime= my_hrtime();
+    start_time= hrtime_to_my_time(hrtime);
+    start_time_sec_part= hrtime_sec_part(hrtime);
+  }
+  inline void set_start_time()
+  {
+    if (user_time.val)
     {
-      start_time= user_time;
-      start_utime= utime_after_lock= my_micro_time();
+      start_time= hrtime_to_my_time(user_time);
+      start_time_sec_part= hrtime_sec_part(user_time);
     }
     else
-      start_utime= utime_after_lock= my_micro_time_and_time(&start_time);
+      set_current_time();
+  }
+  inline void set_time()
+  {
+    set_start_time();
+    start_utime= utime_after_lock= microsecond_interval_timer();
   }
-  inline void	set_current_time()    { start_time= my_time(MY_WME); }
-  inline void	set_time(time_t t)
+  inline void	set_time(my_hrtime_t t)
   {
-    start_time= user_time= t;
-    start_utime= utime_after_lock= my_micro_time();
+    user_time= t;
+    set_time();
   }
-  /*TODO: this will be obsolete when we have support for 64 bit my_time_t */
-  inline bool	is_valid_time() 
-  { 
-    return (IS_TIME_T_VALID_FOR_TIMESTAMP(start_time));
+  inline void	set_time(my_time_t t, ulong sec_part)
+  {
+    my_hrtime_t hrtime= { hrtime_from_time(t) + sec_part };
+    set_time(hrtime);
   }
-  void set_time_after_lock()  { utime_after_lock= my_micro_time(); }
-  ulonglong current_utime()  { return my_micro_time(); }
+  void set_time_after_lock()  { utime_after_lock= microsecond_interval_timer(); }
+  ulonglong current_utime()  { return microsecond_interval_timer(); }
+
   /**
    Update server status after execution of a top level statement.
-
    Currently only checks if a query was slow, and assigns
    the status accordingly.
    Evaluate the current time, and if it exceeds the long-query-time
@@ -2475,8 +2613,30 @@ public:
       nocheck_register_item_tree_change(place, *place, mem_root);
     *place= new_value;
   }
+  /**
+    Make change in item tree after checking whether it needs registering
+
+    Registration is attempted only if the statement arena is not conventional.
+    @param place         place where we should assign new value
+    @param new_value     address of the pointer holding the new value
+
+    @details
+    see check_and_register_item_tree_change details
+  */
+  void check_and_register_item_tree(Item **place, Item **new_value)
+  {
+    if (!stmt_arena->is_conventional())
+      check_and_register_item_tree_change(place, new_value, mem_root);
+    /*
+      We have to use memcpy instead of a plain *place= *new_value
+      assignment to avoid problems with strict aliasing.
+    */
+    memcpy((char*) place, new_value, sizeof(*new_value));
+  }
   void nocheck_register_item_tree_change(Item **place, Item *old_value,
                                          MEM_ROOT *runtime_memroot);
+  void check_and_register_item_tree_change(Item **place, Item **new_value,
+                                           MEM_ROOT *runtime_memroot);
   void rollback_item_tree_changes();
 
   /*
@@ -2658,7 +2818,7 @@ public:
     *p_db_length= db_length;
     return FALSE;
   }
-  thd_scheduler scheduler;
+  thd_scheduler event_scheduler;
 
 public:
   inline Internal_error_handler *get_internal_handler()
@@ -2816,6 +2976,35 @@ public:
   LEX_STRING get_invoker_user() { return invoker_user; }
   LEX_STRING get_invoker_host() { return invoker_host; }
   bool has_invoker() { return invoker_user.length > 0; }
+
+private:
+  /* 
+    This reference points to the table arena when the expression
+    for a virtual column is being evaluated
+  */ 
+  Query_arena *arena_for_cached_items;
+
+public:
+  void reset_arena_for_cached_items(Query_arena *new_arena)
+  {
+    arena_for_cached_items= new_arena;
+  }
+  Query_arena *switch_to_arena_for_cached_items(Query_arena *backup)
+  {
+    if (!arena_for_cached_items)
+      return 0;
+    set_n_backup_active_arena(arena_for_cached_items, backup);
+    return backup;
+  }
+
+  void clear_wakeup_ready() { wakeup_ready= false; }
+  /*
+    Sleep waiting for others to wake us up with signal_wakeup_ready().
+    Must call clear_wakeup_ready() before waiting.
+  */
+  void wait_for_wakeup_ready();
+  /* Wake this thread up from wait_for_wakeup_ready(). */
+  void signal_wakeup_ready();
 private:
 
   /** The current internal error handler for this thread, or NULL. */
@@ -2845,7 +3034,7 @@ private:
     statements or default definer is set in CREATE/ALTER SP, SF, Event,
     TRIGGER or VIEW statements.
 
-    Current user will be binlogged into Query_log_event if current_user_used
+    Current user will be binlogged into Query_log_event if m_binlog_invoker
     is TRUE; It will be stored into invoker_host and invoker_user by SQL thread.
    */
   bool m_binlog_invoker;
@@ -2858,6 +3047,16 @@ private:
    */
   LEX_STRING invoker_user;
   LEX_STRING invoker_host;
+  /*
+    Flag, mutex and condition for a thread to wait for a signal from another
+    thread.
+
+    Currently used to wait for group commit to complete, can also be used for
+    other purposes.
+  */
+  bool wakeup_ready;
+  mysql_mutex_t LOCK_wakeup_ready;
+  mysql_cond_t COND_wakeup_ready;
 };
 
 
@@ -2888,10 +3087,26 @@ my_eof(THD *thd)
 #define reenable_binlog(A)   (A)->variables.option_bits= tmp_disable_binlog__save_options;}
 
 
-LEX_STRING *
-make_lex_string_root(MEM_ROOT *mem_root,
-                     LEX_STRING *lex_str, const char* str, uint length,
-                     bool allocate_lex_string);
+/*
+  These functions make it easy to add strict checking for all
+  date handling later on.
+*/
+
+const my_bool strict_date_checking= 0;
+
+inline ulong sql_mode_for_dates(THD *thd)
+{
+  if (strict_date_checking)
+    return (thd->variables.sql_mode &
+            (MODE_NO_ZERO_DATE | MODE_NO_ZERO_IN_DATE |
+             MODE_INVALID_DATES));
+  return (thd->variables.sql_mode & MODE_INVALID_DATES);
+}
+
+inline ulong sql_mode_for_dates()
+{
+  return sql_mode_for_dates(current_thd);
+}
 
 /*
   Used to hold information about file and file structure in exchange
@@ -2941,7 +3156,11 @@ public:
   virtual uint field_count(List<Item> &fields) const
   { return fields.elements; }
   virtual bool send_result_set_metadata(List<Item> &list, uint flags)=0;
-  virtual bool send_data(List<Item> &items)=0;
+  /*
+    send_data returns 0 on ok, 1 on error and -1 if data was ignored, for
+    example for a duplicate row entry written to a temp table.
+  */
+  virtual int send_data(List<Item> &items)=0;
   virtual bool initialize_tables (JOIN *join=0) { return 0; }
   virtual void send_error(uint errcode,const char *err);
   virtual bool send_eof()=0;
@@ -2977,7 +3196,12 @@ public:
 class select_result_interceptor: public select_result
 {
 public:
-  select_result_interceptor() {}              /* Remove gcc warning */
+  select_result_interceptor()
+  {
+    DBUG_ENTER("select_result_interceptor::select_result_interceptor");
+    DBUG_PRINT("enter", ("this %p", this));  /* %p: no pointer truncation */
+    DBUG_VOID_RETURN;
+  }              /* Remove gcc warning */
   uint field_count(List<Item> &fields) const { return 0; }
   bool send_result_set_metadata(List<Item> &fields, uint flag) { return FALSE; }
 };
@@ -2993,7 +3217,7 @@ class select_send :public select_result {
 public:
   select_send() :is_result_set_started(FALSE) {}
   bool send_result_set_metadata(List<Item> &list, uint flags);
-  bool send_data(List<Item> &items);
+  int send_data(List<Item> &items);
   bool send_eof();
   virtual bool check_simple_select() const { return FALSE; }
   void abort_result_set();
@@ -3056,7 +3280,7 @@ public:
   select_export(sql_exchange *ex) :select_to_file(ex) {}
   ~select_export();
   int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
-  bool send_data(List<Item> &items);
+  int send_data(List<Item> &items);
 };
 
 
@@ -3064,7 +3288,7 @@ class select_dump :public select_to_file {
 public:
   select_dump(sql_exchange *ex) :select_to_file(ex) {}
   int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
-  bool send_data(List<Item> &items);
+  int send_data(List<Item> &items);
 };
 
 
@@ -3083,7 +3307,7 @@ class select_insert :public select_result_interceptor {
   ~select_insert();
   int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
   virtual int prepare2(void);
-  bool send_data(List<Item> &items);
+  virtual int send_data(List<Item> &items);
   virtual void store_values(List<Item> &values);
   virtual bool can_rollback_data() { return 0; }
   void send_error(uint errcode,const char *err);
@@ -3135,8 +3359,18 @@ public:
 
 #include <myisam.h>
 
-/* 
-  Param to create temporary tables when doing SELECT:s 
+#ifdef WITH_ARIA_STORAGE_ENGINE
+#include <maria.h>
+#endif
+
+#ifdef USE_ARIA_FOR_TMP_TABLES
+#define ENGINE_COLUMNDEF MARIA_COLUMNDEF
+#else
+#define ENGINE_COLUMNDEF MI_COLUMNDEF
+#endif
+
+/*
+  Param to create temporary tables when doing SELECT:s
   NOTE
     This structure is copied using memcpy as a part of JOIN.
 */
@@ -3155,7 +3389,7 @@ public:
   Copy_field *save_copy_field, *save_copy_field_end;
   uchar	    *group_buff;
   Item	    **items_to_copy;			/* Fields in tmp table */
-  MI_COLUMNDEF *recinfo,*start_recinfo;
+  ENGINE_COLUMNDEF *recinfo, *start_recinfo;
   KEY *keyinfo;
   ha_rows end_write_records;
   /**
@@ -3189,9 +3423,11 @@ public:
   uint	quick_group;
   bool  using_indirect_summary_function;
   /* If >0 convert all blob fields to varchar(convert_blob_length) */
-  uint  convert_blob_length; 
-  CHARSET_INFO *table_charset; 
+  uint  convert_blob_length;
+  CHARSET_INFO *table_charset;
   bool schema_table;
+  /* TRUE if the temp table is created for subquery materialization. */
+  bool materialized_subquery;
   /*
     True if GROUP BY and its aggregate functions are already computed
     by a table access method (e.g. by loose index scan). In this case
@@ -3200,11 +3436,23 @@ public:
   */
   bool precomputed_group_by;
   bool force_copy_fields;
+  /*
+    If TRUE, create_tmp_field called from create_tmp_table will convert
+    all BIT fields to 64-bit longs. This works around the limitation
+    that MEMORY tables cannot index BIT columns.
+  */
+  bool bit_fields_as_long;
+  /*
+    Whether to create or postpone actual creation of this temporary table.
+    TRUE <=> create_tmp_table will create only the TABLE structure.
+  */
+  bool skip_create_table;
 
   TMP_TABLE_PARAM()
     :copy_field(0), group_parts(0),
      group_length(0), group_null_parts(0), convert_blob_length(0),
-     schema_table(0), precomputed_group_by(0), force_copy_fields(0)
+    schema_table(0), materialized_subquery(0), precomputed_group_by(0),
+    force_copy_fields(0), bit_fields_as_long(0), skip_create_table(0)
   {}
   ~TMP_TABLE_PARAM()
   {
@@ -3223,19 +3471,25 @@ public:
 
 class select_union :public select_result_interceptor
 {
+public:
   TMP_TABLE_PARAM tmp_table_param;
+  int write_err; /* Error code from the last send_data->ha_write_row call. */
 public:
   TABLE *table;
+  ha_rows records;
 
-  select_union() :table(0) {}
+  select_union() :write_err(0), table(0), records(0) { tmp_table_param.init(); }
   int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
-  bool send_data(List<Item> &items);
+  int send_data(List<Item> &items);
   bool send_eof();
   bool flush();
-
-  bool create_result_table(THD *thd, List<Item> *column_types,
-                           bool is_distinct, ulonglong options,
-                           const char *alias);
+  void cleanup();
+  virtual bool create_result_table(THD *thd, List<Item> *column_types,
+                                   bool is_distinct, ulonglong options,
+                                   const char *alias, 
+                                   bool bit_fields_as_long,
+                                   bool create_table);
+  TMP_TABLE_PARAM *get_tmp_table_param() { return &tmp_table_param; }
 };
 
 /* Base subselect interface class */
@@ -3245,7 +3499,7 @@ protected:
   Item_subselect *item;
 public:
   select_subselect(Item_subselect *item);
-  bool send_data(List<Item> &items)=0;
+  int send_data(List<Item> &items)=0;
   bool send_eof() { return 0; };
 };
 
@@ -3256,9 +3510,77 @@ public:
   select_singlerow_subselect(Item_subselect *item_arg)
     :select_subselect(item_arg)
   {}
-  bool send_data(List<Item> &items);
+  int send_data(List<Item> &items);
 };
 
+
+/*
+  This class specializes select_union to collect statistics about the
+  data stored in the temp table. Currently the class collects statistics
+  about NULLs.
+*/
+
+class select_materialize_with_stats : public select_union
+{
+protected:
+  class Column_statistics
+  {
+  public:
+    /* Count of NULLs per column. */
+    ha_rows null_count;
+    /* The row number that contains the first NULL in a column. */
+    ha_rows min_null_row;
+    /* The row number that contains the last NULL in a column. */
+    ha_rows max_null_row;
+  };
+
+  /* Array of statistics data per column. */
+  Column_statistics* col_stat;
+
+  /*
+    The number of columns in the biggest sub-row that consists of only
+    NULL values.
+  */
+  uint max_nulls_in_row;
+  /*
+    Count of rows written to the temp table. This is redundant as it is
+    already stored in handler::stats.records, however that one is relatively
+    expensive to compute (given we need that for every row).
+  */
+  ha_rows count_rows;
+
+protected:
+  void reset();
+
+public:
+  select_materialize_with_stats() { tmp_table_param.init(); }
+  bool create_result_table(THD *thd, List<Item> *column_types,
+                           bool is_distinct, ulonglong options,
+                           const char *alias, 
+                           bool bit_fields_as_long,
+                           bool create_table);
+  bool init_result_table(ulonglong select_options);
+  int send_data(List<Item> &items);
+  void cleanup();
+  ha_rows get_null_count_of_col(uint idx)
+  {
+    DBUG_ASSERT(idx < table->s->fields);
+    return col_stat[idx].null_count;
+  }
+  ha_rows get_max_null_of_col(uint idx)
+  {
+    DBUG_ASSERT(idx < table->s->fields);
+    return col_stat[idx].max_null_row;
+  }
+  ha_rows get_min_null_of_col(uint idx)
+  {
+    DBUG_ASSERT(idx < table->s->fields);
+    return col_stat[idx].min_null_row;
+  }
+  uint get_max_nulls_in_row() { return max_nulls_in_row; }
+};
+
+
 /* used in independent ALL/ANY optimisation */
 class select_max_min_finder_subselect :public select_subselect
 {
@@ -3270,7 +3592,7 @@ public:
     :select_subselect(item_arg), cache(0), fmax(mx)
   {}
   void cleanup();
-  bool send_data(List<Item> &items);
+  int send_data(List<Item> &items);
   bool cmp_real();
   bool cmp_int();
   bool cmp_decimal();
@@ -3283,9 +3605,70 @@ class select_exists_subselect :public select_subselect
 public:
   select_exists_subselect(Item_subselect *item_arg)
     :select_subselect(item_arg){}
-  bool send_data(List<Item> &items);
+  int send_data(List<Item> &items);
 };
 
+
+
+
+/*
+  Optimizer and executor structure for the materialized semi-join info. This
+  structure contains
+   - The sj-materialization temporary table
+   - Members needed to make index lookup or a full scan of the temptable.
+*/
+class SJ_MATERIALIZATION_INFO : public Sql_alloc
+{
+public:
+  /* Optimal join sub-order */
+  struct st_position *positions;
+
+  uint tables; /* Number of tables in the sj-nest */
+
+  /* Expected #rows in the materialized table */
+  double rows;
+
+  /* 
+    Cost to materialize - execute the sub-join and write rows into temp.table
+  */
+  COST_VECT materialization_cost;
+
+  /* Cost to make one lookup in the temptable */
+  COST_VECT lookup_cost;
+  
+  /* Cost of scanning the materialized table */
+  COST_VECT scan_cost;
+
+  /* --- Execution structures ---------- */
+  
+  /*
+    TRUE <=> This structure is used for execution. We don't necessarily pick
+    sj-materialization, so some of SJ_MATERIALIZATION_INFO structures are not
+    used by materialization
+  */
+  bool is_used;
+  
+  bool materialized; /* TRUE <=> materialization already performed */
+  /*
+    TRUE  - the temptable is read with full scan
+    FALSE - we use the temptable for index lookups
+  */
+  bool is_sj_scan; 
+  
+  /* The temptable and its related info */
+  TMP_TABLE_PARAM sjm_table_param;
+  List<Item> sjm_table_cols;
+  TABLE *table;
+
+  /* Structure used to make index lookups */
+  struct st_table_ref *tab_ref;
+  Item *in_equality; /* See create_subq_in_equalities() */
+
+  Item *join_cond; /* See comments in make_join_select() */
+  Copy_field *copy_field; /* Needed for SJ_Materialization scan */
+};
+
+
 /* Structs used when sorting */
 
 typedef struct st_sort_field {
@@ -3324,7 +3707,7 @@ public:
     else
       db= db_arg;
   }
-  inline Table_ident(LEX_STRING table_arg) 
+  inline Table_ident(LEX_STRING table_arg)
     :table(table_arg), sel((SELECT_LEX_UNIT *)0)
   {
     db.str=0;
@@ -3362,15 +3745,16 @@ class user_var_entry
   Item_result type;
   bool unsigned_flag;
 
-  double val_real(my_bool *null_value);
-  longlong val_int(my_bool *null_value) const;
-  String *val_str(my_bool *null_value, String *str, uint decimals);
-  my_decimal *val_decimal(my_bool *null_value, my_decimal *result);
+  double val_real(bool *null_value);
+  longlong val_int(bool *null_value) const;
+  String *val_str(bool *null_value, String *str, uint decimals);
+  my_decimal *val_decimal(bool *null_value, my_decimal *result);
   DTCollation collation;
 };
 
+
 /*
-   Unique -- class for unique (removing of duplicates). 
+   Unique -- class for unique (removing of duplicates).
    Puts all values to the TREE. If the tree becomes too big,
    it's dumped to the file. User can request sorted values, or
    just iterate through them. In the last case tree merging is performed in
@@ -3385,32 +3769,48 @@ class Unique :public Sql_alloc
   IO_CACHE file;
   TREE tree;
   uchar *record_pointers;
+  ulong filtered_out_elems;
   bool flush();
   uint size;
+  uint full_size;
+  uint min_dupl_count;   /* always 0 for unions, > 0 for intersections */
 
 public:
   ulong elements;
   Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
-	 uint size_arg, ulonglong max_in_memory_size_arg);
+	 uint size_arg, ulonglong max_in_memory_size_arg,
+         uint min_dupl_count_arg= 0);
   ~Unique();
   ulong elements_in_tree() { return tree.elements_in_tree; }
   inline bool unique_add(void *ptr)
   {
     DBUG_ENTER("unique_add");
     DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
-    if (tree.elements_in_tree > max_elements && flush())
+    if (!(tree.flag & TREE_ONLY_DUPS) && 
+        tree.elements_in_tree >= max_elements && flush())
       DBUG_RETURN(1);
     DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg));
   }
 
+  bool is_in_memory() { return (my_b_tell(&file) == 0); }
+  void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; }
+
   bool get(TABLE *table);
-  static double get_use_cost(uint *buffer, uint nkeys, uint key_size, 
-                             ulonglong max_in_memory_size);
-  inline static int get_cost_calc_buff_size(ulong nkeys, uint key_size, 
+  
+  /* Cost of searching for an element in the tree */
+  inline static double get_search_cost(ulonglong tree_elems, uint compare_factor)
+  {
+    return log((double) tree_elems) / (compare_factor * M_LN2);
+  }  
+
+  static double get_use_cost(uint *buffer, size_t nkeys, uint key_size,
+                             ulonglong max_in_memory_size, uint compare_factor,
+                             bool intersect_fl, bool *in_memory);
+  inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size,
                                             ulonglong max_in_memory_size)
   {
     register ulonglong max_elems_in_tree=
-      (1 + max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));
+      max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
     return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree));
   }
 
@@ -3422,6 +3822,11 @@ public:
 
   friend int unique_write_to_file(uchar* key, element_count count, Unique *unique);
   friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique);
+
+  friend int unique_write_to_file_with_count(uchar* key, element_count count,
+                                             Unique *unique);
+  friend int unique_intersect_write_to_ptrs(uchar* key, element_count count, 
+				            Unique *unique);
 };
 
 
@@ -3448,7 +3853,7 @@ public:
   multi_delete(TABLE_LIST *dt, uint num_of_tables);
   ~multi_delete();
   int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
-  bool send_data(List<Item> &items);
+  int send_data(List<Item> &items);
   bool initialize_tables (JOIN *join);
   void send_error(uint errcode,const char *err);
   int do_deletes();
@@ -3465,7 +3870,7 @@ public:
 class multi_update :public select_result_interceptor
 {
   TABLE_LIST *all_tables; /* query/update command tables */
-  TABLE_LIST *leaves;     /* list of leves of join table tree */
+  List<TABLE_LIST> *leaves;     /* list of leaves of join table tree */
   TABLE_LIST *update_tables, *table_being_updated;
   TABLE **tmp_tables, *main_table, *table_to_update;
   TMP_TABLE_PARAM *tmp_table_param;
@@ -3475,7 +3880,7 @@ class multi_update :public select_result_interceptor
   uint table_count;
   /*
    List of tables referenced in the CHECK OPTION condition of
-   the updated view excluding the updated table. 
+   the updated view excluding the updated table.
   */
   List <TABLE> unupdated_check_opt_tables;
   Copy_field *copy_field;
@@ -3491,12 +3896,12 @@ class multi_update :public select_result_interceptor
   bool error_handled;
 
 public:
-  multi_update(TABLE_LIST *ut, TABLE_LIST *leaves_list,
+  multi_update(TABLE_LIST *ut, List<TABLE_LIST> *leaves_list,
 	       List<Item> *fields, List<Item> *values,
 	       enum_duplicates handle_duplicates, bool ignore);
   ~multi_update();
   int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
-  bool send_data(List<Item> &items);
+  int send_data(List<Item> &items);
   bool initialize_tables (JOIN *join);
   void send_error(uint errcode,const char *err);
   int  do_updates();
@@ -3538,7 +3943,7 @@ public:
   select_dumpvar()  { var_list.empty(); row_count= 0;}
   ~select_dumpvar() {}
   int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
-  bool send_data(List<Item> &items);
+  int send_data(List<Item> &items);
   bool send_eof();
   virtual bool check_simple_select() const;
   void cleanup();
@@ -3547,10 +3952,11 @@ public:
 /* Bits in sql_command_flags */
 
 #define CF_CHANGES_DATA           (1U << 0)
-/* The 2nd bit is unused -- it used to be CF_HAS_ROW_COUNT. */
+#define CF_REPORT_PROGRESS        (1U << 1)
 #define CF_STATUS_COMMAND         (1U << 2)
 #define CF_SHOW_TABLE_COMMAND     (1U << 3)
 #define CF_WRITE_LOGS_COMMAND     (1U << 4)
+
 /**
   Must be set for SQL statements that may contain
   Item expressions and/or use joins and tables.
@@ -3657,19 +4063,194 @@ inline bool add_group_to_list(THD *thd, Item *item, bool asc)
   return thd->lex->current_select->add_group_to_list(thd, item, asc);
 }
 
-#endif /* MYSQL_SERVER */
+/* inline handler methods that need to know TABLE and THD structures */
+inline void handler::increment_statistics(ulong SSV::*offset) const
+{
+  status_var_increment(table->in_use->status_var.*offset);
+}
 
-/**
-  The meat of thd_proc_info(THD*, char*), a macro that packs the last
-  three calling-info parameters.
+inline void handler::decrement_statistics(ulong SSV::*offset) const
+{
+  status_var_decrement(table->in_use->status_var.*offset);
+}
+
+inline int handler::ha_index_read_map(uchar * buf, const uchar * key,
+                                      key_part_map keypart_map,
+                                      enum ha_rkey_function find_flag)
+{
+  DBUG_ASSERT(inited==INDEX);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  increment_statistics(&SSV::ha_read_key_count);
+  int error= index_read_map(buf, key, keypart_map, find_flag);
+  if (!error)
+    update_index_statistics();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
+  return error;
+}
+
+
+/*
+  @note: Other index lookup/navigation functions require prior
+  handler->index_init() call. This function is different, it requires
+  that the scan is not initialized, and accepts "uint index" as an argument.
 */
-extern "C"
-const char *set_thd_proc_info(void *thd_arg, const char *info,
-                              const char *calling_func,
-                              const char *calling_file,
-                              const unsigned int calling_line);
-
-#define thd_proc_info(thd, msg) \
-  set_thd_proc_info(thd, msg, __func__, __FILE__, __LINE__)
+
+inline int handler::ha_index_read_idx_map(uchar * buf, uint index,
+                                          const uchar * key,
+                                          key_part_map keypart_map,
+                                          enum ha_rkey_function find_flag)
+{
+  DBUG_ASSERT(inited==NONE);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  increment_statistics(&SSV::ha_read_key_count);
+  int error= index_read_idx_map(buf, index, key, keypart_map, find_flag);
+  if (!error)
+  {
+    update_rows_read();
+    index_rows_read[index]++;
+  }
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
+  return error;
+}
+
+inline int handler::ha_index_next(uchar * buf)
+{
+  DBUG_ASSERT(inited==INDEX);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  increment_statistics(&SSV::ha_read_next_count);
+  int error= index_next(buf);
+  if (!error)
+    update_index_statistics();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
+  return error;
+}
+
+inline int handler::ha_index_prev(uchar * buf)
+{
+  DBUG_ASSERT(inited==INDEX);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  increment_statistics(&SSV::ha_read_prev_count);
+  int error= index_prev(buf);
+  if (!error)
+    update_index_statistics();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
+  return error;
+}
+
+inline int handler::ha_index_first(uchar * buf)
+{
+  DBUG_ASSERT(inited==INDEX);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  increment_statistics(&SSV::ha_read_first_count);
+  int error= index_first(buf);
+  if (!error)
+    update_index_statistics();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
+  return error;
+}
+
+inline int handler::ha_index_last(uchar * buf)
+{
+  DBUG_ASSERT(inited==INDEX);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  increment_statistics(&SSV::ha_read_last_count);
+  int error= index_last(buf);
+  if (!error)
+    update_index_statistics();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
+  return error;
+}
+
+inline int handler::ha_index_next_same(uchar *buf, const uchar *key,
+                                       uint keylen)
+{
+  DBUG_ASSERT(inited==INDEX);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  increment_statistics(&SSV::ha_read_next_count);
+  int error= index_next_same(buf, key, keylen);
+  if (!error)
+    update_index_statistics();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
+  return error;
+}
+
+inline int handler::ha_ft_read(uchar *buf)
+{
+  int error= ft_read(buf);
+  if (!error)
+    update_rows_read();
+
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_rnd_next(uchar *buf)
+{
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, TRUE);
+  increment_statistics(&SSV::ha_read_rnd_next_count);
+  int error= rnd_next(buf);
+  if (!error)
+    update_rows_read();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_READ_ROW_DONE(error);
+  return error;
+}
+
+inline int handler::ha_rnd_pos(uchar *buf, uchar *pos)
+{
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, FALSE);
+  increment_statistics(&SSV::ha_read_rnd_count);
+  int error= rnd_pos(buf, pos);
+  if (!error)
+    update_rows_read();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_READ_ROW_DONE(error);
+  return error;
+}
+
+inline int handler::ha_rnd_pos_by_record(uchar *buf)
+{
+  int error= rnd_pos_by_record(buf);
+  if (!error)
+    update_rows_read();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_read_first_row(uchar *buf, uint primary_key)
+{
+  int error= read_first_row(buf, primary_key);
+  if (!error)
+    update_rows_read();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_write_tmp_row(uchar *buf)
+{
+  MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
+  increment_statistics(&SSV::ha_tmp_write_count);
+  int error= write_row(buf);
+  MYSQL_INSERT_ROW_DONE(error);
+  return error;
+}
+
+inline int handler::ha_update_tmp_row(const uchar *old_data, uchar *new_data)
+{
+  MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
+  increment_statistics(&SSV::ha_tmp_update_count);
+  int error= update_row(old_data, new_data);
+  MYSQL_UPDATE_ROW_DONE(error);
+  return error;
+}
+
+#endif /* MYSQL_SERVER */
 
 #endif /* SQL_CLASS_INCLUDED */
diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc
index e96d0e818c7..5dde1818c27 100644
--- a/sql/sql_connect.cc
+++ b/sql/sql_connect.cc
@@ -26,7 +26,6 @@
 #endif
 #include "sql_audit.h"
 #include "sql_connect.h"
-#include "my_global.h"
 #include "probes_mysql.h"
 #include "unireg.h"                    // REQUIRED: for other includes
 #include "sql_parse.h"                          // sql_command_flags,
@@ -38,23 +37,12 @@
 #include "sql_acl.h"  // acl_getroot, NO_ACCESS, SUPER_ACL
 #include "sql_callback.h"
 
-#if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
-/*
-  Without SSL the handshake consists of one packet. This packet
-  has both client capabilites and scrambled password.
-  With SSL the handshake might consist of two packets. If the first
-  packet (client capabilities) has CLIENT_SSL flag set, we have to
-  switch to SSL and read the second packet. The scrambled password
-  is in the second packet and client_capabilites field will be ignored.
-  Maybe it is better to accept flags other than CLIENT_SSL from the
-  second packet?
-*/
-#define SSL_HANDSHAKE_SIZE      2
-#define NORMAL_HANDSHAKE_SIZE   6
-#define MIN_HANDSHAKE_SIZE      2
-#else
-#define MIN_HANDSHAKE_SIZE      6
-#endif /* HAVE_OPENSSL && !EMBEDDED_LIBRARY */
+HASH global_user_stats, global_client_stats, global_table_stats;
+HASH global_index_stats;
+/* Protects the above global stats */
+extern mysql_mutex_t LOCK_global_user_client_stats;
+extern mysql_mutex_t LOCK_global_table_stats;
+extern mysql_mutex_t LOCK_global_index_stats;
 
 /*
   Get structure for logging connection data for the current user
@@ -74,6 +62,7 @@ int get_or_create_user_conn(THD *thd, const char *user,
 
   DBUG_ASSERT(user != 0);
   DBUG_ASSERT(host != 0);
+  DBUG_ASSERT(thd->user_connect == 0);
 
   user_len= strlen(user);
   temp_len= (strmov(strmov(temp_user, user)+1, host) - temp_user)+1;
@@ -133,7 +122,7 @@ end:
 
 int check_for_max_user_connections(THD *thd, USER_CONN *uc)
 {
-  int error=0;
+  int error= 1;
   DBUG_ENTER("check_for_max_user_connections");
 
   mysql_mutex_lock(&LOCK_user_conn);
@@ -142,7 +131,6 @@ int check_for_max_user_connections(THD *thd, USER_CONN *uc)
       global_system_variables.max_user_connections < (uint) uc->connections)
   {
     my_error(ER_TOO_MANY_USER_CONNECTIONS, MYF(0), uc->user);
-    error=1;
     goto end;
   }
   time_out_user_resource_limits(thd, uc);
@@ -152,7 +140,6 @@ int check_for_max_user_connections(THD *thd, USER_CONN *uc)
     my_error(ER_USER_LIMIT_REACHED, MYF(0), uc->user,
              "max_user_connections",
              (long) uc->user_resources.user_conn);
-    error= 1;
     goto end;
   }
   if (uc->user_resources.conn_per_hour &&
@@ -161,10 +148,10 @@ int check_for_max_user_connections(THD *thd, USER_CONN *uc)
     my_error(ER_USER_LIMIT_REACHED, MYF(0), uc->user,
              "max_connections_per_hour",
              (long) uc->user_resources.conn_per_hour);
-    error=1;
     goto end;
   }
   uc->conn_per_hour++;
+  error= 0;
 
 end:
   if (error)
@@ -313,10 +300,13 @@ extern "C" void free_user(struct user_conn *uc)
 void init_max_user_conn(void)
 {
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
-  (void)
-    my_hash_init(&hash_user_connections,system_charset_info,max_connections,
+  if (my_hash_init(&hash_user_connections,system_charset_info,max_connections,
                  0,0, (my_hash_get_key) get_key_conn,
-                 (my_hash_free_key) free_user, 0);
+                 (my_hash_free_key) free_user, 0))
+  {
+    sql_print_error("Initializing hash_user_connections failed.");
+    exit(1);
+  }
 #endif
 }
 
@@ -370,6 +360,446 @@ void reset_mqh(LEX_USER *lu, bool get_them= 0)
 #endif /* NO_EMBEDDED_ACCESS_CHECKS */
 }
 
+/*****************************************************************************
+ Handle users statistics
+*****************************************************************************/
+
+/* Placeholder name used when no user is defined for a THD. */
+static const char mysql_system_user[]= "#mysql_system#";
+
+/* Maps a NULL 'user' to mysql_system_user; any other value is returned as is. */
+static const char * get_valid_user_string(char* user)
+{
+  return !user ? mysql_system_user : user;
+}
+
+/*
+  Client address of connection 'client': host_or_ip when non-empty, else
+  host, else "".  Returns an existing string; no memory is allocated.
+*/
+static const char *get_client_host(THD *client)
+{
+  Security_context *sctx= client->security_ctx;
+  if (sctx->host_or_ip[0])
+    return sctx->host_or_ip;
+  return sctx->host ? sctx->host : "";
+}
+
+extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length,
+                                     my_bool not_used __attribute__((unused)))
+{
+  *length= user_stats->user_name_length;        /* out: key length for HASH */
+  return (uchar*) user_stats->user;             /* key bytes: 'user' field */
+}
+
+void free_user_stats(USER_STATS* user_stats)
+{
+  my_free(user_stats);          /* entry itself; used as my_hash_free_key */
+}
+
+/*
+  Fill in a USER_STATS entry.
+
+  Copies at most sizeof(user_stats->user)-1 bytes of 'user' as the entry's
+  name (and hash key length), copies 'priv_user', and stores every counter
+  argument in the field of the same name.
+*/
+void init_user_stats(USER_STATS *user_stats,
+                     const char *user, size_t user_length,
+                     const char *priv_user,
+                     uint total_connections, uint concurrent_connections,
+                     time_t connected_time,
+                     double busy_time, double cpu_time,
+                     ulonglong bytes_received, ulonglong bytes_sent,
+                     ulonglong binlog_bytes_written,
+                     ha_rows rows_sent, ha_rows rows_read,
+                     ha_rows rows_inserted, ha_rows rows_deleted,
+                     ha_rows rows_updated,
+                     ulonglong select_commands, ulonglong update_commands,
+                     ulonglong other_commands,
+                     ulonglong commit_trans, ulonglong rollback_trans,
+                     ulonglong denied_connections, ulonglong lost_connections,
+                     ulonglong access_denied_errors, ulonglong empty_queries)
+{
+  DBUG_ENTER("init_user_stats");
+  DBUG_PRINT("enter", ("user: %s  priv_user: %s", user, priv_user));
+
+  /* Clamp so that the name and its terminating 0 fit in user_stats->user */
+  user_length= min(user_length, sizeof(user_stats->user)-1);
+  memcpy(user_stats->user, user, user_length);
+  user_stats->user[user_length]= 0;
+  user_stats->user_name_length= user_length;
+  strmake(user_stats->priv_user, priv_user, sizeof(user_stats->priv_user)-1);
+
+  user_stats->total_connections= total_connections;
+  user_stats->concurrent_connections= concurrent_connections;
+  user_stats->connected_time= connected_time;
+  user_stats->busy_time= busy_time;
+  user_stats->cpu_time= cpu_time;
+  user_stats->bytes_received= bytes_received;
+  user_stats->bytes_sent= bytes_sent;
+  user_stats->binlog_bytes_written= binlog_bytes_written;
+  user_stats->rows_sent= rows_sent;
+  user_stats->rows_read= rows_read;
+  user_stats->rows_inserted= rows_inserted;
+  user_stats->rows_deleted= rows_deleted;
+  user_stats->rows_updated= rows_updated;
+  user_stats->select_commands= select_commands;
+  user_stats->update_commands= update_commands;
+  user_stats->other_commands= other_commands;
+  user_stats->commit_trans= commit_trans;
+  user_stats->rollback_trans= rollback_trans;
+  user_stats->denied_connections= denied_connections;
+  user_stats->lost_connections= lost_connections;
+  user_stats->access_denied_errors= access_denied_errors;
+  user_stats->empty_queries= empty_queries;
+  DBUG_VOID_RETURN;
+}
+
+
+#ifdef COMPLETE_PATCH_NOT_ADDED_YET
+/* Adds each argument to the USER_STATS counter of the same name. */
+void add_user_stats(USER_STATS *user_stats,
+                    uint total_connections,
+                    uint concurrent_connections,
+                    time_t connected_time,
+                    double busy_time,
+                    double cpu_time,
+                    ulonglong bytes_received,
+                    ulonglong bytes_sent,
+                    ulonglong binlog_bytes_written,
+                    ha_rows rows_sent,
+                    ha_rows rows_read,
+                    ha_rows rows_inserted,
+                    ha_rows rows_deleted,
+                    ha_rows rows_updated,
+                    ulonglong select_commands,
+                    ulonglong update_commands,
+                    ulonglong other_commands,
+                    ulonglong commit_trans,
+                    ulonglong rollback_trans,
+                    ulonglong denied_connections,
+                    ulonglong lost_connections,
+                    ulonglong access_denied_errors,
+                    ulonglong empty_queries)
+{
+  user_stats->total_connections+= total_connections;
+  user_stats->concurrent_connections+= concurrent_connections;
+  user_stats->connected_time+= connected_time;
+  user_stats->busy_time+= busy_time;
+  user_stats->cpu_time+= cpu_time;
+  user_stats->bytes_received+= bytes_received;
+  user_stats->bytes_sent+= bytes_sent;
+  user_stats->binlog_bytes_written+= binlog_bytes_written;
+  user_stats->rows_sent+=  rows_sent;
+  user_stats->rows_inserted+= rows_inserted;
+  user_stats->rows_deleted+=  rows_deleted;
+  user_stats->rows_updated+=  rows_updated;
+  user_stats->rows_read+= rows_read;
+  user_stats->select_commands+= select_commands;
+  user_stats->update_commands+= update_commands;
+  user_stats->other_commands+= other_commands;
+  user_stats->commit_trans+= commit_trans;
+  user_stats->rollback_trans+= rollback_trans;
+  user_stats->denied_connections+= denied_connections;
+  user_stats->lost_connections+= lost_connections;
+  user_stats->access_denied_errors+= access_denied_errors;
+  user_stats->empty_queries+= empty_queries;
+}
+#endif
+
+
+void init_global_user_stats(void)
+{
+  if (!my_hash_init(&global_user_stats, system_charset_info, max_connections,
+                    0, 0, (my_hash_get_key) get_key_user_stats,
+                    (my_hash_free_key) free_user_stats, 0))
+    return;                                     /* hash created */
+  /* Creation failed: report it and terminate the process */
+  sql_print_error("Initializing global_user_stats failed.");
+  exit(1);
+}
+
+void init_global_client_stats(void)
+{
+  if (!my_hash_init(&global_client_stats, system_charset_info, max_connections,
+                    0, 0, (my_hash_get_key) get_key_user_stats,
+                    (my_hash_free_key) free_user_stats, 0))
+    return;                                     /* hash created */
+  /* Creation failed: report it and terminate the process */
+  sql_print_error("Initializing global_client_stats failed.");
+  exit(1);
+}
+
+extern "C" uchar *get_key_table_stats(TABLE_STATS *table_stats, size_t *length,
+                                      my_bool not_used __attribute__((unused)))
+{
+  *length= table_stats->table_name_length;      /* out: key length for HASH */
+  return (uchar*) table_stats->table;           /* key bytes: 'table' field */
+}
+
+extern "C" void free_table_stats(TABLE_STATS* table_stats)
+{
+  my_free(table_stats);         /* entry itself; used as my_hash_free_key */
+}
+
+void init_global_table_stats(void)
+{
+  if (!my_hash_init(&global_table_stats, system_charset_info, max_connections,
+                    0, 0, (my_hash_get_key) get_key_table_stats,
+                    (my_hash_free_key) free_table_stats, 0))
+    return;                                     /* hash created */
+  sql_print_error("Initializing global_table_stats failed.");
+  exit(1);                                      /* fatal: terminate process */
+}
+
+extern "C" uchar *get_key_index_stats(INDEX_STATS *index_stats, size_t *length,
+                                     my_bool not_used __attribute__((unused)))
+{
+  *length= index_stats->index_name_length;      /* out: key length for HASH */
+  return (uchar*) index_stats->index;           /* key bytes: 'index' field */
+}
+
+extern "C" void free_index_stats(INDEX_STATS* index_stats)
+{
+  my_free(index_stats);         /* entry itself; used as my_hash_free_key */
+}
+
+void init_global_index_stats(void)
+{
+  if (!my_hash_init(&global_index_stats, system_charset_info, max_connections,
+                    0, 0, (my_hash_get_key) get_key_index_stats,
+                    (my_hash_free_key) free_index_stats, 0))
+    return;                                     /* hash created */
+  /* Creation failed: report it and terminate the process */
+  sql_print_error("Initializing global_index_stats failed.");
+  exit(1);
+}
+
+
+void free_global_user_stats(void)
+{
+  my_hash_free(&global_user_stats);     /* undoes init_global_user_stats() */
+}
+
+void free_global_table_stats(void)
+{
+  my_hash_free(&global_table_stats);    /* undoes init_global_table_stats() */
+}
+
+void free_global_index_stats(void)
+{
+  my_hash_free(&global_index_stats);    /* undoes init_global_index_stats() */
+}
+
+void free_global_client_stats(void)
+{
+  my_hash_free(&global_client_stats);   /* undoes init_global_client_stats() */
+}
+
+/*
+  Bump total_connections of the entry keyed by 'name' in 'users_or_clients'
+  (global_user_stats or global_client_stats), creating the entry on first
+  use.  Returns FALSE on success and TRUE if the entry could not be created.
+*/
+
+static bool increment_count_by_name(const char *name, size_t name_length,
+                                   const char *role_name,
+                                   HASH *users_or_clients, THD *thd)
+{
+  USER_STATS *entry= (USER_STATS*) my_hash_search(users_or_clients,
+                                                  (uchar*) name, name_length);
+  if (!entry)
+  {
+    /* First connection for this user or client: make a zero-filled entry */
+    entry= (USER_STATS*) my_malloc(sizeof(USER_STATS),
+                                   MYF(MY_WME | MY_ZEROFILL));
+    if (!entry)
+      return TRUE;                              // Out of memory
+
+    init_user_stats(entry, name, name_length, role_name,
+                    0, 0,      // connections
+                    0, 0, 0,   // time
+                    0, 0, 0,   // bytes received, sent and written
+                    0, 0,      // rows sent and read
+                    0, 0, 0,   // rows inserted, deleted and updated
+                    0, 0, 0,   // select, update and other commands
+                    0, 0,      // commit and rollback trans
+                    thd->status_var.access_denied_errors, // denied connections
+                    0,         // lost connections
+                    0,         // access denied errors
+                    0);        // empty queries
+
+    if (my_hash_insert(users_or_clients, (uchar*) entry))
+    {
+      /* The insert failed, so the new entry is released here */
+      my_free(entry);
+      return TRUE;                              // Out of memory
+    }
+  }
+  entry->total_connections++;
+  return FALSE;
+}
+
+
+/*
+  Increments the global user and client stats connection count.
+  Returns success without doing anything when thd->userstat_running is off.
+  Entries that do not exist yet are created by increment_count_by_name().
+
+  @param thd       connection whose user name and client host are counted
+  @param use_lock  if true, LOCK_global_user_client_stats will be locked
+
+  @retval 0 ok
+  @retval 1 error.
+*/
+
+#ifndef EMBEDDED_LIBRARY
+static bool increment_connection_count(THD* thd, bool use_lock)
+{
+  const char *user_string= get_valid_user_string(thd->main_security_ctx.user);
+  const char *client_string= get_client_host(thd);
+  bool failed;
+
+  /* Nothing to do unless userstat_running is set for this connection */
+  if (!thd->userstat_running)
+    return FALSE;
+
+  if (use_lock)
+    mysql_mutex_lock(&LOCK_global_user_client_stats);
+
+  /*
+    Count the connection for the user first, then for the client host.
+    || short-circuits, so a failure on the user entry skips the client one.
+  */
+  failed= (increment_count_by_name(user_string, strlen(user_string),
+                                   user_string, &global_user_stats, thd) ||
+           increment_count_by_name(client_string, strlen(client_string),
+                                   user_string, &global_client_stats, thd));
+
+  if (use_lock)
+    mysql_mutex_unlock(&LOCK_global_user_client_stats);
+
+  return failed;
+}
+#endif
+
+/*
+  Adds thd's activity (mostly status_var - org_status_var) to 'user_stats'.
+*/
+
+static void update_global_user_stats_with_user(THD *thd,
+                                               USER_STATS *user_stats,
+                                               time_t now)
+{
+  DBUG_ASSERT(thd->userstat_running);
+
+  user_stats->connected_time+= now - thd->last_global_update_time;
+  user_stats->busy_time+=  (thd->status_var.busy_time -
+                            thd->org_status_var.busy_time);
+  user_stats->cpu_time+=   (thd->status_var.cpu_time -
+                            thd->org_status_var.cpu_time); 
+  /*
+    This is handled specially as bytes_received is incremented BEFORE
+    org_status_var is copied.
+  */
+  user_stats->bytes_received+= (thd->org_status_var.bytes_received-
+                                thd->start_bytes_received);
+  user_stats->bytes_sent+= (thd->status_var.bytes_sent -
+                            thd->org_status_var.bytes_sent);
+  user_stats->binlog_bytes_written+=
+    (thd->status_var.binlog_bytes_written -
+     thd->org_status_var.binlog_bytes_written);
+  /* We are not counting rows in internal temporary tables here ! */
+  user_stats->rows_read+=      (thd->status_var.rows_read -
+                                thd->org_status_var.rows_read);
+  user_stats->rows_sent+=      (thd->status_var.rows_sent -
+                                thd->org_status_var.rows_sent);
+  user_stats->rows_inserted+=  (thd->status_var.ha_write_count -
+                                thd->org_status_var.ha_write_count);
+  user_stats->rows_deleted+=   (thd->status_var.ha_delete_count -
+                                thd->org_status_var.ha_delete_count);
+  user_stats->rows_updated+=   (thd->status_var.ha_update_count -
+                                thd->org_status_var.ha_update_count);
+  user_stats->select_commands+= thd->select_commands;
+  user_stats->update_commands+= thd->update_commands;
+  user_stats->other_commands+=  thd->other_commands;
+  user_stats->commit_trans+=   (thd->status_var.ha_commit_count -
+                                thd->org_status_var.ha_commit_count);
+  user_stats->rollback_trans+= (thd->status_var.ha_rollback_count +
+                                thd->status_var.ha_savepoint_rollback_count -
+                                thd->org_status_var.ha_rollback_count -
+                                thd->org_status_var.
+                                ha_savepoint_rollback_count);
+  user_stats->access_denied_errors+=
+    (thd->status_var.access_denied_errors -
+     thd->org_status_var.access_denied_errors);
+  user_stats->empty_queries+=   (thd->status_var.empty_queries -
+                                 thd->org_status_var.empty_queries);
+  /* NOTE(review): added as absolute values, not as deltas - confirm intended */
+  /* The following can only contain 0 or 1 and then connection ends */
+  user_stats->denied_connections+= thd->status_var.access_denied_errors;
+  user_stats->lost_connections+=   thd->status_var.lost_connections;
+}
+
+
+/*
+  Adds the statistics of connection 'thd' to its global per-user and
+  per-client entries, under LOCK_global_user_client_stats.
+  @param thd          connection; must have userstat_running set
+  @param create_user  if true, an entry that does not exist yet is created
+                      (via increment_count_by_name())
+  @param now          time stored in thd->last_global_update_time
+*/
+void update_global_user_stats(THD *thd, bool create_user, time_t now)
+{
+  USER_STATS *entry;
+  DBUG_ASSERT(thd->userstat_running);
+
+  const char *user_string= get_valid_user_string(thd->main_security_ctx.user);
+  const size_t user_string_length= strlen(user_string);
+  const char *client_string= get_client_host(thd);
+  const size_t client_string_length= strlen(client_string);
+
+  mysql_mutex_lock(&LOCK_global_user_client_stats);
+
+  /* Entry keyed by user name */
+  entry= (USER_STATS*) my_hash_search(&global_user_stats,
+                                      (uchar*) user_string,
+                                      user_string_length);
+  if (entry)
+  {
+    update_global_user_stats_with_user(thd, entry, now);
+  }
+  else if (create_user)
+  {
+    /* A failure to create the entry is ignored */
+    increment_count_by_name(user_string, user_string_length, user_string,
+                            &global_user_stats, thd);
+  }
+
+  /* Entry keyed by client host */
+  entry= (USER_STATS*) my_hash_search(&global_client_stats,
+                                      (uchar*) client_string,
+                                      client_string_length);
+  if (entry)
+  {
+    update_global_user_stats_with_user(thd, entry, now);
+  }
+  else if (create_user)
+  {
+    /* A failure to create the entry is ignored */
+    increment_count_by_name(client_string, client_string_length,
+                            user_string, &global_client_stats, thd);
+  }
+
+  /* Reset variables only used for counting */
+  thd->select_commands= thd->update_commands= thd->other_commands= 0;
+  thd->last_global_update_time= now;
+
+  mysql_mutex_unlock(&LOCK_global_user_client_stats);
+}
+
 
 /**
   Set thread character set variables from the given ID
@@ -544,7 +974,7 @@ bool setup_connection_thread_globals(THD *thd)
   {
     close_connection(thd, ER_OUT_OF_RESOURCES);
     statistic_increment(aborted_connects,&LOCK_status);
-    MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0));
+    MYSQL_CALLBACK(thd->scheduler, end_thread, (thd, 0));
     return 1;                                   // Error
   }
   return 0;
@@ -566,7 +996,6 @@ bool setup_connection_thread_globals(THD *thd)
     1    error
 */
 
-
 bool login_connection(THD *thd)
 {
   NET *net= &thd->net;
@@ -594,6 +1023,14 @@ bool login_connection(THD *thd)
   /* Connect completed, set read/write timeouts back to default */
   my_net_set_read_timeout(net, thd->variables.net_read_timeout);
   my_net_set_write_timeout(net, thd->variables.net_write_timeout);
+
+  /* Updates global user connection stats (error text takes an int size). */
+  if (increment_connection_count(thd, TRUE))
+  {
+    my_error(ER_OUTOFMEMORY, MYF(0), (int) (2*sizeof(USER_STATS)));
+    DBUG_RETURN(1);
+  }
+
   DBUG_RETURN(0);
 }
 
@@ -609,8 +1046,14 @@ void end_connection(THD *thd)
 {
   NET *net= &thd->net;
   plugin_thdvar_cleanup(thd);
+
   if (thd->user_connect)
   {
+    /*
+      We decrease this variable early to make it easy to log in again quickly.
+      This code is not critical as we will in any case do this test
+      again in thd->cleanup()
+    */
     decrease_user_connections(thd->user_connect);
     /*
       The thread may returned back to the pool and assigned to a user
@@ -623,6 +1066,7 @@ void end_connection(THD *thd)
   if (thd->killed || (net->error && net->vio != 0))
   {
     statistic_increment(aborted_threads,&LOCK_status);
+    status_var_increment(thd->status_var.lost_connections);
   }
 
   if (net->error && net->vio != 0)
@@ -739,13 +1183,13 @@ void do_handle_one_connection(THD *thd_arg)
 {
   THD *thd= thd_arg;
 
-  thd->thr_create_utime= my_micro_time();
+  thd->thr_create_utime= microsecond_interval_timer();
 
   if (MYSQL_CALLBACK_ELSE(thread_scheduler, init_new_connection_thread, (), 0))
   {
     close_connection(thd, ER_OUT_OF_RESOURCES);
     statistic_increment(aborted_connects,&LOCK_status);
-    MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0));
+    MYSQL_CALLBACK(thd->scheduler, end_thread, (thd, 0));
     return;
   }
 
@@ -777,11 +1221,13 @@ void do_handle_one_connection(THD *thd_arg)
 
   for (;;)
   {
-    bool rc;
+    bool create_user= TRUE;
 
-    rc= thd_prepare_connection(thd);
-    if (rc)
+    if (thd_prepare_connection(thd))
+    {
+      create_user= FALSE;
       goto end_thread;
+    }      
 
     while (thd_is_connection_alive(thd))
     {
@@ -793,12 +1239,15 @@ void do_handle_one_connection(THD *thd_arg)
    
 end_thread:
     close_connection(thd);
-    if (MYSQL_CALLBACK_ELSE(thread_scheduler, end_thread, (thd, 1), 0))
+
+    if (thd->userstat_running)
+      update_global_user_stats(thd, create_user, time(NULL));
+
+    if (MYSQL_CALLBACK_ELSE(thd->scheduler, end_thread, (thd, 1), 0))
       return;                                 // Probably no-threads
 
     /*
-      If end_thread() returns, we are either running with
-      thread-handler=no-threads or this thread has been schedule to
+      If end_thread() returns, this thread has been scheduled to
       handle the next connection.
     */
     thd= current_thd;
diff --git a/sql/sql_connect.h b/sql/sql_connect.h
index df6ac789709..bab171606ba 100644
--- a/sql/sql_connect.h
+++ b/sql/sql_connect.h
@@ -18,13 +18,23 @@
 
 #include "my_sys.h"                          /* pthread_handler_t */
 #include "mysql_com.h"                         /* enum_server_command */
+#include "structs.h"
+#include <hash.h>
 
 class THD;
 typedef struct st_lex_user LEX_USER;
 typedef struct user_conn USER_CONN;
 
 void init_max_user_conn(void);
+void init_global_user_stats(void);
+void init_global_table_stats(void);
+void init_global_index_stats(void);
+void init_global_client_stats(void);
 void free_max_user_conn(void);
+void free_global_user_stats(void);
+void free_global_table_stats(void);
+void free_global_index_stats(void);
+void free_global_client_stats(void);
 
 pthread_handler_t handle_one_connection(void *arg);
 void do_handle_one_connection(THD *thd_arg);
@@ -32,21 +42,32 @@ bool init_new_connection_handler_thread();
 void reset_mqh(LEX_USER *lu, bool get_them);
 bool check_mqh(THD *thd, uint check_command);
 void time_out_user_resource_limits(THD *thd, USER_CONN *uc);
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
 void decrease_user_connections(USER_CONN *uc);
+#else
+#define decrease_user_connections(X) do { } while(0)       /* nothing */
+#endif
 bool thd_init_client_charset(THD *thd, uint cs_number);
 bool setup_connection_thread_globals(THD *thd);
 bool thd_prepare_connection(THD *thd);
 bool thd_is_connection_alive(THD *thd);
 
-int check_user(THD *thd, enum enum_server_command command,
-	       const char *passwd, uint passwd_len, const char *db,
-	       bool check_count);
-
 bool login_connection(THD *thd);
 void prepare_new_connection_state(THD* thd);
 void end_connection(THD *thd);
+void update_global_user_stats(THD* thd, bool create_user, time_t now);
 int get_or_create_user_conn(THD *thd, const char *user,
                             const char *host, const USER_RESOURCES *mqh);
 int check_for_max_user_connections(THD *thd, USER_CONN *uc);
 
+extern HASH global_user_stats;
+extern HASH global_client_stats;
+extern HASH global_table_stats;
+extern HASH global_index_stats;
+
+extern mysql_mutex_t LOCK_global_user_client_stats;
+extern mysql_mutex_t LOCK_global_table_stats;
+extern mysql_mutex_t LOCK_global_index_stats;
+extern mysql_mutex_t LOCK_stats;
+
 #endif /* SQL_CONNECT_INCLUDED */
diff --git a/sql/sql_const.h b/sql/sql_const.h
index 56747ed4965..cfbf077bd91 100644
--- a/sql/sql_const.h
+++ b/sql/sql_const.h
@@ -30,7 +30,7 @@
 #define MAX_FIELD_NAME 34			/* Max colum name length +2 */
 #define MAX_SYS_VAR_LENGTH 32
 #define MAX_KEY MAX_INDEXES                     /* Max used keys */
-#define MAX_REF_PARTS 16			/* Max parts used as ref */
+#define MAX_REF_PARTS 32			/* Max parts used as ref */
 #define MAX_KEY_LENGTH 3072			/* max possible key */
 #if SIZEOF_OFF_T > 4
 #define MAX_REFLENGTH 8				/* Max length for record ref */
@@ -51,10 +51,13 @@
 #define MAX_BIT_FIELD_LENGTH    64      /* Max length in bits for bit fields */
 
 #define MAX_DATE_WIDTH		10	/* YYYY-MM-DD */
-#define MAX_TIME_WIDTH		23	/* -DDDDDD HH:MM:SS.###### */
+#define MIN_TIME_WIDTH          10      /* -HHH:MM:SS */
+#define MAX_TIME_WIDTH          16      /* -DDDDDD HH:MM:SS */
+#define MAX_TIME_FULL_WIDTH     23      /* -DDDDDD HH:MM:SS.###### */
 #define MAX_DATETIME_FULL_WIDTH 29	/* YYYY-MM-DD HH:MM:SS.###### AM */
 #define MAX_DATETIME_WIDTH	19	/* YYYY-MM-DD HH:MM:SS */
 #define MAX_DATETIME_COMPRESSED_WIDTH 14  /* YYYYMMDDHHMMSS */
+#define MAX_DATETIME_PRECISION  6
 
 #define MAX_TABLES	(sizeof(table_map)*8-3)	/* Max tables in join */
 #define PARAM_TABLE_BIT	(((table_map) 1) << (sizeof(table_map)*8-3))
@@ -68,7 +71,7 @@
 #define MAX_SELECT_NESTING (sizeof(nesting_map)*8-1)
 
 #define MAX_SORT_MEMORY 2048*1024
-#define MIN_SORT_MEMORY 32*1024
+#define MIN_SORT_MEMORY 1024
 
 /* Some portable defines */
 
@@ -167,7 +170,10 @@
   Number of comparisons of table rowids equivalent to reading one row from a 
   table.
 */
-#define TIME_FOR_COMPARE_ROWID  (TIME_FOR_COMPARE*2)
+#define TIME_FOR_COMPARE_ROWID  (TIME_FOR_COMPARE*100)
+
+/* cost1 is better than cost2 only if cost1 + COST_EPS < cost2 */
+#define COST_EPS  0.001
 
 /*
   For sequential disk seeks the cost formula is:
@@ -176,11 +182,11 @@
   The cost of average seek 
     DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK =1.0.
 */
-#define DISK_SEEK_BASE_COST ((double)0.5)
+#define DISK_SEEK_BASE_COST ((double)0.9)
 
 #define BLOCKS_IN_AVG_SEEK  128
 
-#define DISK_SEEK_PROP_COST ((double)0.5/BLOCKS_IN_AVG_SEEK)
+#define DISK_SEEK_PROP_COST ((double)0.1/BLOCKS_IN_AVG_SEEK)
 
 
 /**
@@ -190,6 +196,12 @@
 */
 #define MATCHING_ROWS_IN_OTHER_TABLE 10
 
+/*
+  Subquery materialization-related constants
+*/
+#define HEAP_TEMPTABLE_LOOKUP_COST 0.05
+#define DISK_TEMPTABLE_LOOKUP_COST 1.0
+
 #define MY_CHARSET_BIN_MB_MAXLEN 1
 
 /** Don't pack string keys shorter than this (if PACK_KEYS=1 isn't used). */
@@ -219,7 +231,6 @@
 #define DELAYED_LIMIT		100		/**< pause after xxx inserts */
 #define DELAYED_QUEUE_SIZE	1000
 #define DELAYED_WAIT_TIMEOUT	5*60		/**< Wait for delayed insert */
-#define FLUSH_TIME		0		/**< Don't flush tables */
 #define MAX_CONNECT_ERRORS	10		///< errors before disabling host
 
 #define LONG_TIMEOUT ((ulong) 3600L*24L*365L)
@@ -231,8 +242,6 @@
 #define MAX_TIME_ZONE_NAME_LENGTH       (NAME_LEN + 1)
 
 #if defined(__WIN__)
-#undef	FLUSH_TIME
-#define FLUSH_TIME	1800			/**< Flush every half hour */
 
 #define INTERRUPT_PRIOR -2
 #define CONNECT_PRIOR	-1
diff --git a/sql/sql_crypt.cc b/sql/sql_crypt.cc
index 55c87ec86d3..bcc8ad0b10f 100644
--- a/sql/sql_crypt.cc
+++ b/sql/sql_crypt.cc
@@ -33,7 +33,7 @@
 void SQL_CRYPT::init(ulong *rand_nr)
 {
   uint i;
-  randominit(&rand,rand_nr[0],rand_nr[1]);
+  my_rnd_init(&rand,rand_nr[0],rand_nr[1]);
 
   for (i=0 ; i<=255; i++)
    decode_buff[i]= (char) i;
diff --git a/sql/sql_crypt.h b/sql/sql_crypt.h
index 4854664d7f0..3a12d603601 100644
--- a/sql/sql_crypt.h
+++ b/sql/sql_crypt.h
@@ -26,7 +26,7 @@
 
 class SQL_CRYPT :public Sql_alloc
 {
-  struct rand_struct rand,org_rand;
+  struct my_rnd_struct rand,org_rand;
   char decode_buff[256],encode_buff[256];
   uint shift;
  public:
diff --git a/sql/sql_cursor.cc b/sql/sql_cursor.cc
index aaf483f7a2f..7350618adc0 100644
--- a/sql/sql_cursor.cc
+++ b/sql/sql_cursor.cc
@@ -281,7 +281,7 @@ int Materialized_cursor::open(JOIN *join __attribute__((unused)))
   /* Create a list of fields and start sequential scan. */
 
   rc= result->prepare(item_list, &fake_unit);
-  rc= !rc && table->file->ha_rnd_init(TRUE);
+  rc= !rc && table->file->ha_rnd_init_with_error(TRUE);
   is_rnd_inited= !rc;
 
   thd->restore_active_arena(this, &backup_arena);
@@ -322,14 +322,14 @@ void Materialized_cursor::fetch(ulong num_rows)
   result->begin_dataset();
   for (fetch_limit+= num_rows; fetch_count < fetch_limit; fetch_count++)
   {
-    if ((res= table->file->rnd_next(table->record[0])))
+    if ((res= table->file->ha_rnd_next(table->record[0])))
       break;
     /* Send data only if the read was successful. */
     /*
       If network write failed (i.e. due to a closed socked),
       the error has already been set. Just return.
     */
-    if (result->send_data(item_list))
+    if (result->send_data(item_list) > 0)
       return;
   }
 
@@ -384,7 +384,9 @@ bool Select_materialize::send_result_set_metadata(List<Item> &list, uint flags)
 {
   DBUG_ASSERT(table == 0);
   if (create_result_table(unit->thd, unit->get_unit_column_types(),
-                          FALSE, thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS, ""))
+                          FALSE,
+                          thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS,
+                          "", FALSE, TRUE))
     return TRUE;
 
   materialized_cursor= new (&table->mem_root)
diff --git a/sql/sql_db.cc b/sql/sql_db.cc
index 99cb8cf4b3a..1cd615394fb 100644
--- a/sql/sql_db.cc
+++ b/sql/sql_db.cc
@@ -34,6 +34,7 @@
 #include <mysys_err.h>
 #include "sp.h"
 #include "events.h"
+#include "sql_handler.h"
 #include <my_dir.h>
 #include <m_ctype.h>
 #include "log.h"
@@ -860,7 +861,7 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
 
     ha_drop_database(path);
     tmp_disable_binlog(thd);
-    query_cache_invalidate1(db);
+    query_cache_invalidate1(thd, db);
     (void) sp_drop_db_routines(thd, db); /* @todo Do not ignore errors */
 #ifdef HAVE_EVENT_SCHEDULER
     Events::drop_schema_events(thd, db);
@@ -1357,13 +1358,9 @@ static inline bool
 cmp_db_names(const char *db1_name,
              const char *db2_name)
 {
-  return
-         /* db1 is NULL and db2 is NULL */
-         (!db1_name && !db2_name) ||
-
-         /* db1 is not-NULL, db2 is not-NULL, db1 == db2. */
-         (db1_name && db2_name &&
-         my_strcasecmp(system_charset_info, db1_name, db2_name) == 0);
+  return ((!db1_name && !db2_name) ||
+          (db1_name && db2_name &&
+           my_strcasecmp(system_charset_info, db1_name, db2_name) == 0));
 }
 
 
@@ -1436,12 +1433,9 @@ bool mysql_change_db(THD *thd, const LEX_STRING *new_db_name, bool force_switch)
   Security_context *sctx= thd->security_ctx;
   ulong db_access= sctx->db_access;
   CHARSET_INFO *db_default_cl;
-
   DBUG_ENTER("mysql_change_db");
-  DBUG_PRINT("enter",("name: '%s'", new_db_name->str));
 
-  if (new_db_name == NULL ||
-      new_db_name->length == 0)
+  if (new_db_name->length == 0)
   {
     if (force_switch)
     {
@@ -1450,8 +1444,7 @@ bool mysql_change_db(THD *thd, const LEX_STRING *new_db_name, bool force_switch)
         after loading stored program. The thing is that loading of stored
         program can happen when there is no current database.
 
-        TODO: actually, new_db_name and new_db_name->str seem to be always
-        non-NULL. In case of stored program, new_db_name->str == "" and
+        In case of stored program, new_db_name->str == "" and
         new_db_name->length == 0.
       */
 
@@ -1466,6 +1459,7 @@ bool mysql_change_db(THD *thd, const LEX_STRING *new_db_name, bool force_switch)
       DBUG_RETURN(TRUE);
     }
   }
+  DBUG_PRINT("enter",("name: '%s'", new_db_name->str));
 
   if (is_infoschema_db(new_db_name->str, new_db_name->length))
   {
diff --git a/sql/sql_db.h b/sql/sql_db.h
index bc5c09d8541..1f447c11a52 100644
--- a/sql/sql_db.h
+++ b/sql/sql_db.h
@@ -19,9 +19,7 @@
 #include "hash.h"                               /* HASH */
 
 class THD;
-typedef struct charset_info_st CHARSET_INFO;
 typedef struct st_ha_create_information HA_CREATE_INFO;
-typedef struct st_mysql_lex_string LEX_STRING;
 
 int mysql_create_db(THD *thd, char *db, HA_CREATE_INFO *create, bool silent);
 bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create);
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 4fd1815ad7f..a70f00a439c 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -36,8 +36,8 @@
 #include "sql_trigger.h"
 #include "transaction.h"
 #include "records.h"                            // init_read_record,
+#include "sql_derived.h"                        // mysql_handle_list_of_derived
                                                 // end_read_record
-
 /**
   Implement DELETE SQL word.
 
@@ -59,7 +59,6 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
   bool          const_cond_result;
   ha_rows	deleted= 0;
   bool          reverse= FALSE;
-  bool          skip_record;
   ORDER *order= (ORDER *) ((order_list && order_list->elements) ?
                            order_list->first : NULL);
   uint usable_index= MAX_KEY;
@@ -70,10 +69,21 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
 
   if (open_and_lock_tables(thd, table_list, TRUE, 0))
     DBUG_RETURN(TRUE);
-  if (!(table= table_list->table))
+
+  if (mysql_handle_list_of_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT))
+    DBUG_RETURN(TRUE);
+  if (mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE))
+    DBUG_RETURN(TRUE);
+
+  if (!table_list->updatable)
+  {
+     my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "DELETE");
+     DBUG_RETURN(TRUE);
+  }
+  if (!(table= table_list->table) || !table->created)
   {
-    my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
-	     table_list->view_db.str, table_list->view_name.str);
+      my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
+	       table_list->view_db.str, table_list->view_name.str);
     DBUG_RETURN(TRUE);
   }
   thd_proc_info(thd, "init");
@@ -82,6 +92,11 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
   if (mysql_prepare_delete(thd, table_list, &conds))
     DBUG_RETURN(TRUE);
 
+  if (thd->lex->current_select->first_cond_optimization)
+  {
+    thd->lex->current_select->save_leaf_tables(thd);
+    thd->lex->current_select->first_cond_optimization= 0;
+  }
   /* check ORDER BY even if it can be ignored */
   if (order)
   {
@@ -103,6 +118,10 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
     }
   }
 
+  /* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */
+  if (select_lex->optimize_unflattened_subqueries())
+    DBUG_RETURN(TRUE);
+
   const_cond= (!conds || conds->const_item());
   safe_update=test(thd->variables.option_bits & OPTION_SAFE_UPDATES);
   if (safe_update && const_cond)
@@ -137,7 +156,6 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
       - there should be no delete triggers associated with the table.
   */
   if (!using_limit && const_cond_result &&
-      !(specialflag & (SPECIAL_NO_NEW_FUNC | SPECIAL_SAFE_MODE)) &&
        (!thd->is_current_stmt_binlog_format_row() &&
         !(table->triggers && table->triggers->has_delete_triggers())))
   {
@@ -232,8 +250,14 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
     order= simple_remove_const(order, conds);
 
     bool need_sort;
-    usable_index= get_index_for_order(order, table, select, limit,
-                                      &need_sort, &reverse);
+    if (select && select->quick && select->quick->unique_key_range())
+    { // Single row select (always "ordered")
+      need_sort= FALSE;
+      usable_index= MAX_KEY;
+    }
+    else
+      usable_index= get_index_for_order(order, table, select, limit,
+                                        &need_sort, &reverse);
     if (need_sort)
     {
       DBUG_ASSERT(usable_index == MAX_KEY);
@@ -268,8 +292,15 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
     free_underlaid_joins(thd, select_lex);
     DBUG_RETURN(TRUE);
   }
-  if (usable_index==MAX_KEY || (select && select->quick))
-    init_read_record(&info, thd, table, select, 1, 1, FALSE);
+  if (usable_index == MAX_KEY || (select && select->quick))
+  {
+    if (init_read_record(&info, thd, table, select, 1, 1, FALSE))
+    {
+      delete select;
+      free_underlaid_joins(thd, select_lex);
+      DBUG_RETURN(TRUE);
+    }
+  }
   else
     init_read_record_idx(&info, thd, table, 1, usable_index, reverse);
 
@@ -297,11 +328,11 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
   while (!(error=info.read_record(&info)) && !thd->killed &&
 	 ! thd->is_error())
   {
+    update_virtual_fields(thd, table);
     thd->examined_row_count++;
     // thd->is_error() is tested to disallow delete row on error
-    if (!select || (!select->skip_record(thd, &skip_record) && !skip_record))
+    if (!select || select->skip_record(thd) > 0)
     {
-
       if (table->triggers &&
           table->triggers->process_triggers(thd, TRG_EVENT_DELETE,
                                             TRG_ACTION_BEFORE, FALSE))
@@ -337,8 +368,11 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
 	  InnoDB it can fail in a FOREIGN KEY error or an
 	  out-of-tablespace error.
 	*/
- 	error= 1;
-	break;
+        if (!select_lex->no_error)
+        {
+          error= 1;
+          break;
+        }
       }
     }
     else
@@ -368,6 +402,12 @@ cleanup:
     query_cache_invalidate3(thd, table_list, 1);
   }
 
+  if (thd->lex->current_select->first_cond_optimization)
+  {
+    thd->lex->current_select->save_leaf_tables(thd);
+    thd->lex->current_select->first_cond_optimization= 0;
+  }
+
   delete select;
   transactional_table= table->file->has_transactions();
 
@@ -439,8 +479,8 @@ int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds)
   if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
                                     &thd->lex->select_lex.top_join_list,
                                     table_list, 
-                                    &select_lex->leaf_tables, FALSE, 
-                                    DELETE_ACL, SELECT_ACL) ||
+                                    select_lex->leaf_tables, FALSE, 
+                                    DELETE_ACL, SELECT_ACL, TRUE) ||
       setup_conds(thd, table_list, select_lex->leaf_tables, conds) ||
       setup_ftfuncs(select_lex))
     DBUG_RETURN(TRUE);
@@ -462,7 +502,7 @@ int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds)
     fix_inner_refs(thd, all_fields, select_lex, select_lex->ref_pointer_array))
     DBUG_RETURN(TRUE);
 
-  select_lex->fix_prepare_information(thd, conds, &fake_conds);
+  select_lex->fix_prepare_information(thd, conds, &fake_conds); 
   DBUG_RETURN(FALSE);
 }
 
@@ -498,6 +538,12 @@ int mysql_multi_delete_prepare(THD *thd)
   TABLE_LIST *target_tbl;
   DBUG_ENTER("mysql_multi_delete_prepare");
 
+  if (mysql_handle_derived(lex, DT_INIT))
+    DBUG_RETURN(TRUE);
+  if (mysql_handle_derived(lex, DT_MERGE_FOR_INSERT))
+    DBUG_RETURN(TRUE);
+  if (mysql_handle_derived(lex, DT_PREPARE))
+    DBUG_RETURN(TRUE);
   /*
     setup_tables() need for VIEWs. JOIN::prepare() will not do it second
     time.
@@ -507,10 +553,12 @@ int mysql_multi_delete_prepare(THD *thd)
   if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
                                     &thd->lex->select_lex.top_join_list,
                                     lex->query_tables,
-                                    &lex->select_lex.leaf_tables, FALSE, 
-                                    DELETE_ACL, SELECT_ACL))
+                                    lex->select_lex.leaf_tables, FALSE, 
+                                    DELETE_ACL, SELECT_ACL, FALSE))
     DBUG_RETURN(TRUE);
 
+  if (lex->select_lex.handle_derived(thd->lex, DT_MERGE))  
+    DBUG_RETURN(TRUE);
 
   /*
     Multi-delete can't be constructed over-union => we always have
@@ -522,14 +570,14 @@ int mysql_multi_delete_prepare(THD *thd)
        target_tbl;
        target_tbl= target_tbl->next_local)
   {
-    if (!(target_tbl->table= target_tbl->correspondent_table->table))
+
+    target_tbl->table= target_tbl->correspondent_table->table;
+    if (target_tbl->correspondent_table->is_multitable())
     {
-      DBUG_ASSERT(target_tbl->correspondent_table->view &&
-                  target_tbl->correspondent_table->multitable_view);
-      my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
-               target_tbl->correspondent_table->view_db.str,
-               target_tbl->correspondent_table->view_name.str);
-      DBUG_RETURN(TRUE);
+       my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
+                target_tbl->correspondent_table->view_db.str,
+                target_tbl->correspondent_table->view_name.str);
+       DBUG_RETURN(TRUE);
     }
 
     if (!target_tbl->correspondent_table->updatable ||
@@ -559,6 +607,10 @@ int mysql_multi_delete_prepare(THD *thd)
     with further calls to unique_table
   */
   lex->select_lex.exclude_from_table_unique_test= FALSE;
+  
+  if (lex->select_lex.save_prep_leaf_tables(thd))
+    DBUG_RETURN(TRUE);
+  
   DBUG_RETURN(FALSE);
 }
 
@@ -579,6 +631,12 @@ multi_delete::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
   unit= u;
   do_delete= 1;
   thd_proc_info(thd, "deleting from main table");
+  SELECT_LEX *select_lex= u->first_select();
+  if (select_lex->first_cond_optimization)
+  {
+    if (select_lex->handle_derived(thd->lex, DT_MERGE))
+      DBUG_RETURN(TRUE);
+  }
   DBUG_RETURN(0);
 }
 
@@ -612,9 +670,10 @@ multi_delete::initialize_tables(JOIN *join)
 
 
   walk= delete_tables;
-  for (JOIN_TAB *tab=join->join_tab, *end=join->join_tab+join->tables;
-       tab < end;
-       tab++)
+
+  for (JOIN_TAB *tab= first_linear_tab(join, WITH_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
   {
     if (tab->table->map & tables_to_delete_from)
     {
@@ -693,7 +752,7 @@ multi_delete::~multi_delete()
 }
 
 
-bool multi_delete::send_data(List<Item> &values)
+int multi_delete::send_data(List<Item> &values)
 {
   int secure_counter= delete_while_scanning ? -1 : 0;
   TABLE_LIST *del_table;
@@ -890,7 +949,10 @@ int multi_delete::do_table_deletes(TABLE *table, bool ignore)
   READ_RECORD info;
   ha_rows last_deleted= deleted;
   DBUG_ENTER("do_deletes_for_table");
-  init_read_record(&info, thd, table, NULL, 0, 1, FALSE);
+
+  if (init_read_record(&info, thd, table, NULL, 0, 1, FALSE))
+    DBUG_RETURN(1);
+
   /*
     Ignore any rows not found in reference tables as they may already have
     been deleted by foreign key handling
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index e7ab90954d9..dee2f01fb15 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -25,40 +25,85 @@
 #include "unireg.h"
 #include "sql_derived.h"
 #include "sql_select.h"
+#include "sql_base.h"
 #include "sql_view.h"                         // check_duplicate_names
 #include "sql_acl.h"                          // SELECT_ACL
 
+typedef bool (*dt_processor)(THD *thd, LEX *lex, TABLE_LIST *derived);
+
+bool mysql_derived_init(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_optimize(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_reinit(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, TABLE_LIST *derived);
+
+
+dt_processor processors[]=
+{
+  &mysql_derived_init,
+  &mysql_derived_prepare,
+  &mysql_derived_optimize,
+  &mysql_derived_merge,
+  &mysql_derived_merge_for_insert,
+  &mysql_derived_create,
+  &mysql_derived_fill,
+  &mysql_derived_reinit,
+};
 
 /*
-  Call given derived table processor (preparing or filling tables)
+  Run specified phases on all derived tables/views in given LEX.
 
-  SYNOPSIS
-    mysql_handle_derived()
-    lex                 LEX for this thread
-    processor           procedure of derived table processing
+  @param lex              LEX for this thread
+  @param phases           phases to run derived tables/views through
 
-  RETURN
-    FALSE  OK
-    TRUE   Error
+  @return FALSE  OK
+  @return TRUE   Error
 */
-
 bool
-mysql_handle_derived(LEX *lex, bool (*processor)(THD*, LEX*, TABLE_LIST*))
+mysql_handle_derived(LEX *lex, uint phases)
 {
   bool res= FALSE;
-  if (lex->derived_tables)
+  THD *thd= lex->thd;
+  if (!lex->derived_tables)
+    return FALSE;
+
+  lex->thd->derived_tables_processing= TRUE;
+
+  for (uint phase= 0; phase < DT_PHASES && !res; phase++)
   {
-    lex->thd->derived_tables_processing= TRUE;
+    uint phase_flag= DT_INIT << phase;
+    if (phase_flag > phases)
+      break;
+    if (!(phases & phase_flag))
+      continue;
+    if (phase_flag >= DT_CREATE && !thd->fill_derived_tables())
+      break;
+
     for (SELECT_LEX *sl= lex->all_selects_list;
-	 sl;
+	 sl && !res;
 	 sl= sl->next_select_in_list())
     {
       for (TABLE_LIST *cursor= sl->get_table_list();
-	   cursor;
+	   cursor && !res;
 	   cursor= cursor->next_local)
       {
-	if ((res= (*processor)(lex->thd, lex, cursor)))
-	  goto out;
+        if (!cursor->is_view_or_derived() && phases == DT_MERGE_FOR_INSERT)
+          continue;
+        uint8 allowed_phases= (cursor->is_merged_derived() ? DT_PHASES_MERGE :
+                               DT_PHASES_MATERIALIZE | DT_MERGE_FOR_INSERT);
+        /*
+          Skip derived tables to which the phase isn't applicable.
+          TODO: mark derived at the parse time, later set its type
+          (merged or materialized)
+        */
+        if ((phase_flag != DT_PREPARE && !(allowed_phases & phase_flag)) ||
+            (cursor->merged_for_insert && phase_flag != DT_REINIT &&
+             phase_flag != DT_PREPARE))
+          continue;
+	res= (*processors[phase])(lex->thd, lex, cursor);
       }
       if (lex->describe)
       {
@@ -71,30 +116,435 @@ mysql_handle_derived(LEX *lex, bool (*processor)(THD*, LEX*, TABLE_LIST*))
       }
     }
   }
-out:
+  lex->thd->derived_tables_processing= FALSE;
+  return res;
+}
+
+/*
+  Run through phases for the given derived table/view.
+
+  @param lex             LEX for this thread
+  @param derived         the derived table to handle
+  @param phases          phases to process tables/views through
+
+  @details
+
+  This function processes the derived table (view) 'derived', performing all
+  actions that are to be done on the table at the phases specified by
+  'phases'. The processing is carried out starting from the actions
+  performed at the earlier phases (those having smaller ordinal numbers).
+
+  @note
+  This function runs specified phases of the derived tables handling on the
+  given derived table/view. This function is used in the chain of calls:
+    SELECT_LEX::handle_derived ->
+      TABLE_LIST::handle_derived ->
+        mysql_handle_single_derived
+  This chain of calls implements the bottom-up handling of the derived tables:
+  i.e. the innermost derived tables/views are handled first. This order is
+  required for all phases except the merge and the create steps.
+  For the sake of code simplicity this order is kept for all phases.
+
+  @return FALSE ok
+  @return TRUE  error
+*/
+
+bool
+mysql_handle_single_derived(LEX *lex, TABLE_LIST *derived, uint phases)
+{
+  bool res= FALSE;
+  THD *thd= lex->thd;
+  uint8 allowed_phases= (derived->is_merged_derived() ? DT_PHASES_MERGE :
+                         DT_PHASES_MATERIALIZE);
+  if (!lex->derived_tables)
+    return FALSE;
+
+  lex->thd->derived_tables_processing= TRUE;
+
+  for (uint phase= 0; phase < DT_PHASES; phase++)
+  {
+    uint phase_flag= DT_INIT << phase;
+    if (phase_flag > phases)
+      break;
+    if (!(phases & phase_flag))
+      continue;
+    /* Skip derived tables to which the phase isn't applicable.  */
+    if (phase_flag != DT_PREPARE &&
+        !(allowed_phases & phase_flag))
+      continue;
+    if (phase_flag >= DT_CREATE && !thd->fill_derived_tables())
+      break;
+
+    if ((res= (*processors[phase])(lex->thd, lex, derived)))
+      break;
+  }
   lex->thd->derived_tables_processing= FALSE;
   return res;
 }
 
 
 /**
-  @brief Create temporary table structure (but do not fill it).
+  Run specified phases for derived tables/views in the given list
 
-  @param thd Thread handle
-  @param lex LEX for this thread
-  @param orig_table_list TABLE_LIST for the upper SELECT
+  @param lex        LEX for this thread
+  @param table_list list of derived tables/views to handle
+  @param phases     phases to process tables/views through
 
-  @details 
+  @details
+  This function runs the phases specified by 'phases' on the derived
+  tables/views found in 'table_list' with the help of the
+  TABLE_LIST::handle_derived function.
+  'lex' is passed as an argument to TABLE_LIST::handle_derived.
 
-  This function is called before any command containing derived tables is
-  executed. Currently the function is used for derived tables, i.e.
+  @return FALSE ok
+  @return TRUE  error
+*/
 
-  - Anonymous derived tables, or 
-  - Named derived tables (aka views) with the @c TEMPTABLE algorithm.
-   
-  The table reference, contained in @c orig_table_list, is updated with the
-  fields of a new temporary table.
+bool
+mysql_handle_list_of_derived(LEX *lex, TABLE_LIST *table_list, uint phases)
+{
+  for (TABLE_LIST *tl= table_list; tl; tl= tl->next_local)
+  {
+    if (tl->is_view_or_derived() &&
+        tl->handle_derived(lex, phases))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/**
+  Merge a derived table/view into the embedding select
+
+  @param thd     thread handle
+  @param lex     LEX of the embedding query.
+  @param derived reference to the derived table.
+
+  @details
+  This function merges the given derived table / view into the parent select
+  construction. Any derived table / reference to a view occurring in the FROM
+  clause of the embedding select is represented by a TABLE_LIST structure, a
+  pointer to which is passed to the function as the parameter 'derived'.
+  This structure contains the number/map, alias, a link to SELECT_LEX of the
+  derived table and other info. If the 'derived' table is used in a nested join
+  then additionally the structure contains a reference to the ON expression
+  for this join.
+
+  The merge process results in elimination of the derived table (or the
+  reference to a view) such that:
+    - the FROM list of the derived table/view is wrapped into a nested join
+      after which the nest is added to the FROM list of the embedding select
+    - the WHERE condition of the derived table (view) is ANDed with the ON
+      condition attached to the table.
+
+  @note
+  Tables are merged into the leaf_tables list; the original derived table is
+  also removed from this list. SELECT_LEX::table_list is left untouched.
+  The WHERE expression is merged with the derived table's on_expr and can be
+  found after the merge through SELECT_LEX::table_list.
+
+  Examples of the derived table/view merge:
+
+  Schema:
+  Tables: t1(f1), t2(f2), t3(f3)
+  View v1: SELECT f1 FROM t1 WHERE f1 < 1
+
+  Example with a view:
+    Before merge:
+
+    The query (Q1): SELECT f1,f2 FROM t2 LEFT JOIN v1 ON f1 = f2
+
+       (LEX of the main query)
+                 |
+           (select_lex)
+                 |
+         (FROM table list)
+                 |
+            (join list)= t2, v1
+                             / \
+                            /  (on_expr)= (f1 = f2)
+                            |
+                    (LEX of the v1 view)
+                            |
+                       (select_lex)= SELECT f1 FROM t1 WHERE f1 < 1
+
+
+    After merge:
+
+    The rewritten query Q1 (Q1'):
+      SELECT f1,f2 FROM t2 LEFT JOIN (t1) ON ((f1 = f2) and (f1 < 1))
+
+        (LEX of the main query)
+                   |
+             (select_lex)
+                   |
+           (FROM table list)
+                   |
+               (join list)= t2, (t1)
+                                    \
+                                   (on_expr)= (f1 = f2) and (f1 < 1)
+
+    In this example table numbers are assigned as follows:
+      (outer select): t2 - 1, v1 - 2
+      (inner select): t1 - 1
+    After the merge table numbers will be:
+      (outer select): t2 - 1, t1 - 2
+
+  Example with a derived table:
+    The query Q2:
+      SELECT f1,f2
+       FROM (SELECT f1 FROM t1, t3 WHERE f1=f3 and f1 < 1) tt, t2
+       WHERE f1 = f2
+
+    Before merge:
+              (LEX of the main query)
+                        |
+                  (select_lex)
+                  /           \
+       (FROM table list)   (WHERE clause)= (f1 = f2)
+                  |
+           (join list)= tt, t2
+                       / \
+                      /  (on_expr)= (empty)
+                     /
+           (select_lex)= SELECT f1 FROM t1, t3 WHERE f1 = f3 and f1 < 1
+
+    After merge:
+
+    The rewritten query Q2 (Q2'):
+      SELECT f1,f2
+       FROM (t1, t3) JOIN t2 ON (f1 = f3 and f1 < 1)
+       WHERE f1 = f2
+
+              (LEX of the main query)
+                        |
+                  (select_lex)
+                  /           \
+       (FROM table list)   (WHERE clause)= (f1 = f2)
+                 |
+          (join list)= t2, (t1, t3)
+                                   \
+                                 (on_expr)= (f1 = f3 and f1 < 1)
+
+  In this example table numbers are assigned as follows:
+    (outer select): tt - 1, t2 - 2
+    (inner select): t1 - 1, t3 - 2
+  After the merge table numbers will be:
+    (outer select): t1 - 1, t2 - 2, t3 - 3
+
+  @return FALSE if the derived table/view was successfully merged.
+  @return TRUE if an error occurred.
+*/
+
+bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+  bool res= FALSE;
+  SELECT_LEX *dt_select= derived->get_single_select();
+  table_map map;
+  uint tablenr;
+  SELECT_LEX *parent_lex= derived->select_lex;
+  Query_arena *arena, backup;
+
+  if (derived->merged)
+    return FALSE;
+
+ if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
+     thd->lex->sql_command == SQLCOM_DELETE_MULTI)
+   thd->save_prep_leaf_list= TRUE;
+
+  arena= thd->activate_stmt_arena_if_needed(&backup);  // For easier test
+  derived->merged= TRUE;
+
+  if (!derived->merged_for_insert || 
+      (derived->is_multitable() && 
+       (thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
+        thd->lex->sql_command == SQLCOM_DELETE_MULTI)))
+  {
+    /*
+      Check whether there are enough free bits in the table map to merge the
+      subquery. If not - materialize it. This check isn't cached, so when there
+      are a big and a small subquery and the bigger one can't be merged, it
+      doesn't block the smaller one.
+    */
+    if (parent_lex->get_free_table_map(&map, &tablenr))
+    {
+      /* There are not enough table bits, fall back to materialization. */
+      derived->change_refs_to_fields();
+      derived->set_materialized_derived();
+      goto exit_merge;
+    }
+
+    if (dt_select->leaf_tables.elements + tablenr > MAX_TABLES)
+    {
+      /* There are not enough table bits, fall back to materialization. */
+      derived->change_refs_to_fields();
+      derived->set_materialized_derived();
+      goto exit_merge;
+    }
+
+    if (dt_select->options & OPTION_SCHEMA_TABLE)
+      parent_lex->options |= OPTION_SCHEMA_TABLE;
+
+    parent_lex->cond_count+= dt_select->cond_count;
+
+    if (!derived->get_unit()->prepared)
+    {
+      dt_select->leaf_tables.empty();
+      make_leaves_list(dt_select->leaf_tables, derived, TRUE, 0);
+    } 
+
+    derived->nested_join= (NESTED_JOIN*) thd->calloc(sizeof(NESTED_JOIN));
+    if (!derived->nested_join)
+    {
+      res= TRUE;
+      goto exit_merge;
+    }
+
+    /* Merge derived table's subquery in the parent select. */
+    if (parent_lex->merge_subquery(thd, derived, dt_select, tablenr, map))
+    {
+      res= TRUE;
+      goto exit_merge;
+    }
+
+    /*
+      exclude select lex so it doesn't show up in explain.
+      do this only for derived table as for views this is already done.
+
+      From sql_view.cc
+        Add subqueries units to SELECT into which we merging current view.
+        unit(->next)* chain starts with subqueries that are used by this
+        view and continues with subqueries that are used by other views.
+        We must not add any subquery twice (otherwise we'll form a loop),
+        to do this we remember in end_unit the first subquery that has
+        been already added.
+    */
+    derived->get_unit()->exclude_level();
+    if (parent_lex->join) 
+      parent_lex->join->table_count+= dt_select->join->table_count - 1;
+  }
+  if (derived->get_unit()->prepared)
+  {
+    Item *expr= derived->on_expr;
+    expr= and_conds(expr, dt_select->join ? dt_select->join->conds : 0);
+    if (expr && (derived->prep_on_expr || expr != derived->on_expr))
+    {
+      derived->on_expr= expr;
+      derived->prep_on_expr= expr->copy_andor_structure(thd);
+    }
+    if (derived->on_expr &&
+        ((!derived->on_expr->fixed &&
+          derived->on_expr->fix_fields(thd, &derived->on_expr)) ||
+          derived->on_expr->check_cols(1)))
+    {
+      res= TRUE; /* purecov: inspected */
+      goto exit_merge;
+    }
+    // Update used tables cache according to new table map
+    if (derived->on_expr)
+    {
+      derived->on_expr->fix_after_pullout(parent_lex, &derived->on_expr);
+      fix_list_after_tbl_changes(parent_lex, &derived->nested_join->join_list);
+    }
+  }
+
+exit_merge:
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  return res;
+}
+
+
+/**
+  Merge a view for the embedding INSERT/UPDATE/DELETE
+
+  @param thd     thread handle
+  @param lex     LEX of the embedding query.
+  @param derived reference to the derived table.
+
+  @details
+  This function substitutes the derived table for the first table from
+  the query of the derived table thus making it a correct target table for the
+  INSERT/UPDATE/DELETE statements. As this operation is correct for
+  single-table views only, for multi-table views this function does nothing.
+  The derived parameter isn't checked to be a view as derived tables aren't
+  allowed for INSERT/UPDATE/DELETE statements.
+
+  @return FALSE if the derived table/view was successfully merged.
+  @return TRUE if an error occurred.
+*/
+
+bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+  if (derived->merged_for_insert)
+    return FALSE;
+  if (derived->is_materialized_derived())
+    return mysql_derived_prepare(thd, lex, derived);
+  if (!derived->is_multitable())
+  {
+    if (!derived->updatable)
+      return derived->create_field_translation(thd);
+    if (derived->merge_underlying_list)
+    {
+      derived->table= derived->merge_underlying_list->table;
+      derived->schema_table= derived->merge_underlying_list->schema_table;
+      derived->merged_for_insert= TRUE;
+    }
+  }  
+  return FALSE;
+}
+
+
+/*
+  Initialize a derived table/view
+
+  @param thd	     Thread handle
+  @param lex         LEX of the embedding query.
+  @param derived     reference to the derived table.
+
+  @detail
+  Fill info about derived table/view without preparing an
+  underlying select. Such as: create a field translation for views, mark it as
+  a multitable if it is and so on.
+
+  @return
+    false  OK
+    true   Error
+*/
 
+
+bool mysql_derived_init(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+  SELECT_LEX_UNIT *unit= derived->get_unit();
+  DBUG_ENTER("mysql_derived_init");
+
+  // Skip already prepared views/DT
+  if (!unit || unit->prepared)
+    DBUG_RETURN(FALSE);
+
+  DBUG_RETURN(derived->init_derived(thd, TRUE));
+}
+
+
+/*
+  Create temporary table structure (but do not fill it)
+
+  @param thd	     Thread handle
+  @param lex         LEX of the embedding query.
+  @param derived     reference to the derived table.
+
+  @detail
+  Prepare underlying select for a derived table/view. To properly resolve
+  names in the embedding query the TABLE structure is created. Actual table
+  is created later by the mysql_derived_create function.
+
+  This function is called before any command containing derived table
+  is executed. All types of derived tables are handled by this function:
+  - Anonymous derived tables, or
+  - Named derived tables (aka views).
+
+  The table reference, contained in @c derived, is updated with the
+  fields of a new temporary table.
   Derived tables are stored in @c thd->derived_tables and closed by
   close_thread_tables().
 
@@ -118,210 +568,362 @@ out:
   the state of privilege checking (GRANT_INFO struct) is copied as-is to the
   temporary table.
 
-  This function implements a signature called "derived table processor", and
-  is passed as a function pointer to mysql_handle_derived().
+  Only the TABLE structure is created here, actual table is created by the
+  mysql_derived_create function.
 
   @note This function sets @c SELECT_ACL for @c TEMPTABLE views as well as
   anonymous derived tables, but this is ok since later access checking will
   distinguish between them.
 
-  @see mysql_handle_derived(), mysql_derived_filling(), GRANT_INFO
+  @see mysql_handle_derived(), mysql_derived_fill(), GRANT_INFO
 
   @return
     false  OK
     true   Error
 */
 
-bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *orig_table_list)
+bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived)
 {
-  SELECT_LEX_UNIT *unit= orig_table_list->derived;
-  ulonglong create_options;
+  SELECT_LEX_UNIT *unit= derived->get_unit();
   DBUG_ENTER("mysql_derived_prepare");
   bool res= FALSE;
-  if (unit)
+
+  // Skip already prepared views/DT
+  if (!unit || unit->prepared ||
+      (derived->merged_for_insert && 
+       !(derived->is_multitable() &&
+         (thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
+          thd->lex->sql_command == SQLCOM_DELETE_MULTI))))
+    DBUG_RETURN(FALSE);
+
+  Query_arena *arena= thd->stmt_arena, backup;
+  if (arena->is_conventional())
+    arena= 0;                                   // For easier test
+  else
+    thd->set_n_backup_active_arena(arena, &backup);
+
+  SELECT_LEX *first_select= unit->first_select();
+
+  /* prevent name resolving out of derived table */
+  for (SELECT_LEX *sl= first_select; sl; sl= sl->next_select())
   {
-    SELECT_LEX *first_select= unit->first_select();
-    TABLE *table= 0;
-    select_union *derived_result;
-
-    /* prevent name resolving out of derived table */
-    for (SELECT_LEX *sl= first_select; sl; sl= sl->next_select())
-      sl->context.outer_context= 0;
-
-    if (!(derived_result= new select_union))
-      DBUG_RETURN(TRUE); // out of memory
-
-    lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_DERIVED;
-    // st_select_lex_unit::prepare correctly work for single select
-    if ((res= unit->prepare(thd, derived_result, 0)))
-      goto exit;
-    lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_DERIVED;
-    if ((res= check_duplicate_names(unit->types, 0)))
-      goto exit;
-
-    create_options= (first_select->options | thd->variables.option_bits |
-                     TMP_TABLE_ALL_COLUMNS);
-    /*
-      Temp table is created so that it hounours if UNION without ALL is to be 
-      processed
-
-      As 'distinct' parameter we always pass FALSE (0), because underlying
-      query will control distinct condition by itself. Correct test of
-      distinct underlying query will be is_union &&
-      !unit->union_distinct->next_select() (i.e. it is union and last distinct
-      SELECT is last SELECT of UNION).
-    */
-    if ((res= derived_result->create_result_table(thd, &unit->types, FALSE,
-                                                 create_options,
-                                                 orig_table_list->alias)))
-      goto exit;
+    sl->context.outer_context= 0;
+    // Prepare underlying views/DT first.
+    sl->handle_derived(lex, DT_PREPARE);
+  }
 
-    table= derived_result->table;
+  unit->derived= derived;
+
+  if (!(derived->derived_result= new select_union))
+  { res= TRUE; goto exit; }   // out of memory: 'exit' restores the arena
+
+  lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_DERIVED;
+  // st_select_lex_unit::prepare works correctly for a single select too
+  if ((res= unit->prepare(thd, derived->derived_result, 0)))
+    goto exit;
+  lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_DERIVED;
+  if ((res= check_duplicate_names(unit->types, 0)))
+    goto exit;
+
+  /*
+    Check whether we can merge this derived table into main select.
+    Depending on the result field translation will or will not
+    be created.
+  */
+  if ((res= derived->init_derived(thd, FALSE)))
+    goto exit;
+
+  /*
+    Temp table is created so that it honours if UNION without ALL is to be
+    processed
+
+    As 'distinct' parameter we always pass FALSE (0), because underlying
+    query will control distinct condition by itself. Correct test of
+    distinct underlying query will be is_union &&
+    !unit->union_distinct->next_select() (i.e. it is union and last distinct
+    SELECT is last SELECT of UNION).
+  */
+  thd->create_tmp_table_for_derived= TRUE;
+  res= derived->derived_result->create_result_table(thd, &unit->types, FALSE,
+                                                (first_select->options |
+                                                 thd->variables.option_bits |
+                                                 TMP_TABLE_ALL_COLUMNS),
+                                                derived->alias,
+                                                FALSE, FALSE);
+  /* Always reset the flag, whether or not the table could be created. */
+  thd->create_tmp_table_for_derived= FALSE;
+  /* The failure is kept in 'res' so that the exit path cleans up. */
+  if (res)
+    goto exit;
+
+  derived->table= derived->derived_result->table;
+  if (derived->is_derived() && derived->is_merged_derived())
+    first_select->mark_as_belong_to_derived(derived);
 
 exit:
-    /* Hide "Unknown column" or "Unknown function" error */
-    if (orig_table_list->view)
+  /* Hide "Unknown column" or "Unknown function" error */
+  if (derived->view)
+  {
+    if (thd->is_error() &&
+        (thd->stmt_da->sql_errno() == ER_BAD_FIELD_ERROR ||
+        thd->stmt_da->sql_errno() == ER_FUNC_INEXISTENT_NAME_COLLISION ||
+        thd->stmt_da->sql_errno() == ER_SP_DOES_NOT_EXIST))
     {
-      if (thd->is_error() &&
-          (thd->stmt_da->sql_errno() == ER_BAD_FIELD_ERROR ||
-          thd->stmt_da->sql_errno() == ER_FUNC_INEXISTENT_NAME_COLLISION ||
-          thd->stmt_da->sql_errno() == ER_SP_DOES_NOT_EXIST))
-      {
-        thd->clear_error();
-        my_error(ER_VIEW_INVALID, MYF(0), orig_table_list->db,
-                 orig_table_list->table_name);
-      }
+      thd->clear_error();
+      my_error(ER_VIEW_INVALID, MYF(0), derived->db,
+               derived->table_name);
     }
+  }
 
-    /*
-      if it is preparation PS only or commands that need only VIEW structure
-      then we do not need real data and we can skip execution (and parameters
-      is not defined, too)
-    */
-    if (res)
-    {
-      if (table)
-	free_tmp_table(thd, table);
-      delete derived_result;
-    }
+  /*
+    On failure release the temporary table and the result object created
+    above. On success finish setting up the TABLE (select number, tmp table
+    type, grants) and link it into the list of open derived tables.
+  */
+  if (res)
+  {
+    if (derived->table)
+      free_tmp_table(thd, derived->table);
+    delete derived->derived_result;
+  }
+  else
+  {
+    TABLE *table= derived->table;
+    table->derived_select_number= first_select->select_number;
+    table->s->tmp_table= NON_TRANSACTIONAL_TMP_TABLE;
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+    if (derived->referencing_view)
+      table->grant= derived->grant;
     else
     {
-      if (!thd->fill_derived_tables())
-      {
-	delete derived_result;
-	derived_result= NULL;
-      }
-      orig_table_list->derived_result= derived_result;
-      orig_table_list->table= table;
-      orig_table_list->table_name=        table->s->table_name.str;
-      orig_table_list->table_name_length= table->s->table_name.length;
-      table->derived_select_number= first_select->select_number;
-      table->s->tmp_table= NON_TRANSACTIONAL_TMP_TABLE;
-#ifndef NO_EMBEDDED_ACCESS_CHECKS
-      if (orig_table_list->referencing_view)
-        table->grant= orig_table_list->grant;
-      else
-        table->grant.privilege= SELECT_ACL;
-#endif
-      orig_table_list->db= (char *)"";
-      orig_table_list->db_length= 0;
-      // Force read of table stats in the optimizer
-      table->file->info(HA_STATUS_VARIABLE);
-      /* Add new temporary table to list of open derived tables */
-      table->next= thd->derived_tables;
-      thd->derived_tables= table;
+      table->grant.privilege= SELECT_ACL;
+      if (derived->is_derived())
+        derived->grant.privilege= SELECT_ACL;
     }
+#endif
+    /* Add new temporary table to list of open derived tables */
+    table->next= thd->derived_tables;
+    thd->derived_tables= table;
   }
-  else if (orig_table_list->merge_underlying_list)
-    orig_table_list->set_underlying_merge();
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
   DBUG_RETURN(res);
 }
 
 
-/*
-  fill derived table
-
-  SYNOPSIS
-    mysql_derived_filling()
-    thd			Thread handle
-    lex                 LEX for this thread
-    unit                node that contains all SELECT's for derived tables
-    orig_table_list     TABLE_LIST for the upper SELECT
-
-  IMPLEMENTATION
-    Derived table is resolved with temporary table. It is created based on the
-    queries defined. After temporary table is filled, if this is not EXPLAIN,
-    then the entire unit / node is deleted. unit is deleted if UNION is used
-    for derived table and node is deleted is it is a  simple SELECT.
-    If you use this function, make sure it's not called at prepare.
-    Due to evaluation of LIMIT clause it can not be used at prepared stage.
-
-  RETURN
-    FALSE  OK
-    TRUE   Error
+/**
+  Runs optimize phase for a derived table/view.
+
+  @param thd     thread handle
+  @param lex     LEX of the embedding query.
+  @param derived reference to the derived table.
+
+  @details
+  Runs the optimize phase for the given derived table/view.
+  If the optimizer finds out that it is of the "SELECT a_constant" type, then
+  this function also materializes it.
+
+  @return FALSE ok.
+  @return TRUE if an error occurs.
 */
 
-bool mysql_derived_filling(THD *thd, LEX *lex, TABLE_LIST *orig_table_list)
+bool mysql_derived_optimize(THD *thd, LEX *lex, TABLE_LIST *derived)
 {
-  TABLE *table= orig_table_list->table;
-  SELECT_LEX_UNIT *unit= orig_table_list->derived;
+  SELECT_LEX_UNIT *unit= derived->get_unit();
+  SELECT_LEX *first_select= unit->first_select();
+  SELECT_LEX *save_current_select= lex->current_select;
+
   bool res= FALSE;
 
-  /*check that table creation pass without problem and it is derived table */
-  if (table && unit)
+  if (unit->optimized)
+    return FALSE;
+  lex->current_select= first_select;
+
+  if (unit->is_union())
   {
-    SELECT_LEX *first_select= unit->first_select();
-    select_union *derived_result= orig_table_list->derived_result;
-    SELECT_LEX *save_current_select= lex->current_select;
-    if (unit->is_union())
-    {
-      // execute union without clean up
-      res= unit->exec();
-    }
-    else
+    // optimize the whole union; nothing is executed here
+    res= unit->optimize();
+  }
+  else if (unit->derived)       // unit->derived is set in mysql_derived_prepare()
+  {
+    if (!derived->is_merged_derived())
     {
-      unit->set_limit(first_select);
-      if (unit->select_limit_cnt == HA_POS_ERROR)
-	first_select->options&= ~OPTION_FOUND_ROWS;
-
-      lex->current_select= first_select;
-      res= mysql_select(thd, &first_select->ref_pointer_array,
-			first_select->table_list.first,
-			first_select->with_wild,
-			first_select->item_list, first_select->where,
-			(first_select->order_list.elements+
-			 first_select->group_list.elements),
-			first_select->order_list.first,
-			first_select->group_list.first,
-			first_select->having, (ORDER*) NULL,
-			(first_select->options | thd->variables.option_bits |
-			 SELECT_NO_UNLOCK),
-			derived_result, unit, first_select);
+      JOIN *join= first_select->join;
+      unit->optimized= TRUE;
+      if ((res= join->optimize()))
+        goto err;
+      if (join->table_count == join->const_tables)
+        derived->fill_me= TRUE;   // only const tables: fill it right below
     }
-
-    if (!res)
+  }
+  /*
+    Materialize derived tables/views of the "SELECT a_constant" type.
+    Such tables should be materialized at the optimization phase for
+    correct constant evaluation.
+  */
+  if (!res && derived->fill_me && !derived->merged_for_insert)
+  {
+    if (derived->is_merged_derived())
     {
-      /*
-        Here we entirely fix both TABLE_LIST and list of SELECT's as
-        there were no derived tables
-      */
-      if (derived_result->flush())
-        res= TRUE;
+      derived->change_refs_to_fields();
+      derived->set_materialized_derived();
     }
-    lex->current_select= save_current_select;
+    if ((res= mysql_derived_create(thd, lex, derived)))
+      goto err;
+    if ((res= mysql_derived_fill(thd, lex, derived)))
+      goto err;
   }
+err:
+  lex->current_select= save_current_select;
   return res;
 }
 
 
 /**
-   Cleans up the SELECT_LEX_UNIT for the derived table (if any).
+  Actually create result table for a materialized derived table/view.
+
+  @param thd     thread handle
+  @param lex     LEX of the embedding query.
+  @param derived reference to the derived table.
+
+  @details
+  This function actually creates the result table for the given 'derived'
+  table/view, but it doesn't fill it.
+  'lex' is not used; 'thd' only supplies option_bits and big_tables.
+
+  @return FALSE ok.
+  @return TRUE if an error occurs.
+*/
+
+bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+  TABLE *tmp_table= derived->table;
+  SELECT_LEX_UNIT *derived_unit= derived->get_unit();
+
+  if (tmp_table->created)
+    return FALSE;                               /* nothing left to do */
+  select_union *sink= (select_union*) derived_unit->result;
+  /* A table of the TMP_ENGINE_HTON engine is created explicitly first. */
+  if (tmp_table->s->db_type() == TMP_ENGINE_HTON &&
+      create_internal_tmp_table(tmp_table, sink->tmp_table_param.keyinfo,
+                                sink->tmp_table_param.start_recinfo,
+                                &sink->tmp_table_param.recinfo,
+                                (derived_unit->first_select()->options |
+                                 thd->variables.option_bits |
+                                 TMP_TABLE_ALL_COLUMNS),
+                                thd->variables.big_tables))
+    return TRUE;
+  if (open_tmp_table(tmp_table))
+    return TRUE;
+  tmp_table->file->extra(HA_EXTRA_WRITE_CACHE);
+  tmp_table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+  return FALSE;
+}
+
+
+/*
+  Execute subquery of a materialized derived table/view and fill the result
+  table.
+
+  @param thd      Thread handle
+  @param lex      LEX for this thread
+  @param derived  reference to the derived table.
+
+  @details
+  Execute the subquery of the given 'derived' table/view and fill the result
+  table. After the result table is filled, if this is not an EXPLAIN statement,
+  the entire unit / node is deleted. The unit is deleted if UNION is used
+  for the derived table and the node is deleted if it is a simple SELECT.
+  'lex' supplies current_select and describe; 'thd' is passed on to mysql_select().
+
+  @note
+  If you use this function, make sure it's not called at prepare.
+  Due to evaluation of LIMIT clause it can not be used at prepared stage.
+
+  @return FALSE  OK
+  @return TRUE   Error
 */
 
-bool mysql_derived_cleanup(THD *thd, LEX *lex, TABLE_LIST *derived)
+bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)
 {
-  SELECT_LEX_UNIT *unit= derived->derived;
-  if (unit)
+  SELECT_LEX_UNIT *unit= derived->get_unit();
+  bool res= FALSE;
+
+  if (unit->executed && !unit->uncacheable && !unit->describe)
+    return FALSE;
+  /* The result table must already have been created at this point. */
+  DBUG_ASSERT(derived->table && derived->table->created);
+  SELECT_LEX *first_select= unit->first_select();
+  select_union *derived_result= derived->derived_result;
+  SELECT_LEX *save_current_select= lex->current_select;
+  if (unit->is_union())
+  {
+    // execute union without clean up
+    res= unit->exec();
+  }
+  else
+  {
+    unit->set_limit(first_select);
+    if (unit->select_limit_cnt == HA_POS_ERROR)
+      first_select->options&= ~OPTION_FOUND_ROWS;
+
+    lex->current_select= first_select;
+    res= mysql_select(thd, &first_select->ref_pointer_array,
+                      first_select->table_list.first,
+                      first_select->with_wild,
+                      first_select->item_list, first_select->where,
+                      (first_select->order_list.elements+
+                       first_select->group_list.elements),
+                      first_select->order_list.first,
+                      first_select->group_list.first,
+                      first_select->having, (ORDER*) NULL,
+                      (first_select->options |thd->variables.option_bits |
+                       SELECT_NO_UNLOCK),
+                      derived_result, unit, first_select);
+  }
+
+  if (!res)
+  {
+    if (derived_result->flush())
+      res= TRUE;
+    unit->executed= TRUE;       // tested by the early return at the top
+  }
+  if (res || !lex->describe) 
     unit->cleanup();
-  return false;
+  lex->current_select= save_current_select;
+
+  return res;
 }
+
+
+/**
+  Re-initialize given derived table/view for the next execution.
+
+  @param  thd         thread handle
+  @param  lex         LEX for this thread
+  @param  derived     reference to the derived table.
+
+  @details
+  Re-initialize given 'derived' table/view for the next execution.
+  All underlying views/derived tables are recursively reinitialized prior
+  to re-initialization of given derived table.
+  'thd' and 'lex' are passed as arguments to called functions.
+
+  @return FALSE  OK
+  @return TRUE   Error
+*/
+
+bool mysql_derived_reinit(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+  SELECT_LEX_UNIT *derived_unit= derived->get_unit();
+
+  if (derived->table != NULL)
+    derived->merged_for_insert= FALSE;
+  derived_unit->unclean();
+  derived_unit->types.empty();
+  /* needed for derived tables and PS, which Item_subquery cannot reset */
+  derived_unit->reinit_exec_mechanism();
+  derived_unit->set_thd(thd);
+  return FALSE;
+}
+
diff --git a/sql/sql_derived.h b/sql/sql_derived.h
index 6c9fe2e50ad..f232445879e 100644
--- a/sql/sql_derived.h
+++ b/sql/sql_derived.h
@@ -20,11 +20,9 @@ struct TABLE_LIST;
 class THD;
 struct LEX;
 
-bool mysql_handle_derived(LEX *lex, bool (*processor)(THD *thd,
-                                                      LEX *lex,
-                                                      TABLE_LIST *table));
-bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *t);
-bool mysql_derived_filling(THD *thd, LEX *lex, TABLE_LIST *t);
+bool mysql_handle_derived(LEX *lex, uint phases);
+bool mysql_handle_single_derived(LEX *lex, TABLE_LIST *derived, uint phases);
+bool mysql_handle_list_of_derived(LEX *lex, TABLE_LIST *dt_list, uint phases);
 
 /**
    Cleans up the SELECT_LEX_UNIT for the derived table (if any).
diff --git a/sql/sql_error.cc b/sql/sql_error.cc
index 443f3b7a33e..06da6250e71 100644
--- a/sql/sql_error.cc
+++ b/sql/sql_error.cc
@@ -603,6 +603,9 @@ void push_warning(THD *thd, MYSQL_ERROR::enum_warning_level level,
 
   (void) thd->raise_condition(code, NULL, level, msg);
 
+  /* Make sure we also count warnings pushed after calling set_ok_status(). */
+  thd->stmt_da->increment_warning();
+
   DBUG_VOID_RETURN;
 }
 
diff --git a/sql/sql_error.h b/sql/sql_error.h
index 955b3767847..00ade934226 100644
--- a/sql/sql_error.h
+++ b/sql/sql_error.h
@@ -90,6 +90,13 @@ public:
     return m_statement_warn_count;
   }
 
+  /* Used to count any warnings pushed after calling set_ok_status(). */
+  void increment_warning()
+  {
+    if (m_status != DA_EMPTY)
+      m_statement_warn_count++;
+  }
+
   Diagnostics_area() { reset_diagnostics_area(); }
 
 private:
@@ -497,34 +504,76 @@ private:
 extern char *err_conv(char *buff, uint to_length, const char *from,
                       uint from_length, CHARSET_INFO *from_cs);
 
-class ErrConvString
+class ErrConv
+{
+protected:
+  mutable char err_buffer[MYSQL_ERRMSG_SIZE];
+public:
+  ErrConv() {}
+  virtual ~ErrConv() {}
+  virtual const char *ptr() const = 0;
+};
+
+class ErrConvString : public ErrConv
+{
+  const char *str;
+  size_t len;
+  CHARSET_INFO *cs;
+public:
+  ErrConvString(const char *str_arg, size_t len_arg, CHARSET_INFO *cs_arg)
+    : ErrConv(), str(str_arg), len(len_arg), cs(cs_arg) {}
+  ErrConvString(String *s)
+    : ErrConv(), str(s->ptr()), len(s->length()), cs(s->charset()) {}
+  const char *ptr() const
+  { return err_conv(err_buffer, sizeof(err_buffer), str, len, cs); }
+};
+
+class ErrConvInteger : public ErrConv
 {
-  char err_buffer[MYSQL_ERRMSG_SIZE];
+  longlong num;
 public:
+  ErrConvInteger(longlong num_arg) : ErrConv(), num(num_arg) {}
+  const char *ptr() const
+  { return llstr(num, err_buffer); }
+};
 
-  ErrConvString(String *str)
+class ErrConvDouble: public ErrConv
+{
+  double num;
+public:
+  ErrConvDouble(double num_arg) : ErrConv(), num(num_arg) {}
+  const char *ptr() const  /* my_gcvt() needs width+1 bytes for the '\0' */
   {
-    (void) err_conv(err_buffer, sizeof(err_buffer), str->ptr(),
-                    str->length(), str->charset());
+    my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(err_buffer) - 1, err_buffer, 0);
+    return err_buffer;
   }
+};
 
-  ErrConvString(const char *str, CHARSET_INFO* cs)
+class ErrConvTime : public ErrConv
+{
+  const MYSQL_TIME *ltime;
+public:
+  ErrConvTime(const MYSQL_TIME *ltime_arg) : ErrConv(), ltime(ltime_arg) {}
+  const char *ptr() const
   {
-    (void) err_conv(err_buffer, sizeof(err_buffer),
-                    str, strlen(str), cs);
+    my_TIME_to_str(ltime, err_buffer, AUTO_SEC_PART_DIGITS);
+    return err_buffer;
   }
+};
 
-  ErrConvString(const char *str, uint length, CHARSET_INFO* cs)
+class ErrConvDecimal : public ErrConv
+{
+  const decimal_t *d;
+public:
+  ErrConvDecimal(const decimal_t *d_arg) : ErrConv(), d(d_arg) {}
+  const char *ptr() const
   {
-    (void) err_conv(err_buffer, sizeof(err_buffer),
-                    str, length, cs);
+    int len= sizeof(err_buffer);
+    decimal2string(d, err_buffer, &len, 0, 0, ' ');
+    return err_buffer;
   }
-
-  ~ErrConvString() { };
-  char *ptr() { return err_buffer; }
 };
 
-
 void push_warning(THD *thd, MYSQL_ERROR::enum_warning_level level,
                   uint code, const char *msg);
 void push_warning_printf(THD *thd, MYSQL_ERROR::enum_warning_level level,
diff --git a/sql/sql_expression_cache.cc b/sql/sql_expression_cache.cc
new file mode 100644
index 00000000000..e65ec3c22b0
--- /dev/null
+++ b/sql/sql_expression_cache.cc
@@ -0,0 +1,324 @@
+/* Copyright (C) 2010-2011 Monty Program Ab & Oleksandr Byelkin
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include "sql_base.h"
+#include "sql_select.h"
+#include "sql_expression_cache.h"
+
+/**
+  Minimum hit ratio to proceed on disk if the in-memory table overflowed.
+  hit_rate = hit / (miss + hit);
+*/
+#define EXPCACHE_MIN_HIT_RATE_FOR_DISK_TABLE 0.7
+/**
+  Minimum hit ratio to keep in memory table (do not switch cache off)
+  hit_rate = hit / (miss + hit);
+*/
+#define EXPCACHE_MIN_HIT_RATE_FOR_MEM_TABLE  0.2
+/**
+  Number of cache misses after which the hit ratio is checked (bounds the
+  performance impact of the cache in the case when it is not applicable)
+*/
+#define EXPCACHE_CHECK_HIT_RATIO_AFTER 200
+
+/*
+  The expression cache is currently used only for caching subqueries, so its
+  statistics variables are named subquery_cache*.
+*/
+ulong subquery_cache_miss, subquery_cache_hit;
+
+Expression_cache_tmptable::Expression_cache_tmptable(THD *thd,
+                                                     List<Item> &dependants,
+                                                     Item *value)
+  :cache_table(NULL), table_thd(thd), items(dependants), val(value),
+   hit(0), miss(0), inited (0)    /* the table itself is created by init() */
+{
+  DBUG_ENTER("Expression_cache_tmptable::Expression_cache_tmptable");
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Disable cache
+*/
+
+void Expression_cache_tmptable::disable_cache()
+{
+  if (cache_table->file->inited)
+    cache_table->file->ha_index_end();
+  free_tmp_table(table_thd, cache_table);
+  cache_table= NULL;      // check_value() and put_value() test this pointer
+}
+
+
+/**
+  Field enumerator for TABLE::add_tmp_key
+
+  @param arg             reference variable with current field number
+
+  @return field number
+*/
+
+static uint field_enumerator(uchar *arg)
+{
+  return (*(uint*) arg)++;
+}
+
+
+/**
+  Initialize temporary table and auxiliary structures for the expression
+  cache
+
+  @details
+  The function creates a temporary table for the expression cache, defines
+  the search index and initializes auxiliary search structures used to check
+  whether a given set of values of the expression parameters is in some
+  cache entry.
+*/
+
+void Expression_cache_tmptable::init()
+{
+  List_iterator<Item> li(items);
+  Item_iterator_list it(li);
+  uint field_counter;
+  DBUG_ENTER("Expression_cache_tmptable::init");
+  DBUG_ASSERT(!inited);
+  inited= TRUE;
+  cache_table= NULL;
+
+  if (items.elements == 0)
+  {
+    DBUG_PRINT("info", ("All parameters were removed by optimizer."));
+    DBUG_VOID_RETURN;
+  }
+
+  /* the result field goes first, in front of the parameters */
+  items.push_front(val);
+
+  cache_table_param.init();
+  /* one field per parameter plus one for the result */
+  cache_table_param.field_count= items.elements;
+  /* the table is created only after its index has been described below */
+  cache_table_param.skip_create_table= 1;
+
+  if (!(cache_table= create_tmp_table(table_thd, &cache_table_param,
+                                      items, (ORDER*) NULL,
+                                      FALSE, TRUE,
+                                      ((table_thd->variables.option_bits |
+                                        TMP_TABLE_ALL_COLUMNS) &
+                                        ~TMP_TABLE_FORCE_MYISAM),
+                                      HA_POS_ERROR,
+                                      (char *)"subquery-cache-table",
+                                      TRUE)))
+  {
+    DBUG_PRINT("error", ("create_tmp_table failed, caching switched off"));
+    DBUG_VOID_RETURN;
+  }
+
+  if (cache_table->s->db_type() != heap_hton)
+  {
+    DBUG_PRINT("error", ("we need only heap table"));
+    goto error;
+  }
+
+  field_counter= 1;     // key parts start after the result field (field 0)
+
+  if (cache_table->alloc_keys(1) ||
+      cache_table->add_tmp_key(0, items.elements - 1, &field_enumerator,
+                                (uchar*)&field_counter, TRUE) ||
+      ref.tmp_table_index_lookup_init(table_thd, cache_table->key_info, it,
+                                      TRUE, 1 /* skip result field*/))
+  {
+    DBUG_PRINT("error", ("creating index failed"));
+    goto error;
+  }
+  cache_table->s->keys= 1;
+  ref.null_rejecting= 1;
+  ref.disable_cache= FALSE;
+  ref.has_record= 0;
+  ref.use_count= 0;
+
+
+  if (open_tmp_table(cache_table))
+  {
+    DBUG_PRINT("error", ("Opening (creating) temporary table failed"));
+    goto error;
+  }
+
+  if (!(cached_result= new Item_field(cache_table->field[0])))
+  {
+    DBUG_PRINT("error", ("Creating Item_field failed"));
+    goto error;
+  }
+
+  DBUG_VOID_RETURN;
+
+error:
+  disable_cache();      // frees the table and resets cache_table to NULL
+  DBUG_VOID_RETURN;
+}
+
+
+Expression_cache_tmptable::~Expression_cache_tmptable()
+{
+  /* Add this cache's counters to the global subquery_cache_* statistics */
+  statistic_add(subquery_cache_miss, miss, &LOCK_status);
+  statistic_add(subquery_cache_hit, hit, &LOCK_status);
+
+  if (cache_table)
+    disable_cache();
+}
+
+
+/**
+  Check if a given set of parameters of the expression is in the cache
+
+  @param [out] value     the expression value found in the cache if any
+
+  @details
+  For a given set of the parameters of the expression the function
+  checks whether it can be found in some entry of the cache. If so
+  the function returns the result of the expression extracted from
+  the cache.
+
+  @retval Expression_cache::HIT if the set of parameters is in the cache
+  @retval Expression_cache::MISS - otherwise
+*/
+
+Expression_cache::result Expression_cache_tmptable::check_value(Item **value)
+{
+  int lookup;
+  DBUG_ENTER("Expression_cache_tmptable::check_value");
+
+  /* No table means the cache is switched off: everything is a miss. */
+  if (!cache_table)
+    DBUG_RETURN(Expression_cache::MISS);
+
+  DBUG_PRINT("info", ("status: %u  has_record %u",
+                      (uint)cache_table->status, (uint)ref.has_record));
+  lookup= join_read_key2(table_thd, NULL, cache_table, &ref);
+  if (lookup == 1)
+    DBUG_RETURN(Expression_cache::ERROR);
+
+  if (lookup == 0)
+  {
+    hit++;
+    *value= cached_result;
+    DBUG_RETURN(Expression_cache::HIT);
+  }
+  /* After a fixed number of misses drop a cache that rarely hits. */
+  if (((++miss) == EXPCACHE_CHECK_HIT_RATIO_AFTER) &&
+      ((double)hit / ((double)hit + miss)) <
+      EXPCACHE_MIN_HIT_RATE_FOR_MEM_TABLE)
+  {
+    DBUG_PRINT("info",
+               ("Early check: hit rate is not so good to keep the cache"));
+    disable_cache();
+  }
+  DBUG_RETURN(Expression_cache::MISS);
+}
+
+
+/**
+  Put a new entry into the expression cache
+
+  @param value     the result of the expression to be put into the cache
+
+  @details
+  The function evaluates 'value' and puts the result into the cache as the
+  result of the expression for the current set of parameters.
+
+  @retval FALSE OK
+  @retval TRUE  Error
+*/
+
+my_bool Expression_cache_tmptable::put_value(Item *value)
+{
+  int error;
+  DBUG_ENTER("Expression_cache_tmptable::put_value");
+  DBUG_ASSERT(inited);
+
+  if (!cache_table)
+  {
+    DBUG_PRINT("info", ("No table so behave as we successfully put value"));
+    DBUG_RETURN(FALSE);
+  }
+
+  *(items.head_ref())= value;   // the list head is the result field
+  fill_record(table_thd, cache_table->field, items, TRUE, TRUE);
+  if (table_thd->is_error())
+    goto err;
+
+  if ((error= cache_table->file->ha_write_tmp_row(cache_table->record[0])))
+  {
+    /* create_internal_tmp_table_from_heap will generate error if needed */
+    if (cache_table->file->is_fatal_error(error, HA_CHECK_DUP))
+      goto err;
+    else
+    {
+      double hit_rate= ((double)hit / ((double)hit + miss));
+      DBUG_ASSERT(miss > 0);
+      if (hit_rate < EXPCACHE_MIN_HIT_RATE_FOR_MEM_TABLE)
+      {
+        DBUG_PRINT("info", ("hit rate is not so good to keep the cache"));
+        disable_cache();
+        DBUG_RETURN(FALSE);
+      }
+      else if (hit_rate < EXPCACHE_MIN_HIT_RATE_FOR_DISK_TABLE)
+      {
+        DBUG_PRINT("info", ("hit rate is not so good to go to disk"));
+        if (cache_table->file->ha_delete_all_rows() ||
+            cache_table->file->ha_write_tmp_row(cache_table->record[0]))
+          goto err;
+      }
+      else
+      {
+        if (create_internal_tmp_table_from_heap(table_thd, cache_table,
+                                                cache_table_param.start_recinfo,
+                                                &cache_table_param.recinfo,
+                                                error, 1))
+          goto err;
+      }
+    }
+  }
+  cache_table->status= 0; /* cache_table->record contains an existing record */
+  ref.has_record= TRUE; /* the same as above */
+  DBUG_PRINT("info", ("has_record: TRUE  status: 0"));
+
+  DBUG_RETURN(FALSE);
+
+err:
+  disable_cache();
+  DBUG_RETURN(TRUE);
+}
+
+
+void Expression_cache_tmptable::print(String *str, enum_query_type query_type)
+{
+  List_iterator<Item> param_it(items);
+  Item *param;
+  bool need_comma= FALSE;
+
+  str->append('<');
+  param_it++;                           /* skip the result field */
+  for (param= param_it++; param; param= param_it++)
+  {
+    if (need_comma)
+      str->append(',');
+    param->print(str, query_type);
+    need_comma= TRUE;
+  }
+  str->append('>');
+}
diff --git a/sql/sql_expression_cache.h b/sql/sql_expression_cache.h
new file mode 100644
index 00000000000..32aecc61dc9
--- /dev/null
+++ b/sql/sql_expression_cache.h
@@ -0,0 +1,95 @@
+#ifndef SQL_EXPRESSION_CACHE_INCLUDED
+#define SQL_EXPRESSION_CACHE_INCLUDED
+
+#include "sql_select.h"
+
+/**
+  Interface for expression cache
+
+  @note
+  Parameters of an expression cache interface are set on the creation of the
+  cache. They are passed when a cache object of the implementation class is
+  constructed. That's why they are not visible in this interface.
+*/
+
+extern ulong subquery_cache_miss, subquery_cache_hit;
+
+class Expression_cache :public Sql_alloc
+{
+public:
+  enum result {ERROR, HIT, MISS};       // possible outcomes of check_value()
+
+  Expression_cache(){};
+  virtual ~Expression_cache() {};
+  /**
+    Shall check whether the cache holds a value of the expression for the
+    current set of values of the expression parameters.  Shall return the
+    result of the expression if it is found in the cache.
+  */
+  virtual result check_value(Item **value)= 0;
+  /**
+    Shall put the value of an expression for the given set of its parameters
+    into the expression cache.  Expression_cache_tmptable returns TRUE on error.
+  */
+  virtual my_bool put_value(Item *value)= 0;
+
+  /**
+    Shall print the cache parameters into 'str'
+  */
+  virtual void print(String *str, enum_query_type query_type)= 0;
+
+  /**
+    Shall tell whether this cache has been initialized
+  */
+  virtual bool is_inited()= 0;
+  /**
+    Shall initialize this cache
+  */
+  virtual void init()= 0;
+};
+
+struct st_table_ref;
+struct st_join_table;
+class Item_field;
+
+
+/**
+  Implementation of expression cache over a temporary table
+*/
+
+class Expression_cache_tmptable :public Expression_cache
+{
+public:
+  Expression_cache_tmptable(THD *thd, List<Item> &dependants, Item *value);
+  virtual ~Expression_cache_tmptable();
+  virtual result check_value(Item **value);
+  virtual my_bool put_value(Item *value);
+
+  void print(String *str, enum_query_type query_type);
+  bool is_inited() { return inited; };
+  void init();
+
+private:
+  void disable_cache();   // used by put_value() on errors or a poor hit rate
+
+  /* tmp table parameters */
+  TMP_TABLE_PARAM cache_table_param;
+  /* temporary table to store this cache */
+  TABLE *cache_table;
+  /* Thread handle for the temporary table */
+  THD *table_thd;
+  /* TABLE_REF for index lookup */
+  struct st_table_ref ref;
+  /* Cached result */
+  Item_field *cached_result;
+  /* List of parameter items; print() skips its first element (the result) */
+  List<Item> &items;
+  /* Value Item example */
+  Item *val;
+  /* hit/miss counters; put_value() derives the hit rate from them */
+  uint hit, miss;
+  /* Set if the object has been successfully initialized with init() */
+  bool inited;
+};
+
+#endif /* SQL_EXPRESSION_CACHE_INCLUDED */
diff --git a/sql/sql_handler.cc b/sql/sql_handler.cc
index 4435dfab9ea..0439a3c8265 100644
--- a/sql/sql_handler.cc
+++ b/sql/sql_handler.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2001, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -61,12 +62,38 @@
 #include "sql_select.h"
 #include "transaction.h"
 
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
 #define HANDLER_TABLES_HASH_SIZE 120
 
 static enum enum_ha_read_modes rkey_to_rnext[]=
 { RNEXT_SAME, RNEXT, RPREV, RNEXT, RPREV, RNEXT, RPREV, RPREV };
 
 /*
+  Set handler to state after create, but keep base information about
+  which table is used
+*/
+
+void SQL_HANDLER::reset()
+{
+  fields.empty();                       // field list built in mysql_ha_open()
+  arena.free_items();                   // items created while 'arena' was active
+  free_root(&mem_root, MYF(0));
+  my_free(lock);                        // lock data from get_lock_data()
+  init();             // NOTE(review): presumably keeps base_data -- confirm
+}  
+  
+/* Free all allocated data */
+
+SQL_HANDLER::~SQL_HANDLER()
+{
+  reset();
+  my_free(base_data);   // one block holding the db, table and handler names
+}
+
+/*
   Get hash key and hash key length.
 
   SYNOPSIS
@@ -85,11 +112,11 @@ static enum enum_ha_read_modes rkey_to_rnext[]=
     Pointer to the TABLE_LIST struct.
 */
 
-static char *mysql_ha_hash_get_key(TABLE_LIST *tables, size_t *key_len_p,
+static char *mysql_ha_hash_get_key(SQL_HANDLER *table, size_t *key_len,
                                    my_bool first __attribute__((unused)))
 {
-  *key_len_p= strlen(tables->alias) + 1 ; /* include '\0' in comparisons */
-  return tables->alias;
+  *key_len= table->handler_name.length + 1 ; /* include '\0' in comparisons */
+  return table->handler_name.str;
 }
 
 
@@ -107,9 +134,9 @@ static char *mysql_ha_hash_get_key(TABLE_LIST *tables, size_t *key_len_p,
     Nothing
 */
 
-static void mysql_ha_hash_free(TABLE_LIST *tables)
+static void mysql_ha_hash_free(SQL_HANDLER *table)
 {
-  my_free(tables);
+  delete table;
 }
 
 /**
@@ -120,34 +147,43 @@ static void mysql_ha_hash_free(TABLE_LIST *tables)
 
   @note Though this function takes a list of tables, only the first list entry
   will be closed.
+  @note handler_object is not deleted!
   @note Broadcasts refresh if it closed a table with old version.
 */
 
-static void mysql_ha_close_table(THD *thd, TABLE_LIST *tables)
+static void mysql_ha_close_table(SQL_HANDLER *handler)
 {
+  THD *thd= handler->thd;
+  TABLE *table= handler->table;
 
-  if (tables->table && !tables->table->s->tmp_table)
+  /* check if table was already closed */
+  if (!table)
+    return;
+
+  if (!table->s->tmp_table)
   {
     /* Non temporary table. */
-    tables->table->file->ha_index_or_rnd_end();
-    tables->table->open_by_handler= 0;
-    (void) close_thread_table(thd, &tables->table);
-    thd->mdl_context.release_lock(tables->mdl_request.ticket);
+    if (handler->lock)
+    {
+      // Mark it unlocked, like in reset_lock_data()
+      reset_lock_data(handler->lock, 1);
+    }
+
+    table->file->ha_index_or_rnd_end();
+    table->open_by_handler= 0;
+    (void) close_thread_table(thd, &table);
+    thd->mdl_context.release_lock(handler->mdl_request.ticket);
   }
-  else if (tables->table)
+  else
   {
     /* Must be a temporary table */
-    TABLE *table= tables->table;
     table->file->ha_index_or_rnd_end();
     table->query_id= thd->query_id;
     table->open_by_handler= 0;
     mark_tmp_table_for_reuse(table);
   }
-
-  /* Mark table as closed, ready for re-open if necessary. */
-  tables->table= NULL;
-  /* Safety, cleanup the pointer to satisfy MDL assertions. */
-  tables->mdl_request.ticket= NULL;
+  my_free(handler->lock);
+  handler->init();
 }
 
 /*
@@ -163,7 +199,7 @@ static void mysql_ha_close_table(THD *thd, TABLE_LIST *tables)
     Though this function takes a list of tables, only the first list entry
     will be opened.
     'reopen' is set when a handler table is to be re-opened. In this case,
-    'tables' is the pointer to the hashed TABLE_LIST object which has been
+    'reopen' is the pointer to the hashed SQL_HANDLER object which has been
     saved on the original open.
     'reopen' is also used to suppress the sending of an 'ok' message.
 
@@ -172,18 +208,18 @@ static void mysql_ha_close_table(THD *thd, TABLE_LIST *tables)
     TRUE  Error
 */
 
-bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen)
+bool mysql_ha_open(THD *thd, TABLE_LIST *tables, SQL_HANDLER *reopen)
 {
-  TABLE_LIST    *hash_tables = NULL;
-  char          *db, *name, *alias;
-  uint          dblen, namelen, aliaslen, counter;
+  SQL_HANDLER   *sql_handler= 0;
+  uint          counter;
   bool          error;
-  TABLE         *backup_open_tables;
+  TABLE         *table, *backup_open_tables;
   MDL_savepoint mdl_savepoint;
+  Query_arena backup_arena;
   DBUG_ENTER("mysql_ha_open");
   DBUG_PRINT("enter",("'%s'.'%s' as '%s'  reopen: %d",
                       tables->db, tables->table_name, tables->alias,
-                      (int) reopen));
+                      reopen != 0));
 
   if (thd->locked_tables_mode)
   {
@@ -201,7 +237,7 @@ bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen)
   if (! my_hash_inited(&thd->handler_tables_hash))
   {
     /*
-      HASH entries are of type TABLE_LIST.
+      HASH entries are of type SQL_HANDLER
     */
     if (my_hash_init(&thd->handler_tables_hash, &my_charset_latin1,
                      HANDLER_TABLES_HASH_SIZE, 0, 0,
@@ -224,51 +260,6 @@ bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen)
     }
   }
 
-  if (! reopen)
-  {
-    /* copy the TABLE_LIST struct */
-    dblen= strlen(tables->db) + 1;
-    namelen= strlen(tables->table_name) + 1;
-    aliaslen= strlen(tables->alias) + 1;
-    if (!(my_multi_malloc(MYF(MY_WME),
-                          &hash_tables, (uint) sizeof(*hash_tables),
-                          &db, (uint) dblen,
-                          &name, (uint) namelen,
-                          &alias, (uint) aliaslen,
-                          NullS)))
-    {
-      DBUG_PRINT("exit",("ERROR"));
-      DBUG_RETURN(TRUE);
-    }
-    /* structure copy */
-    *hash_tables= *tables;
-    hash_tables->db= db;
-    hash_tables->table_name= name;
-    hash_tables->alias= alias;
-    memcpy(hash_tables->db, tables->db, dblen);
-    memcpy(hash_tables->table_name, tables->table_name, namelen);
-    memcpy(hash_tables->alias, tables->alias, aliaslen);
-    /*
-      We can't request lock with explicit duration for this table
-      right from the start as open_tables() can't handle properly
-      back-off for such locks.
-    */
-    hash_tables->mdl_request.init(MDL_key::TABLE, db, name, MDL_SHARED,
-                                  MDL_TRANSACTION);
-    /* for now HANDLER can be used only for real TABLES */
-    hash_tables->required_type= FRMTYPE_TABLE;
-
-    /* add to hash */
-    if (my_hash_insert(&thd->handler_tables_hash, (uchar*) hash_tables))
-    {
-      my_free(hash_tables);
-      DBUG_PRINT("exit",("ERROR"));
-      DBUG_RETURN(TRUE);
-    }
-  }
-  else
-    hash_tables= tables;
-
   /*
     Save and reset the open_tables list so that open_tables() won't
     be able to access (or know about) the previous list. And on return
@@ -279,62 +270,115 @@ bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen)
   */
   backup_open_tables= thd->open_tables;
   thd->set_open_tables(NULL);
-  mdl_savepoint= thd->mdl_context.mdl_savepoint();
 
   /*
-    open_tables() will set 'hash_tables->table' if successful.
+    open_tables() will set 'tables->table' if successful.
     It must be NULL for a real open when calling open_tables().
   */
-  DBUG_ASSERT(! hash_tables->table);
+  DBUG_ASSERT(! tables->table);
+
+  /*
+    We can't request lock with explicit duration for this table
+    right from the start as open_tables() can't handle properly
+    back-off for such locks.
+  */
+  tables->mdl_request.init(MDL_key::TABLE, tables->db, tables->table_name,
+                           MDL_SHARED, MDL_TRANSACTION);
+  mdl_savepoint= thd->mdl_context.mdl_savepoint();
+
+  /* for now HANDLER can be used only for real TABLES */
+  tables->required_type= FRMTYPE_TABLE;
 
   /*
     We use open_tables() here, rather than, say,
     open_ltable() or open_table() because we would like to be able
     to open a temporary table.
   */
-  error= open_tables(thd, &hash_tables, &counter, 0);
+  error= open_tables(thd, &tables, &counter, 0);
+
+  if (error)
+    goto err;
 
-  if (! error &&
-      ! (hash_tables->table->file->ha_table_flags() & HA_CAN_SQL_HANDLER))
+  table= tables->table;
+
+  /* There can be only one table in '*tables'. */
+  if (! (table->file->ha_table_flags() & HA_CAN_SQL_HANDLER))
   {
     my_error(ER_ILLEGAL_HA, MYF(0), tables->alias);
-    error= TRUE;
+    goto err;
   }
-  if (!error &&
-      hash_tables->mdl_request.ticket &&
-      thd->mdl_context.has_lock(mdl_savepoint,
-                                hash_tables->mdl_request.ticket))
+
+  if (tables->mdl_request.ticket &&
+      thd->mdl_context.has_lock(mdl_savepoint, tables->mdl_request.ticket))
   {
     /* The ticket returned is within a savepoint. Make a copy.  */
-    error= thd->mdl_context.clone_ticket(&hash_tables->mdl_request);
-    hash_tables->table->mdl_ticket= hash_tables->mdl_request.ticket;
+    error= thd->mdl_context.clone_ticket(&tables->mdl_request);
+    tables->table->mdl_ticket= tables->mdl_request.ticket;
+    if (error)
+      goto err;
   }
-  if (error)
+
+  if (! reopen)
   {
-    /*
-      No need to rollback statement transaction, it's not started.
-      If called with reopen flag, no need to rollback either,
-      it will be done at statement end.
-    */
-    DBUG_ASSERT(thd->transaction.stmt.is_empty());
-    close_thread_tables(thd);
-    thd->mdl_context.rollback_to_savepoint(mdl_savepoint);
-    thd->set_open_tables(backup_open_tables);
-    if (!reopen)
-      my_hash_delete(&thd->handler_tables_hash, (uchar*) hash_tables);
-    else
-    {
-      hash_tables->table= NULL;
-      /* Safety, cleanup the pointer to satisfy MDL assertions. */
-      hash_tables->mdl_request.ticket= NULL;
-    }
-    DBUG_PRINT("exit",("ERROR"));
-    DBUG_RETURN(TRUE);
+    /* copy data to sql_handler */
+    if (!(sql_handler= new SQL_HANDLER(thd)))
+      goto err;
+    init_alloc_root(&sql_handler->mem_root, 1024, 0);
+
+    sql_handler->db.length= strlen(tables->db);
+    sql_handler->table_name.length= strlen(tables->table_name);
+    sql_handler->handler_name.length= strlen(tables->alias);
+
+    if (!(my_multi_malloc(MY_WME,
+                          &sql_handler->db.str,
+                          (uint) sql_handler->db.length + 1,
+                          &sql_handler->table_name.str,
+                          (uint) sql_handler->table_name.length + 1,
+                          &sql_handler->handler_name.str,
+                          (uint) sql_handler->handler_name.length + 1,
+                          NullS)))
+      goto err;
+    sql_handler->base_data= sql_handler->db.str;  // Free this
+    memcpy(sql_handler->db.str, tables->db, sql_handler->db.length +1);
+    memcpy(sql_handler->table_name.str, tables->table_name,
+           sql_handler->table_name.length+1);
+    memcpy(sql_handler->handler_name.str, tables->alias,
+           sql_handler->handler_name.length +1);
+
+    /* add to hash */
+    if (my_hash_insert(&thd->handler_tables_hash, (uchar*) sql_handler))
+      goto err;
   }
+  else
+  {
+    sql_handler= reopen;
+    sql_handler->reset();
+  }    
+  sql_handler->table= table;
+  memcpy(&sql_handler->mdl_request, &tables->mdl_request,
+         sizeof(tables->mdl_request));
+
+  if (!(sql_handler->lock= get_lock_data(thd, &sql_handler->table, 1,
+                                         GET_LOCK_STORE_LOCKS)))
+    goto err;
+
+  /* Get a list of all fields for send_fields */
+  thd->set_n_backup_active_arena(&sql_handler->arena, &backup_arena);
+  error= table->fill_item_list(&sql_handler->fields);
+  thd->restore_active_arena(&sql_handler->arena, &backup_arena);
+
+  if (error)
+    goto err;
+
+  /* Always read all columns */
+  table->read_set= &table->s->all_set;
+  table->vcol_set= &table->s->all_set;
+
+  /* Restore the state. */
   thd->set_open_tables(backup_open_tables);
-  if (hash_tables->mdl_request.ticket)
+  if (sql_handler->mdl_request.ticket)
   {
-    thd->mdl_context.set_lock_duration(hash_tables->mdl_request.ticket,
+    thd->mdl_context.set_lock_duration(sql_handler->mdl_request.ticket,
                                        MDL_EXPLICIT);
     thd->mdl_context.set_needs_thr_lock_abort(TRUE);
   }
@@ -345,19 +389,42 @@ bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen)
     was opened for HANDLER as it is used to link them together
     (see thd->temporary_tables).
   */
-  DBUG_ASSERT(hash_tables->table->next == NULL ||
-              hash_tables->table->s->tmp_table);
+  DBUG_ASSERT(sql_handler->table->next == NULL ||
+              sql_handler->table->s->tmp_table);
   /*
     If it's a temp table, don't reset table->query_id as the table is
     being used by this handler. For non-temp tables we use this flag
     in asserts.
   */
-  hash_tables->table->open_by_handler= 1;
+  table->open_by_handler= 1;
+
+  /* Safety, cleanup the pointer to satisfy MDL assertions. */
+  tables->mdl_request.ticket= NULL;
 
   if (! reopen)
     my_ok(thd);
   DBUG_PRINT("exit",("OK"));
   DBUG_RETURN(FALSE);
+
+err:
+  /*
+    No need to rollback statement transaction, it's not started.
+    If called with reopen flag, no need to rollback either,
+    it will be done at statement end.
+  */
+  DBUG_ASSERT(thd->transaction.stmt.is_empty());
+  close_thread_tables(thd);
+  thd->mdl_context.rollback_to_savepoint(mdl_savepoint);
+  thd->set_open_tables(backup_open_tables);
+  if (sql_handler)
+  {
+    if (!reopen)
+      my_hash_delete(&thd->handler_tables_hash, (uchar*) sql_handler);
+    else
+      sql_handler->reset(); // or should it be init() ?
+  }
+  DBUG_PRINT("exit",("ERROR"));
+  DBUG_RETURN(TRUE);
 }
 
 
@@ -380,7 +447,7 @@ bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen)
 
 bool mysql_ha_close(THD *thd, TABLE_LIST *tables)
 {
-  TABLE_LIST    *hash_tables;
+  SQL_HANDLER *handler;
   DBUG_ENTER("mysql_ha_close");
   DBUG_PRINT("enter",("'%s'.'%s' as '%s'",
                       tables->db, tables->table_name, tables->alias));
@@ -390,12 +457,12 @@ bool mysql_ha_close(THD *thd, TABLE_LIST *tables)
     my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0));
     DBUG_RETURN(TRUE);
   }
-  if ((hash_tables= (TABLE_LIST*) my_hash_search(&thd->handler_tables_hash,
+  if ((handler= (SQL_HANDLER*) my_hash_search(&thd->handler_tables_hash,
                                                  (uchar*) tables->alias,
                                                  strlen(tables->alias) + 1)))
   {
-    mysql_ha_close_table(thd, hash_tables);
-    my_hash_delete(&thd->handler_tables_hash, (uchar*) hash_tables);
+    mysql_ha_close_table(handler);
+    my_hash_delete(&thd->handler_tables_hash, (uchar*) handler);
   }
   else
   {
@@ -467,6 +534,167 @@ handle_condition(THD *thd,
 }
 
 
+/**
+   Find an open HANDLER by name; re-open its table if it has been closed.
+
+   @param thd           Thread handle owning handler_tables_hash
+   @param name          Name (alias) of the handler to find
+   @return 0            failure (unknown name or re-open failed)
+   @return handler      the SQL_HANDLER found in the hash
+*/  
+
+SQL_HANDLER *mysql_ha_find_handler(THD *thd, const char *name)
+{
+  SQL_HANDLER *handler;
+  if ((handler= (SQL_HANDLER*) my_hash_search(&thd->handler_tables_hash,
+                                              (uchar*) name, strlen(name) + 1)))
+  {                                     // key length includes the '\0'
+    DBUG_PRINT("info-in-hash",("'%s'.'%s' as '%s' table: %p",
+                               handler->db.str,
+                               handler->table_name.str,
+                               handler->handler_name.str, handler->table));
+    if (!handler->table)
+    {
+      /* The handler table has been closed. Re-open it. */
+      TABLE_LIST tmp;
+      tmp.init_one_table(handler->db.str, handler->db.length,
+                         handler->table_name.str, handler->table_name.length,
+                         handler->handler_name.str, TL_READ);
+
+      if (mysql_ha_open(thd, &tmp, handler))    // 3rd arg != 0: reopen
+      {
+        DBUG_PRINT("exit",("reopen failed"));
+        return 0;
+      }
+    }
+  }
+  else
+  {
+    my_error(ER_UNKNOWN_TABLE, MYF(0), name, "HANDLER");
+    return 0;
+  }
+  return handler;
+}
+
+
+/**
+   Check that condition and key name are ok
+
+   @param handler       Handler whose table is checked; results are stored here
+   @param mode          Read mode (RFIRST, RNEXT etc...)
+   @param keyname       Key to use (0 if no key was given)
+   @param key_expr      List of key column values (used when mode is RKEY)
+   @param cond          Where clause (may be 0)
+   @param in_prepare    If we are in prepare phase (we can't evaluate items yet)
+
+   @return 0 ok
+   @return 1 error
+
+   If ok, the values of the used key and the adjusted mode are stored in handler
+*/
+
+static bool
+mysql_ha_fix_cond_and_key(SQL_HANDLER *handler, 
+                          enum enum_ha_read_modes mode, char *keyname,
+                          List<Item> *key_expr,
+                          Item *cond, bool in_prepare)
+{
+  THD *thd= handler->thd;
+  TABLE *table= handler->table;
+  if (cond)
+  {
+    /* This can only be true for temp tables */
+    if (table->query_id != thd->query_id)
+      cond->cleanup();                          // File was reopened
+    if ((!cond->fixed &&
+	 cond->fix_fields(thd, &cond)) || cond->check_cols(1))
+      return 1;
+  }
+
+  if (keyname)
+  {
+    /* Check if same as last keyname. If not, do a full lookup */
+    if (handler->keyno < 0 ||
+        my_strcasecmp(&my_charset_latin1,
+                      keyname,
+                      table->s->key_info[handler->keyno].name))
+    {
+      if ((handler->keyno= find_type(keyname, &table->s->keynames,
+                                     FIND_TYPE_NO_PREFIX) - 1) < 0)
+      {
+        my_error(ER_KEY_DOES_NOT_EXITS, MYF(0), keyname,
+                 handler->handler_name.str);
+        return 1;
+      }
+    }
+
+    /* Check key parts */
+    if (mode == RKEY)
+    {
+      TABLE *table= handler->table;     // NOTE(review): shadows outer 'table'
+      KEY *keyinfo= table->key_info + handler->keyno;
+      KEY_PART_INFO *key_part= keyinfo->key_part;
+      List_iterator<Item> it_ke(*key_expr);
+      Item *item;
+      key_part_map keypart_map;
+      uint key_len;
+
+      if (key_expr->elements > keyinfo->key_parts)
+      {
+        my_error(ER_TOO_MANY_KEY_PARTS, MYF(0), keyinfo->key_parts);
+        return 1;
+      }
+      for (keypart_map= key_len=0 ; (item=it_ke++) ; key_part++)
+      {
+        my_bitmap_map *old_map;
+	/* note that 'item' can be changed by fix_fields() call */
+        if ((!item->fixed &&
+             item->fix_fields(thd, it_ke.ref())) ||
+	    (item= *it_ke.ref())->check_cols(1))
+          return 1;
+	if (item->used_tables() & ~(RAND_TABLE_BIT | PARAM_TABLE_BIT))
+        {
+          my_error(ER_WRONG_ARGUMENTS,MYF(0),"HANDLER ... READ");
+	  return 1;
+        }
+        if (!in_prepare)
+        {
+          old_map= dbug_tmp_use_all_columns(table, table->write_set);
+          (void) item->save_in_field(key_part->field, 1);
+          dbug_tmp_restore_column_map(table->write_set, old_map);
+        }
+        key_len+= key_part->store_length;
+        keypart_map= (keypart_map << 1) | 1;    // one bit per key value given
+      }
+      handler->keypart_map= keypart_map;
+      handler->key_len= key_len;
+    }
+    else
+    {
+      /*
+        Check if the same index involved.
+        We need to always do this check because we may not have yet
+        called the handler since the last keyno change.
+      */
+      if ((uint) handler->keyno != table->file->get_index())
+      {
+        if (mode == RNEXT)
+          mode= RFIRST;
+        else if (mode == RPREV)
+          mode= RLAST;
+      }
+    }
+  }
+  else if (table->file->inited != handler::RND)
+  {
+    /* Convert RNEXT to RFIRST if we haven't started row scan */
+    if (mode == RNEXT)
+      mode= RFIRST;
+  }
+  handler->mode= mode;                          // Store adjusted mode
+  return 0;
+}
+
 /*
   Read from a HANDLER table.
 
@@ -493,17 +721,14 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables,
                    enum ha_rkey_function ha_rkey_mode, Item *cond,
                    ha_rows select_limit_cnt, ha_rows offset_limit_cnt)
 {
-  TABLE_LIST    *hash_tables;
-  TABLE         *table, *backup_open_tables;
-  MYSQL_LOCK    *lock;
-  List<Item>	list;
+  SQL_HANDLER   *handler;
+  TABLE         *table;
   Protocol	*protocol= thd->protocol;
   char		buff[MAX_FIELD_WIDTH];
   String	buffer(buff, sizeof(buff), system_charset_info);
-  int           error, keyno= -1;
+  int           error, keyno;
   uint          num_rows;
   uchar		*UNINIT_VAR(key);
-  uint		UNINIT_VAR(key_len);
   Sql_handler_lock_error_handler sql_handler_lock_error;
   DBUG_ENTER("mysql_ha_read");
   DBUG_PRINT("enter",("'%s'.'%s' as '%s'",
@@ -515,125 +740,81 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables,
     DBUG_RETURN(TRUE);
   }
 
-  thd->lex->select_lex.context.resolve_in_table_list_only(tables);
-  list.push_front(new Item_field(&thd->lex->select_lex.context,
-                                 NULL, NULL, "*"));
-  List_iterator<Item> it(list);
-  it++;
-
 retry:
-  if ((hash_tables= (TABLE_LIST*) my_hash_search(&thd->handler_tables_hash,
-                                                 (uchar*) tables->alias,
-                                                 strlen(tables->alias) + 1)))
-  {
-    table= hash_tables->table;
-    DBUG_PRINT("info-in-hash",("'%s'.'%s' as '%s' table: 0x%lx",
-                               hash_tables->db, hash_tables->table_name,
-                               hash_tables->alias, (long) table));
-    if (!table)
-    {
-      /*
-        The handler table has been closed. Re-open it.
-      */
-      if (mysql_ha_open(thd, hash_tables, 1))
-      {
-        DBUG_PRINT("exit",("reopen failed"));
-        goto err0;
-      }
+  if (!(handler= mysql_ha_find_handler(thd, tables->alias)))
+    goto err0;
 
-      table= hash_tables->table;
-      DBUG_PRINT("info",("re-opened '%s'.'%s' as '%s' tab %p",
-                         hash_tables->db, hash_tables->table_name,
-                         hash_tables->alias, table));
-    }
-  }
-  else
-    table= NULL;
+  table= handler->table;
+  tables->table= table;                         // This is used by fix_fields
+  table->pos_in_table_list= tables;
 
-  if (!table)
+  if (handler->lock->lock_count > 0)
   {
-    my_error(ER_UNKNOWN_TABLE, MYF(0), tables->alias, "HANDLER");
-    goto err0;
-  }
+    bool lock_error;
 
-  /* save open_tables state */
-  backup_open_tables= thd->open_tables;
-  /* Always a one-element list, see mysql_ha_open(). */
-  DBUG_ASSERT(hash_tables->table->next == NULL ||
-              hash_tables->table->s->tmp_table);
-  /*
-    mysql_lock_tables() needs thd->open_tables to be set correctly to
-    be able to handle aborts properly.
-  */
-  thd->set_open_tables(hash_tables->table);
+    handler->lock->locks[0]->type= handler->lock->locks[0]->org_type;
 
+    /* save open_tables state */
+    TABLE* backup_open_tables= thd->open_tables;
+    /* Always a one-element list, see mysql_ha_open(). */
+    DBUG_ASSERT(table->next == NULL || table->s->tmp_table);
+    /*
+      mysql_lock_tables() needs thd->open_tables to be set correctly to
+      be able to handle aborts properly.
+    */
+    thd->set_open_tables(table);
 
-  sql_handler_lock_error.init();
-  thd->push_internal_handler(&sql_handler_lock_error);
+    sql_handler_lock_error.init();
+    thd->push_internal_handler(&sql_handler_lock_error);
 
-  lock= mysql_lock_tables(thd, &thd->open_tables, 1, 0);
+    lock_error= mysql_lock_tables(thd, handler->lock,
+                                  (table->s->tmp_table == NO_TMP_TABLE ?
+                                    MYSQL_LOCK_NOT_TEMPORARY : 0));
 
-  thd->pop_internal_handler();
-  /*
-    In 5.1 and earlier, mysql_lock_tables() could replace the TABLE
-    object with another one (reopen it). This is no longer the case
-    with new MDL.
-  */
-  DBUG_ASSERT(hash_tables->table == thd->open_tables);
-  /* Restore previous context. */
-  thd->set_open_tables(backup_open_tables);
+    thd->pop_internal_handler();
 
-  if (sql_handler_lock_error.need_reopen())
-  {
-    DBUG_ASSERT(!lock && !thd->is_error());
     /*
-      Always close statement transaction explicitly,
-      so that the engine doesn't have to count locks.
+      In 5.1 and earlier, mysql_lock_tables() could replace the TABLE
+      object with another one (reopen it). This is no longer the case
+      with new MDL.
     */
-    trans_rollback_stmt(thd);
-    mysql_ha_close_table(thd, hash_tables);
-    goto retry;
-  }
-
-  if (!lock)
-    goto err0; // mysql_lock_tables() printed error message already
-
-  // Always read all columns
-  hash_tables->table->read_set= &hash_tables->table->s->all_set;
-  tables->table= hash_tables->table;
-
-  if (cond)
-  {
-    if (table->query_id != thd->query_id)
-      cond->cleanup();                          // File was reopened
-    if ((!cond->fixed &&
-	 cond->fix_fields(thd, &cond)) || cond->check_cols(1))
-      goto err;
-  }
+    DBUG_ASSERT(table == thd->open_tables);
+    /* Restore previous context. */
+    thd->set_open_tables(backup_open_tables);
 
-  if (keyname)
-  {
-    if ((keyno= find_type(keyname, &table->s->keynames,
-                          FIND_TYPE_NO_PREFIX) - 1) < 0)
+    if (sql_handler_lock_error.need_reopen())
     {
-      my_error(ER_KEY_DOES_NOT_EXITS, MYF(0), keyname, tables->alias);
-      goto err;
-    }
-    /* Check if the same index involved. */
-    if ((uint) keyno != table->file->get_index())
-    {
-      if (mode == RNEXT)
-        mode= RFIRST;
-      else if (mode == RPREV)
-        mode= RLAST;
+      DBUG_ASSERT(lock_error && !thd->is_error());
+      /*
+        Always close statement transaction explicitly,
+        so that the engine doesn't have to count locks.
+      */
+      trans_rollback_stmt(thd);
+      mysql_ha_close_table(handler);
+      if (thd->stmt_arena->is_stmt_execute())
+      {
+        /*
+          As we have already sent field list and types to the client, we can't
+          handle any changes in the table format for prepared statements.
+          Better to force a reprepare.
+        */
+        my_error(ER_NEED_REPREPARE, MYF(0));
+        goto err0;
+      }
+      goto retry;
     }
+
+    if (lock_error)
+      goto err0; // mysql_lock_tables() printed error message already
   }
 
-  if (insert_fields(thd, &thd->lex->select_lex.context,
-                    tables->db, tables->alias, &it, 0))
+  if (mysql_ha_fix_cond_and_key(handler, mode, keyname, key_expr, cond, 0))
     goto err;
+  mode= handler->mode;
+  keyno= handler->keyno;
 
-  protocol->send_result_set_metadata(&list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
+  protocol->send_result_set_metadata(&handler->fields,
+                                Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
 
   /*
     In ::external_lock InnoDB resets the fields which tell it that
@@ -649,16 +830,16 @@ retry:
     case RNEXT:
       if (table->file->inited != handler::NONE)
       {
+        if ((error= table->file->can_continue_handler_scan()))
+          break;
         if (keyname)
         {
           /* Check if we read from the same index. */
           DBUG_ASSERT((uint) keyno == table->file->get_index());
-          error= table->file->index_next(table->record[0]);
+          error= table->file->ha_index_next(table->record[0]);
         }
         else
-        {
-          error= table->file->rnd_next(table->record[0]);
-        }
+          error= table->file->ha_rnd_next(table->record[0]);
         break;
       }
       /* else fall through */
@@ -667,15 +848,15 @@ retry:
       {
         table->file->ha_index_or_rnd_end();
         table->file->ha_index_init(keyno, 1);
-        error= table->file->index_first(table->record[0]);
+        error= table->file->ha_index_first(table->record[0]);
       }
       else
       {
         table->file->ha_index_or_rnd_end();
 	if (!(error= table->file->ha_rnd_init(1)))
-          error= table->file->rnd_next(table->record[0]);
+          error= table->file->ha_rnd_next(table->record[0]);
       }
-      mode=RNEXT;
+      mode= RNEXT;
       break;
     case RPREV:
       DBUG_ASSERT(keyname != 0);
@@ -683,7 +864,9 @@ retry:
       DBUG_ASSERT((uint) keyno == table->file->get_index());
       if (table->file->inited != handler::NONE)
       {
-        error=table->file->index_prev(table->record[0]);
+        if ((error= table->file->can_continue_handler_scan()))
+          break;
+        error= table->file->ha_index_prev(table->record[0]);
         break;
       }
       /* else fall through */
@@ -691,55 +874,29 @@ retry:
       DBUG_ASSERT(keyname != 0);
       table->file->ha_index_or_rnd_end();
       table->file->ha_index_init(keyno, 1);
-      error= table->file->index_last(table->record[0]);
-      mode=RPREV;
+      error= table->file->ha_index_last(table->record[0]);
+      mode= RPREV;
       break;
     case RNEXT_SAME:
       /* Continue scan on "(keypart1,keypart2,...)=(c1, c2, ...)  */
       DBUG_ASSERT(keyname != 0);
-      error= table->file->index_next_same(table->record[0], key, key_len);
+      error= table->file->ha_index_next_same(table->record[0], key,
+                                             handler->key_len);
       break;
     case RKEY:
     {
       DBUG_ASSERT(keyname != 0);
-      KEY *keyinfo=table->key_info+keyno;
-      KEY_PART_INFO *key_part=keyinfo->key_part;
-      if (key_expr->elements > keyinfo->key_parts)
-      {
-	my_error(ER_TOO_MANY_KEY_PARTS, MYF(0), keyinfo->key_parts);
-	goto err;
-      }
-      List_iterator<Item> it_ke(*key_expr);
-      Item *item;
-      key_part_map keypart_map;
-      for (keypart_map= key_len=0 ; (item=it_ke++) ; key_part++)
-      {
-        my_bitmap_map *old_map;
-	// 'item' can be changed by fix_fields() call
-        if ((!item->fixed &&
-             item->fix_fields(thd, it_ke.ref())) ||
-	    (item= *it_ke.ref())->check_cols(1))
-	  goto err;
-	if (item->used_tables() & ~RAND_TABLE_BIT)
-        {
-          my_error(ER_WRONG_ARGUMENTS,MYF(0),"HANDLER ... READ");
-	  goto err;
-        }
-        old_map= dbug_tmp_use_all_columns(table, table->write_set);
-	(void) item->save_in_field(key_part->field, 1);
-        dbug_tmp_restore_column_map(table->write_set, old_map);
-	key_len+=key_part->store_length;
-        keypart_map= (keypart_map << 1) | 1;
-      }
 
-      if (!(key= (uchar*) thd->calloc(ALIGN_SIZE(key_len))))
+      if (!(key= (uchar*) thd->calloc(ALIGN_SIZE(handler->key_len))))
 	goto err;
       table->file->ha_index_or_rnd_end();
       table->file->ha_index_init(keyno, 1);
-      key_copy(key, table->record[0], table->key_info + keyno, key_len);
-      error= table->file->index_read_map(table->record[0],
-                                         key, keypart_map, ha_rkey_mode);
-      mode=rkey_to_rnext[(int)ha_rkey_mode];
+      key_copy(key, table->record[0], table->key_info + keyno,
+               handler->key_len);
+      error= table->file->ha_index_read_map(table->record[0],
+                                            key, handler->keypart_map,
+                                            ha_rkey_mode);
+      mode= rkey_to_rnext[(int)ha_rkey_mode];
       break;
     }
     default:
@@ -753,13 +910,19 @@ retry:
         continue;
       if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       {
-        sql_print_error("mysql_ha_read: Got error %d when reading table '%s'",
-                        error, tables->table_name);
+        /* Don't give error in the log file for some expected problems */
+        if (error != HA_ERR_RECORD_CHANGED && error != HA_ERR_WRONG_COMMAND)
+          sql_print_error("mysql_ha_read: Got error %d when reading "
+                          "table '%s'",
+                          error, tables->table_name);
         table->file->print_error(error,MYF(0));
+        table->file->ha_index_or_rnd_end();
         goto err;
       }
       goto ok;
     }
+    /* Generate values for virtual fields */
+    update_virtual_fields(thd, table);
     if (cond && !cond->val_int())
     {
       if (thd->is_error())
@@ -770,7 +933,7 @@ retry:
     {
       protocol->prepare_for_resend();
 
-      if (protocol->send_result_set_row(&list))
+      if (protocol->send_result_set_row(&handler->fields))
         goto err;
 
       protocol->write();
@@ -783,14 +946,14 @@ ok:
     so that the engine doesn't have to count locks.
   */
   trans_commit_stmt(thd);
-  mysql_unlock_tables(thd,lock);
+  mysql_unlock_tables(thd, handler->lock, 0);
   my_eof(thd);
   DBUG_PRINT("exit",("OK"));
   DBUG_RETURN(FALSE);
 
 err:
   trans_rollback_stmt(thd);
-  mysql_unlock_tables(thd,lock);
+  mysql_unlock_tables(thd, handler->lock, 0);
 err0:
   DBUG_PRINT("exit",("ERROR"));
   DBUG_RETURN(TRUE);
@@ -798,6 +961,28 @@ err0:
 
 
 /**
+   Prepare for handler read: find the open handler and fix condition and key
+
+   For parameters, see mysql_ha_read(). Returns the handler, or 0 on failure.
+*/
+
+SQL_HANDLER *mysql_ha_read_prepare(THD *thd, TABLE_LIST *tables,
+                                   enum enum_ha_read_modes mode, char *keyname,
+                                   List<Item> *key_expr, Item *cond)
+{
+  SQL_HANDLER *handler;
+  DBUG_ENTER("mysql_ha_read_prepare");
+  if (!(handler= mysql_ha_find_handler(thd, tables->alias)))
+    DBUG_RETURN(0);                      // Lookup by alias gave no handler
+  tables->table= handler->table;         // This is used by fix_fields
+  if (mysql_ha_fix_cond_and_key(handler, mode, keyname, key_expr, cond, 1))
+    DBUG_RETURN(0);                      // Condition or key could not be fixed
+  DBUG_RETURN(handler);                  // Success: hand back the handler
+}
+
+  
+
+/**
   Scan the handler tables hash for matching tables.
 
   @param thd Thread identifier.
@@ -808,30 +993,32 @@ err0:
           table was matched.
 */
 
-static TABLE_LIST *mysql_ha_find(THD *thd, TABLE_LIST *tables)
+static SQL_HANDLER *mysql_ha_find_match(THD *thd, TABLE_LIST *tables)
 {
-  TABLE_LIST *hash_tables, *head= NULL, *first= tables;
-  DBUG_ENTER("mysql_ha_find");
+  SQL_HANDLER *hash_tables, *head= NULL;
+  TABLE_LIST *first= tables;
+  DBUG_ENTER("mysql_ha_find_match");
 
   /* search for all handlers with matching table names */
   for (uint i= 0; i < thd->handler_tables_hash.records; i++)
   {
-    hash_tables= (TABLE_LIST*) my_hash_element(&thd->handler_tables_hash, i);
+    hash_tables= (SQL_HANDLER*) my_hash_element(&thd->handler_tables_hash, i);
+
     for (tables= first; tables; tables= tables->next_local)
     {
       if ((! *tables->db ||
-          ! my_strcasecmp(&my_charset_latin1, hash_tables->db, tables->db)) &&
-          ! my_strcasecmp(&my_charset_latin1, hash_tables->table_name,
+          ! my_strcasecmp(&my_charset_latin1, hash_tables->db.str,
+                          tables->db)) &&
+          ! my_strcasecmp(&my_charset_latin1, hash_tables->table_name.str,
                           tables->table_name))
+      {
+        /* Link into hash_tables list */
+        hash_tables->next= head;
+        head= hash_tables;
         break;
-    }
-    if (tables)
-    {
-      hash_tables->next_local= head;
-      head= hash_tables;
+      }
     }
   }
-
   DBUG_RETURN(head);
 }
 
@@ -847,18 +1034,18 @@ static TABLE_LIST *mysql_ha_find(THD *thd, TABLE_LIST *tables)
 
 void mysql_ha_rm_tables(THD *thd, TABLE_LIST *tables)
 {
-  TABLE_LIST *hash_tables, *next;
+  SQL_HANDLER *hash_tables, *next;
   DBUG_ENTER("mysql_ha_rm_tables");
 
   DBUG_ASSERT(tables);
 
-  hash_tables= mysql_ha_find(thd, tables);
+  hash_tables= mysql_ha_find_match(thd, tables);
 
   while (hash_tables)
   {
-    next= hash_tables->next_local;
+    next= hash_tables->next;
     if (hash_tables->table)
-      mysql_ha_close_table(thd, hash_tables);
+      mysql_ha_close_table(hash_tables);
     my_hash_delete(&thd->handler_tables_hash, (uchar*) hash_tables);
     hash_tables= next;
   }
@@ -888,13 +1075,13 @@ void mysql_ha_flush_tables(THD *thd, TABLE_LIST *all_tables)
   for (TABLE_LIST *table_list= all_tables; table_list;
        table_list= table_list->next_global)
   {
-    TABLE_LIST *hash_tables= mysql_ha_find(thd, table_list);
+    SQL_HANDLER *hash_tables= mysql_ha_find_match(thd, table_list);
     /* Close all aliases of the same table. */
     while (hash_tables)
     {
-      TABLE_LIST *next_local= hash_tables->next_local;
+      SQL_HANDLER *next_local= hash_tables->next;
       if (hash_tables->table)
-        mysql_ha_close_table(thd, hash_tables);
+        mysql_ha_close_table(hash_tables);
       hash_tables= next_local;
     }
   }
@@ -914,7 +1101,7 @@ void mysql_ha_flush_tables(THD *thd, TABLE_LIST *all_tables)
 
 void mysql_ha_flush(THD *thd)
 {
-  TABLE_LIST *hash_tables;
+  SQL_HANDLER *hash_tables;
   DBUG_ENTER("mysql_ha_flush");
 
   mysql_mutex_assert_not_owner(&LOCK_open);
@@ -929,7 +1116,7 @@ void mysql_ha_flush(THD *thd)
 
   for (uint i= 0; i < thd->handler_tables_hash.records; i++)
   {
-    hash_tables= (TABLE_LIST*) my_hash_element(&thd->handler_tables_hash, i);
+    hash_tables= (SQL_HANDLER*) my_hash_element(&thd->handler_tables_hash, i);
     /*
       TABLE::mdl_ticket is 0 for temporary tables so we need extra check.
     */
@@ -938,7 +1125,7 @@ void mysql_ha_flush(THD *thd)
          hash_tables->table->mdl_ticket->has_pending_conflicting_lock()) ||
          (!hash_tables->table->s->tmp_table &&
           hash_tables->table->s->has_old_version())))
-      mysql_ha_close_table(thd, hash_tables);
+      mysql_ha_close_table(hash_tables);
   }
 
   DBUG_VOID_RETURN;
@@ -955,14 +1142,14 @@ void mysql_ha_flush(THD *thd)
 
 void mysql_ha_cleanup(THD *thd)
 {
-  TABLE_LIST *hash_tables;
+  SQL_HANDLER *hash_tables;
   DBUG_ENTER("mysql_ha_cleanup");
 
   for (uint i= 0; i < thd->handler_tables_hash.records; i++)
   {
-    hash_tables= (TABLE_LIST*) my_hash_element(&thd->handler_tables_hash, i);
+    hash_tables= (SQL_HANDLER*) my_hash_element(&thd->handler_tables_hash, i);
     if (hash_tables->table)
-      mysql_ha_close_table(thd, hash_tables);
+      mysql_ha_close_table(hash_tables);
   }
 
   my_hash_free(&thd->handler_tables_hash);
@@ -980,12 +1167,12 @@ void mysql_ha_cleanup(THD *thd)
 
 void mysql_ha_set_explicit_lock_duration(THD *thd)
 {
-  TABLE_LIST *hash_tables;
+  SQL_HANDLER *hash_tables;
   DBUG_ENTER("mysql_ha_set_explicit_lock_duration");
 
   for (uint i= 0; i < thd->handler_tables_hash.records; i++)
   {
-    hash_tables= (TABLE_LIST*) my_hash_element(&thd->handler_tables_hash, i);
+    hash_tables= (SQL_HANDLER*) my_hash_element(&thd->handler_tables_hash, i);
     if (hash_tables->table && hash_tables->table->mdl_ticket)
       thd->mdl_context.set_lock_duration(hash_tables->table->mdl_ticket,
                                          MDL_EXPLICIT);
diff --git a/sql/sql_handler.h b/sql/sql_handler.h
index b21d4595fce..133f553675e 100644
--- a/sql/sql_handler.h
+++ b/sql/sql_handler.h
@@ -1,5 +1,6 @@
-/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
-
+#ifndef SQL_HANDLER_INCLUDED
+#define SQL_HANDLER_INCLUDED
+/* Copyright (C) 2010 Monty Program Ab
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; version 2 of the License.
@@ -13,17 +14,57 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
 
-#ifndef SQL_HANDLER_INCLUDED
-#define SQL_HANDLER_INCLUDED
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface			/* gcc class implementation */
+#endif
 
 #include "sql_class.h"                 /* enum_ha_read_mode */
 #include "my_base.h"                   /* ha_rkey_function, ha_rows */
 #include "sql_list.h"                  /* List */
 
+/* An open handler (HANDLER ... OPEN); stored in thd->handler_tables_hash */
+
+class SQL_HANDLER {
+public:
+  TABLE *table;                    /* 0 after init(); tested before closing */
+  List<Item> fields;               /* Fields, set on open; sent to the client */
+  THD *thd;                        /* Set by the constructor */
+  LEX_STRING handler_name;         /* NOTE(review): presumably alias; confirm */
+  LEX_STRING db;                   /* Compared with my_strcasecmp() on lookup */
+  LEX_STRING table_name;           /* Compared with my_strcasecmp() on lookup */
+  MEM_ROOT mem_root;               /* 'arena' is constructed over this root */
+  MYSQL_LOCK *lock;                /* 0 after init(); unlocked after a read */
+  MDL_request mdl_request;         /* mdl_request.ticket is 0 after init() */
+
+  key_part_map keypart_map;        /* Passed to ha_index_read_map() */
+  int keyno;                       /* Used key; -1 after init() */
+  uint key_len;                    /* Length of the key buffer used for reads */
+  enum enum_ha_read_modes mode;    /* NOTE(review): not set by init(); confirm */
+
+  /* This is only used when deleting many handler objects */
+  SQL_HANDLER *next;
+
+  Query_arena arena;               /* Starts as STMT_INITIALIZED on mem_root */
+  char *base_data;                 /* 0 after construction */
+  SQL_HANDLER(THD *thd_arg) :
+    thd(thd_arg), arena(&mem_root, Query_arena::STMT_INITIALIZED)
+  { init(); clear_alloc_root(&mem_root); base_data= 0; }
+  void init()                      /* Put the handler in the "not open" state */
+  {
+    keyno= -1;
+    table= 0;
+    lock= 0;
+    mdl_request.ticket= 0;
+  }
+  void reset();                    /* Defined outside this header */
+
+  ~SQL_HANDLER();                  /* Defined outside this header */
+};
+
 class THD;
 struct TABLE_LIST;
 
-bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen);
+bool mysql_ha_open(THD *thd, TABLE_LIST *tables, SQL_HANDLER *reopen);
 bool mysql_ha_close(THD *thd, TABLE_LIST *tables);
 bool mysql_ha_read(THD *, TABLE_LIST *,enum enum_ha_read_modes,char *,
                    List<Item> *,enum ha_rkey_function,Item *,ha_rows,ha_rows);
@@ -33,4 +74,7 @@ void mysql_ha_rm_tables(THD *thd, TABLE_LIST *tables);
 void mysql_ha_cleanup(THD *thd);
 void mysql_ha_set_explicit_lock_duration(THD *thd);
 
-#endif /* SQL_HANDLER_INCLUDED */
+SQL_HANDLER *mysql_ha_read_prepare(THD *thd, TABLE_LIST *tables,
+                                   enum enum_ha_read_modes mode, char *keyname,
+                                   List<Item> *key_expr, Item *cond);
+#endif
diff --git a/sql/sql_help.cc b/sql/sql_help.cc
index 99203ef98ba..ca0e695b3fe 100644
--- a/sql/sql_help.cc
+++ b/sql/sql_help.cc
@@ -188,11 +188,14 @@ int search_topics(THD *thd, TABLE *topics, struct st_find_field *find_fields,
 		  SQL_SELECT *select, List<String> *names,
 		  String *name, String *description, String *example)
 {
-  DBUG_ENTER("search_topics");
   int count= 0;
-
   READ_RECORD read_record_info;
-  init_read_record(&read_record_info, thd, topics, select, 1, 0, FALSE);
+  DBUG_ENTER("search_topics");
+
+  /* Should never happen. As this is part of help, we can ignore this */
+  if (init_read_record(&read_record_info, thd, topics, select, 1, 0, FALSE))
+    DBUG_RETURN(0);
+
   while (!read_record_info.read_record(&read_record_info))
   {
     if (!select->cond->val_int())		// Doesn't match like
@@ -228,11 +231,13 @@ int search_topics(THD *thd, TABLE *topics, struct st_find_field *find_fields,
 int search_keyword(THD *thd, TABLE *keywords, struct st_find_field *find_fields,
                    SQL_SELECT *select, int *key_id)
 {
-  DBUG_ENTER("search_keyword");
   int count= 0;
-
   READ_RECORD read_record_info;
-  init_read_record(&read_record_info, thd, keywords, select, 1, 0, FALSE);
+  DBUG_ENTER("search_keyword");
+  /* Should never happen. As this is part of help, we can ignore this */
+  if (init_read_record(&read_record_info, thd, keywords, select, 1, 0, FALSE))
+    DBUG_RETURN(0);
+
   while (!read_record_info.read_record(&read_record_info) && count<2)
   {
     if (!select->cond->val_int())		// Dosn't match like
@@ -302,13 +307,13 @@ int get_topics_for_keyword(THD *thd, TABLE *topics, TABLE *relations,
 
   rkey_id->store((longlong) key_id, TRUE);
   rkey_id->get_key_image(buff, rkey_id->pack_length(), Field::itRAW);
-  int key_res= relations->file->index_read_map(relations->record[0],
-                                               buff, (key_part_map) 1,
-                                               HA_READ_KEY_EXACT);
+  int key_res= relations->file->ha_index_read_map(relations->record[0],
+                                                  buff, (key_part_map) 1,
+                                                  HA_READ_KEY_EXACT);
 
   for ( ;
         !key_res && key_id == (int16) rkey_id->val_int() ;
-	key_res= relations->file->index_next(relations->record[0]))
+	key_res= relations->file->ha_index_next(relations->record[0]))
   {
     uchar topic_id_buff[8];
     longlong topic_id= rtopic_id->val_int();
@@ -316,8 +321,8 @@ int get_topics_for_keyword(THD *thd, TABLE *topics, TABLE *relations,
     field->store((longlong) topic_id, TRUE);
     field->get_key_image(topic_id_buff, field->pack_length(), Field::itRAW);
 
-    if (!topics->file->index_read_map(topics->record[0], topic_id_buff,
-                                      (key_part_map)1, HA_READ_KEY_EXACT))
+    if (!topics->file->ha_index_read_map(topics->record[0], topic_id_buff,
+                                         (key_part_map)1, HA_READ_KEY_EXACT))
     {
       memorize_variant_topic(thd,topics,count,find_fields,
 			     names,name,description,example);
@@ -355,10 +360,11 @@ int search_categories(THD *thd, TABLE *categories,
   Field *pcat_id= find_fields[help_category_help_category_id].field;
   int count= 0;
   READ_RECORD read_record_info;
-
   DBUG_ENTER("search_categories");
 
-  init_read_record(&read_record_info, thd, categories, select,1,0,FALSE);
+  /* Should never happen. As this is part of help, we can ignore this */
+  if (init_read_record(&read_record_info, thd, categories, select,1,0,FALSE))
+    DBUG_RETURN(0);
   while (!read_record_info.read_record(&read_record_info))
   {
     if (select && !select->cond->val_int())
@@ -389,10 +395,13 @@ int search_categories(THD *thd, TABLE *categories,
 void get_all_items_for_category(THD *thd, TABLE *items, Field *pfname,
 				SQL_SELECT *select, List<String> *res)
 {
+  READ_RECORD read_record_info;
   DBUG_ENTER("get_all_items_for_category");
 
-  READ_RECORD read_record_info;
-  init_read_record(&read_record_info, thd, items, select,1,0,FALSE);
+  /* Should never happen. As this is part of help, we can ignore this */
+  if (init_read_record(&read_record_info, thd, items, select,1,0,FALSE))
+    DBUG_VOID_RETURN;
+
   while (!read_record_info.read_record(&read_record_info))
   {
     if (!select->cond->val_int())
@@ -534,7 +543,8 @@ int send_variant_2_list(MEM_ROOT *mem_root, Protocol *protocol,
   String **end= pointers + names->elements;
 
   List_iterator<String> it(*names);
-  for (pos= pointers; pos!=end; (*pos++= it++)) ;
+  for (pos= pointers; pos!=end; (*pos++= it++))
+    ;
 
   my_qsort(pointers,names->elements,sizeof(String*),string_ptr_cmp);
 
@@ -635,7 +645,7 @@ bool mysqld_help(THD *thd, const char *mask)
   Protocol *protocol= thd->protocol;
   SQL_SELECT *select;
   st_find_field used_fields[array_elements(init_used_fields)];
-  TABLE_LIST *leaves= 0;
+  List<TABLE_LIST> leaves;
   TABLE_LIST tables[4];
   List<String> topics_list, categories_list, subcategories_list;
   String name, description, example;
@@ -681,7 +691,7 @@ bool mysqld_help(THD *thd, const char *mask)
     thd->lex->select_lex.context.first_name_resolution_table= &tables[0];
   if (setup_tables(thd, &thd->lex->select_lex.context,
                    &thd->lex->select_lex.top_join_list,
-                   tables, &leaves, FALSE))
+                   tables, leaves, FALSE, FALSE))
     goto error;
   memcpy((char*) used_fields, (char*) init_used_fields, sizeof(used_fields));
   if (init_fields(thd, tables, used_fields, array_elements(used_fields)))
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index eaf7f4c895b..b6ab3e4a7ff 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -76,6 +76,7 @@
 #include "rpl_mi.h"
 #include "transaction.h"
 #include "sql_audit.h"
+#include "sql_derived.h"                        // mysql_handle_derived
 
 #ifndef EMBEDDED_LIBRARY
 static bool delayed_get_table(THD *thd, MDL_request *grl_protection_request,
@@ -88,22 +89,13 @@ static void unlink_blobs(register TABLE *table);
 #endif
 static bool check_view_insertability(THD *thd, TABLE_LIST *view);
 
-/* Define to force use of my_malloc() if the allocated memory block is big */
-
-#ifndef HAVE_ALLOCA
-#define my_safe_alloca(size, min_length) my_alloca(size)
-#define my_safe_afree(ptr, size, min_length) my_afree(ptr)
-#else
-#define my_safe_alloca(size, min_length) ((size <= min_length) ? my_alloca(size) : my_malloc(size,MYF(0)))
-#define my_safe_afree(ptr, size, min_length) if (size > min_length) my_free(ptr)
-#endif
-
 /*
   Check that insert/update fields are from the same single table of a view.
 
   SYNOPSIS
     check_view_single_update()
     fields            The insert/update fields to be checked.
+    values            Values to use for update
     view              The view for insert.
     map     [in/out]  The insert table map.
 
@@ -139,13 +131,12 @@ bool check_view_single_update(List<Item> &fields, List<Item> *values,
   {
     it.init(*values);
     while ((item= it++))
-      tables|= item->used_tables();
+      tables|= item->view_used_tables(view);
   }
 
   /* Convert to real table bits */
   tables&= ~PSEUDO_TABLE_BITS;
 
-
   /* Check found map against provided map */
   if (*map)
   {
@@ -157,6 +148,11 @@ bool check_view_single_update(List<Item> &fields, List<Item> *values,
   if (view->check_single_table(&tbl, tables, view) || tbl == 0)
     goto error;
 
+  /*
+    A buffer for the insert values was allocated for the merged view.
+    Use it.
+  */
+  tbl->table->insert_values= view->table->insert_values;
   view->table= tbl->table;
   *map= tables;
 
@@ -179,6 +175,10 @@ error:
     fields                      The insert fields.
     values                      The insert values.
     check_unique                If duplicate values should be rejected.
+    fields_and_values_from_different_maps
+                                Set to 1 if fields and values are using
+                                different table maps, as in INSERT ... SELECT
+    map                         Store here table map for used fields
 
   NOTE
     Clears TIMESTAMP_AUTO_SET_ON_INSERT from table->timestamp_field_type
@@ -256,6 +256,10 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list,
     */
     table_list->next_local= 0;
     context->resolve_in_table_list_only(table_list);
+    /* 'Unfix' fields to allow correct marking by the setup_fields function. */
+    if (table_list->is_view())
+      unfix_fields(fields);
+
     res= setup_fields(thd, 0, fields, MARK_COLUMNS_WRITE, 0, 0);
 
     /* Restore the current context. */
@@ -265,7 +269,7 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list,
     if (res)
       return -1;
 
-    if (table_list->effective_algorithm == VIEW_ALGORITHM_MERGE)
+    if (table_list->is_view() && table_list->is_merged_derived())
     {
       if (check_view_single_update(fields,
                                    fields_and_values_from_different_maps ?
@@ -293,6 +297,9 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list,
       }
     }
   }
+  /* Mark virtual columns used in the insert statement */
+  if (table->vfield)
+    table->mark_virtual_columns_for_write(TRUE);
   // For the values we need select_priv
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
   table->grant.want_privilege= (SELECT_ACL & ~table->grant.privilege);
@@ -334,7 +341,8 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list,
                                List<Item> &update_values, table_map *map)
 {
   TABLE *table= insert_table_list->table;
-  my_bool timestamp_mark= 0;
+  my_bool timestamp_mark;
+  LINT_INIT(timestamp_mark);
 
   if (table->timestamp_field)
   {
@@ -350,7 +358,8 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list,
   if (setup_fields(thd, 0, update_fields, MARK_COLUMNS_WRITE, 0, 0))
     return -1;
 
-  if (insert_table_list->effective_algorithm == VIEW_ALGORITHM_MERGE &&
+  if (insert_table_list->is_view() &&
+      insert_table_list->is_merged_derived() &&
       check_view_single_update(update_fields, &update_values,
                                insert_table_list, map))
     return -1;
@@ -660,6 +669,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
   bool transactional_table, joins_freed= FALSE;
   bool changed;
   bool was_insert_delayed= (table_list->lock_type ==  TL_WRITE_DELAYED);
+  bool using_bulk_insert= 0;
   uint value_count;
   ulong counter = 1;
   ulonglong id;
@@ -691,8 +701,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
   /*
     We can't write-delayed into a table locked with LOCK TABLES:
     this will lead to a deadlock, since the delayed thread will
-    never be able to get a lock on the table. QQQ: why not
-    upgrade the lock here instead?
+    never be able to get a lock on the table.
   */
   if (table_list->lock_type == TL_WRITE_DELAYED &&
       thd->locked_tables_mode &&
@@ -703,6 +712,12 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
              table_list->table_name);
     DBUG_RETURN(TRUE);
   }
+  /*
+    Mark table_list as the insert target so that the derived table (DT)/view
+    prepare phase is skipped; this is needed for correct access rights checks.
+    TODO: remove this hack
+  */
+  table_list->skip_prepare_derived= TRUE;
 
   if (table_list->lock_type == TL_WRITE_DELAYED)
   {
@@ -714,6 +729,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
     if (open_and_lock_tables(thd, table_list, TRUE, 0))
       DBUG_RETURN(TRUE);
   }
+
   lock_type= table_list->lock_type;
 
   thd_proc_info(thd, "init");
@@ -832,8 +848,12 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
       Engines can't handle a bulk insert in parallel with a read form the
       same table in the same connection.
     */
-    if (thd->locked_tables_mode <= LTM_LOCK_TABLES)
+    if (thd->locked_tables_mode <= LTM_LOCK_TABLES &&
+       values_list.elements > 1)
+    {
+      using_bulk_insert= 1;
       table->file->ha_start_bulk_insert(values_list.elements);
+    }
   }
 
   thd->abort_on_warning= (!ignore && (thd->variables.sql_mode &
@@ -883,7 +903,12 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
           be overwritten by fill_record() anyway (and fill_record() does not
           use default values in this case).
         */
-        table->record[0][0]= share->default_values[0];
+#ifdef HAVE_valgrind                   /* Flag is a bit mask: test with '&' */
+        if (table->file->ha_table_flags() & HA_RECORD_MUST_BE_CLEAN_ON_WRITE)
+          restore_record(table,s->default_values);	// Get empty record
+        else                            // Otherwise only reset byte 0
+#endif /* HAVE_valgrind */
+          table->record[0][0]= share->default_values[0];
 
         /* Fix undefined null_bits. */
         if (share->null_bytes > 1 && share->last_null_bit_pos)
@@ -956,8 +981,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
       auto_inc values from the delayed_insert thread as they share TABLE.
     */
     table->file->ha_release_auto_increment();
-    if (thd->locked_tables_mode <= LTM_LOCK_TABLES &&
-        table->file->ha_end_bulk_insert() && !error)
+    if (using_bulk_insert && table->file->ha_end_bulk_insert() && !error)
     {
       table->file->print_error(my_errno,MYF(0));
       error=1;
@@ -982,7 +1006,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
 
     if (error <= 0 ||
         thd->transaction.stmt.modified_non_trans_table ||
-        was_insert_delayed)
+	was_insert_delayed)
     {
       if (mysql_bin_log.is_open())
       {
@@ -1095,6 +1119,12 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
     ::my_ok(thd, info.copied + info.deleted + updated, id, buff);
   }
   thd->abort_on_warning= 0;
+  if (thd->lex->current_select->first_cond_optimization)
+  {
+    thd->lex->current_select->save_leaf_tables(thd);
+    thd->lex->current_select->first_cond_optimization= 0;
+  }
+  
   DBUG_RETURN(FALSE);
 
 abort:
@@ -1223,6 +1253,11 @@ static bool mysql_prepare_insert_check_table(THD *thd, TABLE_LIST *table_list,
   bool insert_into_view= (table_list->view != 0);
   DBUG_ENTER("mysql_prepare_insert_check_table");
 
+  if (!table_list->updatable)
+  {
+    my_error(ER_NON_INSERTABLE_TABLE, MYF(0), table_list->alias, "INSERT");
+    DBUG_RETURN(TRUE);
+  }
   /*
      first table in list is the one we'll INSERT into, requires INSERT_ACL.
      all others require SELECT_ACL only. the ACL requirement below is for
@@ -1233,14 +1268,16 @@ static bool mysql_prepare_insert_check_table(THD *thd, TABLE_LIST *table_list,
   if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
                                     &thd->lex->select_lex.top_join_list,
                                     table_list,
-                                    &thd->lex->select_lex.leaf_tables,
-                                    select_insert, INSERT_ACL, SELECT_ACL))
+                                    thd->lex->select_lex.leaf_tables,
+                                    select_insert, INSERT_ACL, SELECT_ACL,
+                                    TRUE))
     DBUG_RETURN(TRUE);
 
   if (insert_into_view && !fields.elements)
   {
     thd->lex->empty_field_list_on_rset= 1;
-    if (!table_list->table)
+    if (!thd->lex->select_lex.leaf_tables.head()->table ||
+        table_list->is_multitable())
     {
       my_error(ER_VIEW_NO_INSERT_FIELD_LIST, MYF(0),
                table_list->view_db.str, table_list->view_name.str);
@@ -1325,12 +1362,18 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list,
   bool res= 0;
   table_map map= 0;
   DBUG_ENTER("mysql_prepare_insert");
-  DBUG_PRINT("enter", ("table_list 0x%lx, table 0x%lx, view %d",
+  DBUG_PRINT("enter", ("table_list: 0x%lx  table: 0x%lx  view: %d",
 		       (ulong)table_list, (ulong)table,
 		       (int)insert_into_view));
   /* INSERT should have a SELECT or VALUES clause */
   DBUG_ASSERT (!select_insert || !values);
 
+  if (mysql_handle_derived(thd->lex, DT_INIT))
+    DBUG_RETURN(TRUE); 
+  if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT))
+    DBUG_RETURN(TRUE); 
+  if (mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE))
+    DBUG_RETURN(TRUE); 
   /*
     For subqueries in VALUES() we should not see the table in which we are
     inserting (for INSERT ... SELECT this is done by changing table_list,
@@ -1361,7 +1404,6 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list,
   if (mysql_prepare_insert_check_table(thd, table_list, fields, select_insert))
     DBUG_RETURN(TRUE);
 
-
   /* Prepare the fields in the statement. */
   if (values)
   {
@@ -1414,6 +1456,18 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list,
   if (!table)
     table= table_list->table;
 
+  if (!fields.elements && table->vfield)
+  {
+    for (Field **vfield_ptr= table->vfield; *vfield_ptr; vfield_ptr++)
+    {
+      if ((*vfield_ptr)->stored_in_db)
+      {
+        thd->lex->unit.insert_table_with_stored_vcol= table;
+        break;
+      }
+    }
+  }
+
   if (!select_insert)
   {
     Item *fake_conds= 0;
@@ -1555,7 +1609,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
 	goto err;
       if (table->file->ha_table_flags() & HA_DUPLICATE_POS)
       {
-	if (table->file->rnd_pos(table->record[1],table->file->dup_ref))
+	if (table->file->ha_rnd_pos(table->record[1],table->file->dup_ref))
 	  goto err;
       }
       else
@@ -1576,9 +1630,10 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
 	  }
 	}
 	key_copy((uchar*) key,table->record[0],table->key_info+key_nr,0);
-	if ((error=(table->file->index_read_idx_map(table->record[1],key_nr,
-                                                    (uchar*) key, HA_WHOLE_KEY,
-                                                    HA_READ_KEY_EXACT))))
+	if ((error= (table->file->ha_index_read_idx_map(table->record[1],
+                                                        key_nr, (uchar*) key,
+                                                        HA_WHOLE_KEY,
+                                                        HA_READ_KEY_EXACT))))
 	  goto err;
       }
       if (info->handle_duplicates == DUP_UPDATE)
@@ -1611,7 +1666,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
 
         table->file->restore_auto_increment(prev_insert_id);
         info->touched++;
-        if (!records_are_comparable(table) || compare_records(table))
+        if (!records_are_comparable(table) || compare_record(table))
         {
           if ((error=table->file->ha_update_row(table->record[1],
                                                 table->record[0])) &&
@@ -1820,10 +1875,11 @@ class delayed_row :public ilink {
 public:
   char *record;
   enum_duplicates dup;
-  time_t start_time;
+  my_time_t start_time;
+  ulong start_time_sec_part;
   ulong sql_mode;
   bool auto_increment_field_not_null;
-  bool query_start_used, ignore, log_query;
+  bool query_start_used, ignore, log_query, query_start_sec_part_used;
   bool stmt_depends_on_first_successful_insert_id_in_prev_stmt;
   ulonglong first_successful_insert_id_in_prev_stmt;
   ulonglong forced_insert_id;
@@ -2077,6 +2133,11 @@ bool delayed_get_table(THD *thd, MDL_request *grl_protection_request,
       mysql_mutex_lock(&LOCK_thread_count);
       thread_count++;
       mysql_mutex_unlock(&LOCK_thread_count);
+      /*
+        Annotating delayed inserts is not supported.
+      */
+      di->thd.variables.binlog_annotate_rows_events= 0;
+
       di->thd.set_db(table_list->db, (uint) strlen(table_list->db));
       di->thd.set_query(my_strdup(table_list->table_name,
                                   MYF(MY_WME | ME_FATALERROR)),
@@ -2201,6 +2262,7 @@ TABLE *Delayed_insert::get_local_table(THD* client_thd)
   TABLE *copy;
   TABLE_SHARE *share;
   uchar *bitmap;
+  char *copy_tmp;
   DBUG_ENTER("Delayed_insert::get_local_table");
 
   /* First request insert thread to get a lock */
@@ -2249,20 +2311,21 @@ TABLE *Delayed_insert::get_local_table(THD* client_thd)
     the other record buffers and alignment are unnecessary.
   */
   thd_proc_info(client_thd, "allocating local table");
-  copy= (TABLE*) client_thd->alloc(sizeof(*copy)+
-				   (share->fields+1)*sizeof(Field**)+
-				   share->reclength +
-                                   share->column_bitmap_size*2);
-  if (!copy)
+  copy_tmp= (char*) client_thd->alloc(sizeof(*copy)+
+                                      (share->fields+1)*sizeof(Field**)+
+                                      share->reclength +
+                                      share->column_bitmap_size*3);
+  if (!copy_tmp)
     goto error;
 
   /* Copy the TABLE object. */
+  copy= new (copy_tmp) TABLE;
   *copy= *table;
   /* We don't need to change the file handler here */
   /* Assign the pointers for the field pointers array and the record. */
   field= copy->field= (Field**) (copy + 1);
   bitmap= (uchar*) (field + share->fields + 1);
-  copy->record[0]= (bitmap + share->column_bitmap_size * 2);
+  copy->record[0]= (bitmap + share->column_bitmap_size*3);
   memcpy((char*) copy->record[0], (char*) table->record[0], share->reclength);
   /*
     Make a copy of all fields.
@@ -2304,10 +2367,13 @@ TABLE *Delayed_insert::get_local_table(THD* client_thd)
   copy->def_read_set.bitmap= (my_bitmap_map*) bitmap;
   copy->def_write_set.bitmap= ((my_bitmap_map*)
                                (bitmap + share->column_bitmap_size));
+  copy->def_vcol_set.bitmap= ((my_bitmap_map*)
+                               (bitmap + 2*share->column_bitmap_size));
   copy->tmp_set.bitmap= 0;                      // To catch errors
-  bzero((char*) bitmap, share->column_bitmap_size*2);
+  bzero((char*) bitmap, share->column_bitmap_size*3);
   copy->read_set=  &copy->def_read_set;
   copy->write_set= &copy->def_write_set;
+  copy->vcol_set= &copy->def_vcol_set;
 
   DBUG_RETURN(copy);
 
@@ -2365,8 +2431,10 @@ int write_delayed(THD *thd, TABLE *table, enum_duplicates duplic,
   if (!(row->record= (char*) my_malloc(table->s->reclength, MYF(MY_WME))))
     goto err;
   memcpy(row->record, table->record[0], table->s->reclength);
-  row->start_time=		thd->start_time;
-  row->query_start_used=	thd->query_start_used;
+  row->start_time=                thd->start_time;
+  row->query_start_used=          thd->query_start_used;
+  row->start_time_sec_part=       thd->start_time_sec_part;
+  row->query_start_sec_part_used= thd->query_start_sec_part_used;
   /*
     those are for the binlog: LAST_INSERT_ID() has been evaluated at this
     time, so record does not need it, but statement-based binlogging of the
@@ -2455,6 +2523,7 @@ void kill_delayed_threads(void)
   while ((di= it++))
   {
     di->thd.killed= THD::KILL_CONNECTION;
+    mysql_mutex_lock(&di->thd.LOCK_thd_data);
     if (di->thd.mysys_var)
     {
       mysql_mutex_lock(&di->thd.mysys_var->mutex);
@@ -2472,6 +2541,7 @@ void kill_delayed_threads(void)
       }
       mysql_mutex_unlock(&di->thd.mysys_var->mutex);
     }
+    mysql_mutex_unlock(&di->thd.LOCK_thd_data);
   }
   mysql_mutex_unlock(&LOCK_delayed_insert); // For unlink from list
 }
@@ -2780,13 +2850,14 @@ pthread_handler_t handle_delayed_insert(void *arg)
     DBUG_LEAVE;
   }
 
-  close_thread_tables(thd);			// Free the table
-  thd->mdl_context.release_transactional_locks();
   di->table=0;
   thd->killed= THD::KILL_CONNECTION;	        // If error
-  mysql_cond_broadcast(&di->cond_client);       // Safety
   mysql_mutex_unlock(&di->mutex);
 
+  close_thread_tables(thd);			// Free the table
+  thd->mdl_context.release_transactional_locks();
+  mysql_cond_broadcast(&di->cond_client);       // Safety
+
   mysql_mutex_lock(&LOCK_delayed_create);       // Because of delayed_get_table
   mysql_mutex_lock(&LOCK_delayed_insert);
   /*
@@ -2857,7 +2928,7 @@ bool Delayed_insert::handle_inserts(void)
       or if another thread is removing the current table definition
       from the table cache.
     */
-    my_error(ER_DELAYED_CANT_CHANGE_LOCK,MYF(ME_FATALERROR),
+    my_error(ER_DELAYED_CANT_CHANGE_LOCK, MYF(ME_FATALERROR | ME_NOREFRESH),
              table->s->table_name.str);
     goto err;
   }
@@ -2887,6 +2958,8 @@ bool Delayed_insert::handle_inserts(void)
 
     thd.start_time=row->start_time;
     thd.query_start_used=row->query_start_used;
+    thd.start_time_sec_part=row->start_time_sec_part;
+    thd.query_start_sec_part_used=row->query_start_sec_part_used;
     /*
       To get the exact auto_inc interval to store in the binlog we must not
       use values from the previous interval (of the previous rows).
@@ -3012,10 +3085,11 @@ bool Delayed_insert::handle_inserts(void)
 	if (thr_reschedule_write_lock(*thd.lock->locks,
                                 thd.variables.lock_wait_timeout))
 	{
-    /* This is not known to happen. */
-    my_error(ER_DELAYED_CANT_CHANGE_LOCK,MYF(ME_FATALERROR),
-             table->s->table_name.str);
-    goto err;
+          /* This is not known to happen. */
+          my_error(ER_DELAYED_CANT_CHANGE_LOCK,
+                   MYF(ME_FATALERROR | ME_NOREFRESH),
+                   table->s->table_name.str);
+          goto err;
 	}
 	if (!using_bin_log)
 	  table->file->extra(HA_EXTRA_WRITE_CACHE);
@@ -3105,9 +3179,9 @@ bool mysql_insert_select_prepare(THD *thd)
 {
   LEX *lex= thd->lex;
   SELECT_LEX *select_lex= &lex->select_lex;
-  TABLE_LIST *first_select_leaf_table;
   DBUG_ENTER("mysql_insert_select_prepare");
 
+
   /*
     SELECT_LEX do not belong to INSERT statement, so we can't add WHERE
     clause if table is VIEW
@@ -3120,21 +3194,38 @@ bool mysql_insert_select_prepare(THD *thd)
                            &select_lex->where, TRUE, FALSE, FALSE))
     DBUG_RETURN(TRUE);
 
+  DBUG_ASSERT(select_lex->leaf_tables.elements != 0);
+  List_iterator<TABLE_LIST> ti(select_lex->leaf_tables);
+  TABLE_LIST *table;
+  uint insert_tables;
+
+  if (select_lex->first_cond_optimization)
+  {
+    /* Back up leaf_tables list. */
+    Query_arena *arena= thd->stmt_arena, backup;
+    arena= thd->activate_stmt_arena_if_needed(&backup);  // For easier test
+
+    insert_tables= select_lex->insert_tables;
+    while ((table= ti++) && insert_tables--)
+    {
+      select_lex->leaf_tables_exec.push_back(table);
+      table->tablenr_exec= table->table->tablenr;
+      table->map_exec= table->table->map;
+      table->maybe_null_exec= table->table->maybe_null;
+    }
+    if (arena)
+      thd->restore_active_arena(arena, &backup);
+  }
+  ti.rewind();
   /*
     exclude first table from leaf tables list, because it belong to
     INSERT
   */
-  DBUG_ASSERT(select_lex->leaf_tables != 0);
-  lex->leaf_tables_insert= select_lex->leaf_tables;
   /* skip all leaf tables belonged to view where we are insert */
-  for (first_select_leaf_table= select_lex->leaf_tables->next_leaf;
-       first_select_leaf_table &&
-       first_select_leaf_table->belong_to_view &&
-       first_select_leaf_table->belong_to_view ==
-       lex->leaf_tables_insert->belong_to_view;
-       first_select_leaf_table= first_select_leaf_table->next_leaf)
-  {}
-  select_lex->leaf_tables= first_select_leaf_table;
+  insert_tables= select_lex->insert_tables;
+  while ((table= ti++) && insert_tables--)
+    ti.remove();
+
   DBUG_RETURN(FALSE);
 }
 
@@ -3352,7 +3443,7 @@ void select_insert::cleanup()
 select_insert::~select_insert()
 {
   DBUG_ENTER("~select_insert");
-  if (table)
+  if (table && table->created)
   {
     table->next_number_field=0;
     table->auto_increment_field_not_null= FALSE;
@@ -3364,7 +3455,7 @@ select_insert::~select_insert()
 }
 
 
-bool select_insert::send_data(List<Item> &values)
+int select_insert::send_data(List<Item> &values)
 {
   DBUG_ENTER("select_insert::send_data");
   bool error=0;
@@ -3472,8 +3563,7 @@ bool select_insert::send_eof()
   table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
   table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
 
-  changed= (info.copied || info.deleted || info.updated);
-  if (changed)
+  if ((changed= (info.copied || info.deleted || info.updated)))
   {
     /*
       We must invalidate the table in the query cache before binlog writing
@@ -3665,20 +3755,20 @@ static TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info,
 
   tmp_table.s->db_create_options=0;
   tmp_table.s->blob_ptr_size= portable_sizeof_char_ptr;
-  tmp_table.s->db_low_byte_first= 
-        test(create_info->db_type == myisam_hton ||
-             create_info->db_type == heap_hton);
-  tmp_table.null_row=tmp_table.maybe_null=0;
+  tmp_table.null_row= 0;
+  tmp_table.maybe_null= 0;
 
   while ((item=it++))
   {
     Create_field *cr_field;
     Field *field, *def_field;
     if (item->type() == Item::FUNC_ITEM)
+    {
       if (item->result_type() != STRING_RESULT)
         field= item->tmp_table_field(&tmp_table);
       else
         field= item->tmp_table_field_from_field_type(&tmp_table, 0);
+    }
     else
       field= create_tmp_field(thd, &tmp_table, item, item->type(),
                               (Item ***) 0, &tmp_field, &def_field, 0, 0, 0, 0,
@@ -3752,7 +3842,11 @@ static TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info,
       }
     }
     if (!table)                                   // open failed
+    {
+      if (!thd->is_error())                     // CREATE ... IF NOT EXISTS
+        my_ok(thd);                             //   succeed, but did nothing
       DBUG_RETURN(0);
+    }
   }
 
   DBUG_EXECUTE_IF("sleep_create_select_before_lock", my_sleep(6000000););
@@ -3767,6 +3861,12 @@ static TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info,
   if (! ((*lock)= mysql_lock_tables(thd, &table, 1, 0)) ||
         hooks->postlock(&table, 1))
   {
+    /* purecov: begin tested */
+    /*
+      This can happen in innodb when you get a deadlock when using same table
+      in insert and select
+    */
+    my_error(ER_CANT_LOCK, MYF(0), my_errno);
     if (*lock)
     {
       mysql_unlock_tables(thd, *lock);
@@ -3774,6 +3874,7 @@ static TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info,
     }
     drop_open_table(thd, table, create_table->db, create_table->table_name);
     DBUG_RETURN(0);
+    /* purecov: end */
   }
   DBUG_RETURN(table);
 }
@@ -3889,7 +3990,7 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
     DBUG_RETURN(-1);
   }
 
- /* First field to copy */
+  /* First field to copy */
   field= table->field+table->s->fields - values.elements;
 
   /* Mark all fields that are given values */
diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc
new file mode 100644
index 00000000000..b0fdb7a1c42
--- /dev/null
+++ b/sql/sql_join_cache.cc
@@ -0,0 +1,4576 @@
+/* Copyright (C) 2000-2006 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/**
+  @file
+
+  @brief
+  join cache optimizations
+
+  @defgroup Query_Optimizer  Query Optimizer
+  @{
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#include "key.h"
+#include "sql_base.h"
+#include "sql_select.h"
+#include "opt_subselect.h"
+
+#define NO_MORE_RECORDS_IN_BUFFER  (uint)(-1)
+
+static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save);
+
+/*****************************************************************************
+ *  Join cache module
+******************************************************************************/
+
+/*
+  Fill in the descriptor of a flag field associated with a join cache
+
+  SYNOPSIS
+    add_flag_field_to_join_cache()
+      str           position in a record buffer to copy the field from/to
+      length        length of the field
+      field  IN/OUT pointer to the field descriptor to fill in
+
+  DESCRIPTION
+    The function fills in the descriptor of a cache flag field that the
+    parameter 'field' points to. The function uses the first two
+    parameters to set the position in the record buffer from/to which
+    the field value is to be copied and the length of the copied fragment.
+    The members 'type', 'field' and 'referenced_field_no' are set to 0.
+    Before returning the function advances *field to the next descriptor.
+    The function does not set the members 'blob_length' and 'offset' of
+    the descriptor.
+
+  RETURN VALUE
+    the length of the field
+*/
+
+static
+uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field)
+{
+  CACHE_FIELD *copy= *field;
+  copy->str= str;
+  copy->length= length;
+  copy->type= 0;
+  copy->field= 0;
+  copy->referenced_field_no= 0;
+  (*field)++;
+  return length;    
+}
+
+
+/*
+  Fill in the descriptors of table data fields associated with a join cache
+
+  SYNOPSIS
+    add_table_data_fields_to_join_cache()
+      tab              descriptors of fields from this table are to be filled
+      field_set        descriptors for only these fields are to be created
+      field_cnt IN/OUT     counter of data fields
+      descr  IN/OUT        pointer to the first descriptor to be filled
+      field_ptr_cnt IN/OUT counter of pointers to the data fields
+      descr_ptr IN/OUT     pointer to the first pointer to blob descriptors
+
+  DESCRIPTION
+    The function fills in the descriptors of cache data fields from the table
+    'tab'. The descriptors are filled only for the fields marked in the
+    bitmap 'field_set'.
+    The function fills the descriptors starting from the position pointed
+    to by 'descr'. If an added field is of a BLOB type then a pointer to
+    its descriptor is added to the array descr_ptr.
+    On return 'descr' points to the position after the last added
+    descriptor while 'descr_ptr' points to the position right after the
+    last added pointer.
+
+  RETURN VALUE
+    the total length of the added fields
+*/
+
+static
+uint add_table_data_fields_to_join_cache(JOIN_TAB *tab, 
+                                         MY_BITMAP *field_set,
+                                         uint *field_cnt, 
+                                         CACHE_FIELD **descr,
+                                         uint *field_ptr_cnt,
+                                         CACHE_FIELD ***descr_ptr)
+{
+  Field **fld_ptr;
+  uint len= 0;
+  CACHE_FIELD *copy= *descr;
+  CACHE_FIELD **copy_ptr= *descr_ptr;
+  uint used_fields= bitmap_bits_set(field_set);
+  for (fld_ptr= tab->table->field; used_fields; fld_ptr++)
+  {
+    if (bitmap_is_set(field_set, (*fld_ptr)->field_index))
+    {
+      len+= (*fld_ptr)->fill_cache_field(copy);
+      if (copy->type == CACHE_BLOB)
+      {
+        *copy_ptr= copy;
+        copy_ptr++;
+        (*field_ptr_cnt)++;
+      }
+      copy->field= *fld_ptr;
+      copy->referenced_field_no= 0;
+      copy++;
+      (*field_cnt)++;
+      used_fields--;
+    }
+  }
+  *descr= copy;
+  *descr_ptr= copy_ptr;
+  return len;
+}
+
+/*
+  Determine different counters of fields associated with a record in the cache
+
+  SYNOPSIS
+    calc_record_fields()
+
+  DESCRIPTION
+    The function counts the total number of fields stored in a record
+    of the cache and saves this number in the 'fields' member. It also
+    sets 'start_tab' and determines the number of flag fields and blobs.
+    The function sets 'with_match_flag' on if 'join_tab' needs a match flag
+    i.e. if it is the first inner table of an outer join or a semi-join.
+
+  RETURN VALUE
+    none
+*/
+
+void JOIN_CACHE::calc_record_fields()
+{
+  JOIN_TAB *tab;
+
+  if (prev_cache)
+    tab= prev_cache->join_tab;
+  else
+  {
+    if (join_tab->bush_root_tab)
+    {
+      /* 
+        --ot1--SJM1--------------ot2--...
+                |
+                |
+                +-it1--...--itN
+                        ^____________ this->join_tab is somewhere here, 
+                                      inside an sjm nest.
+
+        The join buffer should store the values of it1.*, it2.*, ..
+        It should not store values of ot1.*.
+      */
+      tab= join_tab->bush_root_tab->bush_children->start;
+    }
+    else
+    {
+      /*
+        -ot1--ot2--SJM1--SJM2--------------ot3--...--otN
+                    |     |                      ^   
+                    |     +-it21--...--it2N      |
+                    |                            \-- we're somewhere here,
+                    +-it11--...--it1N                at the top level
+        
+        The join buffer should store the values of 
+
+          ot1.*, ot2.*, it1{i}, it2{j}.*, ot3.*, ...
+        
+        that is, we should start from the first non-const top-level table. 
+
+        We will need to store columns of SJ-inner tables (it_X_Y.*), but we're
+        not interested in storing the columns of materialization tables
+        themselves. Because of that, if the first non-const top-level table is
+        a materialized table, we move to its bush_children:
+      */
+      tab= join->join_tab + join->const_tables;
+      if (tab->bush_children)
+        tab= tab->bush_children->start;
+    }
+  }
+  DBUG_ASSERT(!tab->bush_children);
+
+  start_tab= tab;
+  fields= 0;
+  blobs= 0;
+  flag_fields= 0;
+  data_field_count= 0;
+  data_field_ptr_count= 0;
+  referenced_fields= 0;
+
+  /*
+    The following loop will get inside SJM nests, because data may be unpacked
+    to sjm-inner tables.
+  */
+  for (; tab != join_tab ; tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+  {	    
+    tab->calc_used_field_length(FALSE);
+    flag_fields+= test(tab->used_null_fields || tab->used_uneven_bit_fields);
+    flag_fields+= test(tab->table->maybe_null);
+    fields+= tab->used_fields;
+    blobs+= tab->used_blobs;
+
+    fields+= tab->check_rowid_field();
+  }
+  if ((with_match_flag= join_tab->use_match_flag()))
+    flag_fields++;
+  fields+= flag_fields;
+}
+
+
+/*
+  Collect information on join key arguments
+
+  SYNOPSIS
+    collect_info_on_key_args()
+
+  DESCRIPTION
+    The function traverses the ref expressions that are used to access the
+    joined table join_tab. For each table 'tab' whose fields are to be stored
+    in the join buffer of the cache the function finds the fields from 'tab'
+    that occur in the ref expressions and marks these fields in the bitmap
+    tab->table->tmp_set. The function counts how many of the marked fields
+    belong to this cache and how many belong to the previous caches
+    and saves the results of the counting in 'local_key_arg_fields' and
+    'external_key_arg_fields' respectively.
+
+  NOTES
+    The function does not do anything if no key is used to join the records
+    from join_tab.
+
+  RETURN VALUE
+    none
+*/  
+
+void JOIN_CACHE::collect_info_on_key_args()
+{
+  JOIN_TAB *tab;
+  JOIN_CACHE *cache;
+  local_key_arg_fields= 0;
+  external_key_arg_fields= 0;
+
+  if (!is_key_access())
+    return;
+
+  TABLE_REF *ref= &join_tab->ref;
+  cache= this;
+  do
+  {
+    for (tab= cache->start_tab; tab != cache->join_tab;
+         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+    { 
+      uint key_args;
+      bitmap_clear_all(&tab->table->tmp_set);
+      for (uint i= 0; i < ref->key_parts; i++)
+      {
+        Item *ref_item= ref->items[i]; 
+        if (!(tab->table->map & ref_item->used_tables()))
+	  continue;
+	 ref_item->walk(&Item::add_field_to_set_processor, 1,
+                        (uchar *) tab->table);
+      }
+      if ((key_args= bitmap_bits_set(&tab->table->tmp_set)))
+      {
+        if (cache == this)
+          local_key_arg_fields+= key_args;
+        else
+          external_key_arg_fields+= key_args;
+      }
+    }
+    cache= cache->prev_cache;
+  } 
+  while (cache);
+
+  return;
+}
+
+
+/*
+  Allocate memory for descriptors and pointers to them associated with the cache
+
+  SYNOPSIS
+    alloc_fields()
+
+  DESCRIPTION
+    The function allocates memory for the array of field descriptors
+    and the array of pointers to the field descriptors used to copy
+    join record data from record buffers into the join buffer and
+    backward. Some pointers refer to the field descriptors associated
+    with previous caches. They are placed at the beginning of the array
+    of pointers and their total number is stored in external_key_arg_fields.
+    The pointer to the first array is assigned to field_descr and the number
+    of the elements in it is precalculated by the function calc_record_fields.
+    The arrays are adjacent: blob_ptr is set right after the descriptors.
+
+  NOTES
+    The memory is allocated in join->thd->memroot
+
+  RETURN VALUE
+    0 if the memory has been allocated, 1 otherwise
+*/
+
+int JOIN_CACHE::alloc_fields()
+{
+  uint ptr_cnt= external_key_arg_fields+blobs+1;
+  uint fields_size= sizeof(CACHE_FIELD)*fields;
+  field_descr= (CACHE_FIELD*) sql_alloc(fields_size +
+                                        sizeof(CACHE_FIELD*)*ptr_cnt);
+  blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + fields_size);
+  return (field_descr == NULL);
+}  
+
+
+/*
+  Create descriptors of the record flag fields stored in the join buffer
+
+  SYNOPSIS
+    create_flag_fields()
+
+  DESCRIPTION
+    The function creates descriptors of the record flag fields stored
+    in the join buffer. These are descriptors for:
+    - an optional match flag field,
+    - table null bitmap fields,
+    - table null row fields.
+    The match flag field is created when 'join_tab' is the first inner
+    table of an outer join or a semi-join. A null bitmap field is
+    created for any table whose fields are to be stored in the join
+    buffer if at least one of these fields is nullable or is a BIT field
+    whose bits are partially stored with null bits. A null row flag
+    is created for any table assigned to the cache if it is an inner
+    table of an outer join.
+    The descriptors for flag fields are placed one after another at the
+    beginning of the array of field descriptors 'field_descr' that
+    contains 'fields' elements. If there is a match flag field the
+    descriptor for it is always first in the sequence of flag fields.
+    The descriptors for other flag fields can follow in an arbitrary
+    order.
+    The flag field values follow in a record stored in the join buffer
+    in the same order as field descriptors, with the match flag always
+    following first.
+    The function sets the value of 'flag_fields' to the total number
+    of the descriptors created for the flag fields.
+    The function sets the value of 'length' to the total length of the
+    flag fields.
+
+  RETURN VALUE
+    none
+*/
+
+void JOIN_CACHE::create_flag_fields()
+{
+  CACHE_FIELD *copy;
+  JOIN_TAB *tab;
+
+  copy= field_descr;
+
+  length=0;
+
+  /* If there is a match flag the first field is always used for this flag */ 
+  if (with_match_flag)
+    length+= add_flag_field_to_join_cache((uchar*) &join_tab->found,
+                                          sizeof(join_tab->found),
+	                                  &copy);
+
+  /* Create fields for all null bitmaps and null row flags that are needed */
+  for (tab= start_tab; tab != join_tab; 
+       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+  {
+    TABLE *table= tab->table;
+
+    /* Create a field for the null bitmap from table if needed */
+    if (tab->used_null_fields || tab->used_uneven_bit_fields)			    
+      length+= add_flag_field_to_join_cache(table->null_flags,
+                                            table->s->null_bytes,
+                                            &copy);
+ 
+    /* Create a field for the null row flag if needed */
+    if (table->maybe_null)
+      length+= add_flag_field_to_join_cache((uchar*) &table->null_row,
+                                            sizeof(table->null_row),
+                                            &copy);
+  }
+
+  /* Theoretically the new value of flag_fields can be less than the old one */   
+  flag_fields= copy-field_descr;
+}
+
+
+/*
+  Create descriptors of the fields used to build access keys to the joined table
+
+  SYNOPSIS
+    create_key_arg_fields()
+
+  DESCRIPTION
+    The function creates descriptors of the record fields stored in the join
+    buffer that are used to build access keys to the joined table. These
+    fields are put into the buffer ahead of other record fields stored in
+    the buffer. Such placement helps to optimize construction of access keys.
+    For each field that is used to build access keys to the joined table but
+    is stored in some other join cache buffer the function saves a pointer
+    to the field descriptor. The array of such pointers is placed in
+    the join cache structure just before the array of pointers to the
+    blob fields blob_ptr.
+    Any field stored in a join cache buffer that is used to construct keys
+    to access tables associated with other join caches is called a referenced
+    field. It receives a unique number that is saved by the function in the
+    member 'referenced_field_no' of the CACHE_FIELD descriptor for the field.
+    This number is used as index to the array of offsets to the referenced
+    fields that are saved and put in the join cache buffer after all record
+    fields.
+    The function also finds out whether the keys to access join_tab
+    can be considered as embedded and, if so, sets the flag 'use_emb_key' in
+    this join cache appropriately.
+
+  NOTES
+    When a key to access the joined table 'join_tab' is constructed the array
+    of pointers to the field descriptors for the external fields is looked
+    through. For each of these pointers we find out in what previous cache
+    the referenced field is stored. The value of 'referenced_field_no'
+    provides us with the index into the array of offsets for referenced
+    fields stored in the join cache. The offset read by the index allows
+    us to read the field without reading all other fields of the record
+    stored in the join cache buffer. This optimizes the construction of keys
+    to access 'join_tab' when some key arguments are stored in the previous
+    join caches.
+
+  NOTES
+    The function does not do anything if no key is used to join the records
+    from join_tab.
+
+  RETURN VALUE
+    none
+*/
+void JOIN_CACHE::create_key_arg_fields()
+{
+  JOIN_TAB *tab;
+  JOIN_CACHE *cache;
+
+  if (!is_key_access())
+    return;
+
+  /* 
+    Save pointers to the cache fields in previous caches
+    that are used to build keys for this key access.
+  */
+  cache= this;
+  uint ext_key_arg_cnt= external_key_arg_fields;
+  CACHE_FIELD *copy;
+  CACHE_FIELD **copy_ptr= blob_ptr;
+  while (ext_key_arg_cnt)
+  {
+    cache= cache->prev_cache;
+    for (tab= cache->start_tab; tab != cache->join_tab; 
+         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+    { 
+      CACHE_FIELD *copy_end;
+      MY_BITMAP *key_read_set= &tab->table->tmp_set;
+      /* key_read_set contains the bitmap of tab's fields referenced by ref */ 
+      if (bitmap_is_clear_all(key_read_set))
+        continue;
+      copy_end= cache->field_descr+cache->fields;
+      for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
+      {
+        /*
+          (1) - when we store rowids for DuplicateWeedout, they have
+                copy->field==NULL
+        */
+        if (copy->field &&  // (1)
+            copy->field->table == tab->table &&
+            bitmap_is_set(key_read_set, copy->field->field_index))
+        {
+          *copy_ptr++= copy; 
+          ext_key_arg_cnt--;
+          if (!copy->referenced_field_no)
+          {
+            /* 
+              Register the referenced field 'copy': 
+              - set the offset number in copy->referenced_field_no,
+              - adjust the value of the flag 'with_length',
+              - adjust the values of 'pack_length' and 
+                of 'pack_length_with_blob_ptrs'.
+	    */
+            copy->referenced_field_no= ++cache->referenced_fields;
+            if (!cache->with_length)
+            {
+              cache->with_length= TRUE;
+              uint sz= cache->get_size_of_rec_length();
+              cache->base_prefix_length+= sz;
+              cache->pack_length+= sz;
+              cache->pack_length_with_blob_ptrs+= sz;
+            }
+	    cache->pack_length+= cache->get_size_of_fld_offset();
+            cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
+          }        
+        }
+      }
+    } 
+  }
+  /* After this 'blob_ptr' shall not be changed */ 
+  blob_ptr= copy_ptr;
+  
+  /* Now create local fields that are used to build ref for this key access */
+  copy= field_descr+flag_fields;
+  for (tab= start_tab; tab != join_tab; 
+       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+  {
+    length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
+                                                 &data_field_count, &copy,
+                                                 &data_field_ptr_count, 
+                                                 &copy_ptr);
+  }
+
+  use_emb_key= check_emb_key_usage();
+
+  return;
+}
+
+
+/* 
+  Create descriptors of all remaining data fields stored in the join buffer    
+
+  SYNOPSIS
+    create_remaining_fields()
+
+  DESCRIPTION
+    The function creates descriptors for all remaining data fields of a
+    record from the join buffer. If the value returned by is_key_access() is
+    false the function creates fields for all read record fields that
+    comprise the partial join record joined with join_tab. Otherwise, 
+    for each table tab, the set of the read fields for which the descriptors
+    have to be added is determined as the difference between all read fields
+    and and those for which the descriptors have been already created.
+    The latter are supposed to be marked in the bitmap tab->table->tmp_set.
+    The function increases the value of 'length' to the the total length of
+    the added fields.
+   
+  NOTES
+    If is_key_access() returns true the function modifies the value of
+    tab->table->tmp_set for a each table whose fields are stored in the cache.
+    The function calls the method Field::fill_cache_field to figure out
+    the type of the cache field and the maximal length of its representation
+    in the join buffer. If this is a blob field then additionally a pointer
+    to this field is added as an element of the array blob_ptr. For a blob
+    field only the size of the length of the blob data is taken into account.
+    It is assumed that 'data_field_count' contains the number of descriptors
+    for data fields that have been already created and 'data_field_ptr_count'
+    contains the number of the pointers to such descriptors having been
+    stored up to the moment.
+
+  RETURN VALUE
+    none 
+*/
+
+void JOIN_CACHE::create_remaining_fields()
+{
+  /* With key access some descriptors (marked in tmp_set) already exist */
+  const bool skip_described_fields= is_key_access();
+  CACHE_FIELD *descr= field_descr+flag_fields+data_field_count;
+  CACHE_FIELD **blob_descr_ptr= blob_ptr+data_field_ptr_count;
+
+  /* Walk through the tables from start_tab up to, but excluding, join_tab */
+  JOIN_TAB *tab= start_tab;
+  while (tab != join_tab)
+  {
+    TABLE *table= tab->table;
+    MY_BITMAP *fields_to_add= table->read_set;
+
+    if (skip_described_fields)
+    {
+      /* tmp_set := read_set minus the fields marked in tmp_set so far */
+      bitmap_invert(&table->tmp_set);
+      bitmap_intersect(&table->tmp_set, table->read_set);
+      fields_to_add= &table->tmp_set;
+    }
+
+    length+= add_table_data_fields_to_join_cache(tab, fields_to_add,
+                                                 &data_field_count, &descr,
+                                                 &data_field_ptr_count,
+                                                 &blob_descr_ptr);
+
+    /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
+    if (tab->keep_current_rowid)
+    {
+      uchar *rowid_ref= table->file->ref;
+      /*
+        With no rowid reference (this may happen only for materialized derived
+        tables and views) the table itself is remembered with a zero length.
+      */
+      descr->str= rowid_ref ? rowid_ref : (uchar *) table;
+      descr->length= rowid_ref ? table->file->ref_length : 0;
+      descr->type= CACHE_ROWID;
+      descr->field= 0;
+      descr->referenced_field_no= 0;
+      length+= descr->length;
+      data_field_count++;
+      descr++;
+    }
+
+    tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS);
+  }
+}
+
+
+
+/* 
+  Calculate and set all cache constants      
+
+  SYNOPSIS
+    set_constants()
+
+  DESCRIPTION
+    The function calculates and sets all precomputed constants that are used
+    when writing records into the join buffer and reading them from it.
+    It calculates the size of offsets of a record within the join buffer
+    and of a field within a record. It also calculates the number of bytes
+    used to store record lengths.
+    The function also calculates the maximal length of the representation
+    of a record in the cache excluding blob data. This value is used when
+    making a decision whether more records should be added into the join
+    buffer or not.
+  
+  RETURN VALUE
+    none 
+*/
+
+void JOIN_CACHE::set_constants()
+{
+  /*
+    A record is prepended with its length if the cache is used for key
+    access: the length allows a reader that builds key values to skip the
+    fields that are not needed for keys. The length is also stored if
+    records may carry a match flag that lets them be skipped, i.e. if the
+    cache is used for a semi-join with first match or for an outer join.
+    Fields referenced from other caches need the length as well, since
+    their offsets are saved at the very end of the record. Whether there
+    are such fields is not known yet: 'with_length' is adjusted later, when
+    a referenced field is registered for the cache.
+  */
+  bool store_length= is_key_access();
+  if (!store_length)
+    store_length= join_tab->is_inner_table_of_semi_join_with_first_match() ||
+                  join_tab->is_inner_table_of_outer_join();
+  with_length= store_length;
+  /* Size of the reference to the record in the previous cache, if any */
+  uint prev_ref_size= prev_cache ? prev_cache->get_size_of_rec_offset() : 0;
+  /*
+    The value of 'referenced_fields' is not known yet either, but it can't
+    be greater than the value of 'fields', so 'fields' is used as the bound.
+  */
+  uint max_rec_len= length + fields*sizeof(uint) + blobs*sizeof(uchar *) +
+                    prev_ref_size + sizeof(ulong);
+  buff_size= max(join->thd->variables.join_buff_size, 2*max_rec_len);
+  size_of_rec_ofs= offset_size(buff_size);
+  if (blobs)
+    size_of_rec_len= size_of_rec_ofs;
+  else
+    size_of_rec_len= offset_size(max_rec_len);
+  size_of_fld_ofs= size_of_rec_len;
+
+  /*
+    The size of the offsets for referenced fields is not included here:
+    'pack_length' and 'pack_length_with_blob_ptrs' are adjusted later.
+  */
+  uint len_prefix_size= with_length ? size_of_rec_len : 0;
+  base_prefix_length= len_prefix_size + prev_ref_size;
+  pack_length= len_prefix_size + prev_ref_size + length;
+  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
+}
+
+
+/* 
+  Get maximum total length of all affixes of a record in the join cache buffer
+
+  SYNOPSIS
+    get_record_max_affix_length()
+
+  DESCRIPTION
+    The function calculates the maximum possible total length of all affixes
+    of a record in the join cache buffer, that is made of:
+      - the length of all prefixes used in this cache,
+      - the length of the match flag if it's needed
+      - the total length of the maximum possible offsets to the fields of
+        a record in the buffer.
+
+  RETURN VALUE
+    The maximum total length of all affixes of a record in the join buffer  
+*/ 
+     
+uint JOIN_CACHE::get_record_max_affix_length()
+{
+  /* All prefixes, plus 1 if a match flag is used, plus an offset per field */
+  return get_prefix_length() +
+         test(with_match_flag) +
+         size_of_fld_ofs * data_field_count;
+}
+
+
+/* 
+  Get the minimum possible size of the cache join buffer 
+
+  SYNOPSIS
+    get_min_join_buffer_size()
+
+  DESCRIPTION
+    At its first invocation for the cache the function calculates the
+    minimum possible size of the join buffer of the cache. This value depends
+    on the minimal number of records 'min_records' to be stored in the join
+    buffer. The number is supposed to be determined by the procedure that 
+    chooses the best access path to the joined table join_tab in the execution
+    plan. After the calculation of the interesting size the function saves it
+    in the field 'min_buff_size' in order to use it directly at the next     
+    invocations of the function.
+
+  NOTES
+    Currently the number of minimal records is just set to 1.
+
+  RETURN VALUE
+    The minimal possible size of the join buffer of this cache 
+*/
+
+ulong JOIN_CACHE::get_min_join_buffer_size()
+{
+  if (min_buff_size)                /* already calculated by an earlier call */
+    return min_buff_size;
+
+  size_t rec_len= 0;
+  JOIN_TAB *tab= start_tab;
+  while (tab != join_tab)
+  {
+    rec_len+= tab->get_max_used_fieldlength();
+    tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS);
+  }
+  rec_len+= get_record_max_affix_length() +
+            get_max_key_addon_space_per_record();
+  size_t aux_sz= 0;
+  for (uint rec_idx= 0; rec_idx < min_records; rec_idx++)
+    aux_sz+= join_tab_scan->aux_buffer_incr(rec_idx+1);
+  avg_aux_buffer_incr= aux_sz/min_records;
+  size_t min_sz= rec_len*min_records + aux_sz + pack_length_with_blob_ptrs;
+  set_if_bigger(min_sz, 1);
+  min_buff_size= min_sz;
+  return min_buff_size;
+}
+
+
+/* 
+  Get the maximum possible size of the cache join buffer 
+
+  SYNOPSIS
+    get_max_join_buffer_size()
+
+    optimize_buff_size  TRUE <-> do not take more memory than needed for
+                        the estimated number of records in the partial join
+
+  DESCRIPTION
+    At its first invocation for the cache the function calculates the
+    maximum possible size of join buffer for the cache. If the parameter
+    optimize_buff_size is true then this value does not exceed the size of
+    the space needed for the estimated number of records 'max_records' in the
+    partial join that joins tables from the first one through join_tab. This
+    value is also capped off by the value of join_tab->join_buffer_size_limit,
+    if it has been set to a non-zero value, and by the value of the system
+    parameter join_buffer_size - otherwise. After the calculation of the
+    interesting size the function saves the value in the field 'max_buff_size'
+    in order to use it directly at the next invocations of the function.
+
+  NOTES
+    Currently the value of join_tab->join_buffer_size_limit is initialized
+    to 0 and is never reset.
+
+  RETURN VALUE
+    The maximum possible size of the join buffer of this cache 
+*/
+
+ulong JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size)
+{
+  if (max_buff_size)                /* already calculated by an earlier call */
+    return max_buff_size;
+
+  /* This call goes first: it may set avg_aux_buffer_incr that is read below */
+  size_t min_sz= get_min_join_buffer_size();
+  size_t rec_len= 0;
+  JOIN_TAB *tab= start_tab;
+  while (tab != join_tab)
+  {
+    rec_len+= tab->get_used_fieldlength();
+    tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS);
+  }
+  rec_len+= get_record_max_affix_length();
+  avg_record_length= rec_len;
+  rec_len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr;
+  space_per_record= rec_len;
+
+  /* The cap: join_buff_size, lowered by the per-table limit when it is set */
+  size_t limit_sz= join->thd->variables.join_buff_size;
+  if (join_tab->join_buffer_size_limit)
+    set_if_smaller(limit_sz, join_tab->join_buffer_size_limit);
+  size_t max_sz= limit_sz;
+  if (optimize_buff_size)
+  {
+    /* Do not ask for more than 'max_records' records are expected to need */
+    if (limit_sz / max_records > space_per_record)
+      max_sz= space_per_record * max_records;
+    max_sz+= pack_length_with_blob_ptrs;
+    set_if_smaller(max_sz, limit_sz);
+  }
+  set_if_bigger(max_sz, min_sz);
+  max_buff_size= max_sz;
+  return max_buff_size;
+}
+      
+
+/* 
+  Allocate memory for a join buffer      
+
+  SYNOPSIS
+    alloc_buffer()
+
+  DESCRIPTION
+    The function allocates a lump of memory for the cache join buffer. 
+    Initially the function sets the size of the buffer buff_size equal to
+    the value returned by get_max_join_buffer_size(). If the total size of
+    the space intended to be used for the join buffers employed by the
+    tables from the first one through join_tab exceeds the value of the
+    system parameter join_buff_space_limit, then the function first tries
+    to shrink the used buffers to make the occupied space fit the maximum
+    memory allowed to be used for all join buffers in total. After
+    this the function tries to allocate a join buffer for join_tab.
+    If it fails to do so, it decrements the requested size of the join
+    buffer, shrinks proportionally the join buffers used for the previous
+    tables and tries to allocate a buffer for join_tab. In the case of a
+    failure the function repeats its attempts with smaller and smaller
+    requested sizes of the buffer, but not more than 4 times.
+  
+  RETURN VALUE
+    0   if the memory has been successfully allocated
+    1   otherwise
+*/
+
+int JOIN_CACHE::alloc_buffer()
+{
+  JOIN_TAB *tab;
+  JOIN_CACHE *cache;
+  ulonglong curr_buff_space_sz= 0;
+  ulonglong curr_min_buff_space_sz= 0;
+  ulonglong join_buff_space_limit=
+    join->thd->variables.join_buff_space_limit;
+  bool optimize_buff_size= 
+         optimizer_flag(join->thd, OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE);
+  double partial_join_cardinality=  (join_tab-1)->get_partial_join_cardinality();
+  buff= NULL;
+  min_buff_size= 0;               /* 0 makes the getters below recalculate */
+  max_buff_size= 0;
+  min_records= 1;
+  max_records= (size_t) (partial_join_cardinality <= join_buff_space_limit ?
+                 (ulonglong) partial_join_cardinality : join_buff_space_limit);
+  set_if_bigger(max_records, 10); /* plan for at least 10 records */
+  min_buff_size= get_min_join_buffer_size();
+  buff_size= get_max_join_buffer_size(optimize_buff_size);
+  /* Total the minimal and the current buffer sizes of the preceding caches */
+  for (tab= start_tab; tab!= join_tab; 
+       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+  {
+    cache= tab->cache;
+    if (cache)
+    {
+      curr_min_buff_space_sz+= cache->get_min_join_buffer_size();
+      curr_buff_space_sz+= cache->get_join_buffer_size();
+    }
+  }
+  /* Fail if the space limit cannot be met, with or without shrinking */
+  if (curr_min_buff_space_sz > join_buff_space_limit ||
+      (curr_buff_space_sz > join_buff_space_limit &&
+       (!optimize_buff_size || 
+        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
+                                  join_buff_space_limit))))
+    goto fail;
+  /* Each failed allocation lowers the requested size by buff_size_decr */
+  for (ulong buff_size_decr= (buff_size-min_buff_size)/4 + 1; ; )
+  {
+    ulong next_buff_size;
+
+    if ((buff= (uchar*) my_malloc(buff_size, MYF(0))))
+      break;
+    /* Retry with a smaller size unless it drops below the minimal one */
+    next_buff_size= buff_size > buff_size_decr ? buff_size-buff_size_decr : 0;
+    if (next_buff_size < min_buff_size ||
+        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
+                                  curr_buff_space_sz-buff_size_decr))
+      goto fail;
+    buff_size= next_buff_size;
+    /* Recalculate the space taken now that the buffers have been shrunk */
+    curr_buff_space_sz= 0;
+    for (tab= join->join_tab+join->const_tables; tab <= join_tab; tab++)
+    {
+      cache= tab->cache;
+      if (cache)
+        curr_buff_space_sz+= cache->get_join_buffer_size();
+    } 
+  }
+  return 0;
+
+fail:
+  buff_size= 0;                   /* no join buffer has been allocated */
+  return 1;
+}
+
+ 
+/*
+  Shrink the size of the cache join buffer in a given ratio
+
+  SYNOPSIS
+    shrink_join_buffer_in_ratio()
+      n           numerator of the ratio to shrink the buffer in
+      d           denominator of the ratio
+
+  DESCRIPTION
+    The function first deallocates the join buffer of the cache. Then
+    it allocates a buffer that is (n/d) times smaller.
+    
+  RETURN VALUE
+    FALSE   on success with allocation of the smaller join buffer 
+    TRUE    otherwise       
+*/
+
+bool JOIN_CACHE::shrink_join_buffer_in_ratio(ulonglong n, ulonglong d)
+{
+  if (n < d)
+    return FALSE;
+  /* Scale the size by d/n, but do not go below the minimal buffer size */
+  size_t shrunk_size= (size_t) ((double) buff_size / n * d);
+  set_if_bigger(shrunk_size, min_buff_size);
+  buff_size= shrunk_size;
+  return realloc_buffer();
+}
+
+
+/*
+  Reallocate the join buffer of a join cache
+ 
+  SYNOPSIS
+    realloc_buffer()
+
+  DESCRIPTION
+    The function reallocates the join buffer of the join cache. After this
+    it resets the buffer for writing.
+
+  NOTES
+    The function assumes that buff_size contains the new value for the join
+    buffer size.  
+
+  RETURN VALUE
+    0   if the buffer has been successfully reallocated
+    1   otherwise
+*/
+
+int JOIN_CACHE::realloc_buffer()
+{
+  free();
+  buff= (uchar*) my_malloc(buff_size, MYF(0));
+  const int alloc_failed= (buff == NULL);
+  reset(TRUE);        /* the reset is done even if the allocation failed */
+  return alloc_failed;
+}
+  
+
+/* 
+  Initialize a join cache       
+
+  SYNOPSIS
+    init()
+
+  DESCRIPTION
+    The function initializes the join cache structure. It is supposed to be
+    called by init methods for classes derived from the JOIN_CACHE.
+    The function allocates memory for the join buffer and for descriptors of
+    the record fields stored in the buffer.
+
+  NOTES
+    The code of this function should have been included into the constructor
+    code itself. However the new operator for the class JOIN_CACHE would
+    never fail while memory allocation for the join buffer is not absolutely
+    unlikely to fail. That's why this memory allocation has to be placed in a
+    separate function that is called in a couple with a cache constructor.
+    It is quite natural to put almost all other constructor actions into
+    this function.     
+  
+  RETURN VALUE
+    0   initialization with buffer allocations has been succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE::init()
+{
+  DBUG_ENTER("JOIN_CACHE::init");
+
+  /* Preparatory steps that precede the allocation done by alloc_fields() */
+  calc_record_fields();
+  collect_info_on_key_args();
+
+  /* Nothing more can be done if alloc_fields() reports a failure */
+  if (alloc_fields())
+    DBUG_RETURN(1);
+
+  /* Create the descriptors: flag fields, key arguments, all the rest */
+  create_flag_fields();
+  create_key_arg_fields();
+  create_remaining_fields();
+
+  /* The constants have to be set before the join buffer is allocated */
+  set_constants();
+
+  /* The buffer is reset for writing only if it has been allocated */
+  const bool no_buffer= alloc_buffer() != 0;
+  if (!no_buffer)
+    reset(TRUE);
+  DBUG_RETURN(no_buffer ? 1 : 0);
+}
+
+
+/* 
+  Check the possibility to read the access keys directly from the join buffer       
+  SYNOPSIS
+    check_emb_key_usage()
+
+  DESCRIPTION
+    The function checks some conditions at which the key values can be read
+    directly from the join buffer. This is possible when the key values can be
+    composed by concatenation of the record fields stored in the join buffer.
+    Sometimes when the access key is multi-component the function has to re-order
+    the fields written into the join buffer to make keys embedded. If key 
+    values for the key access are detected as embedded then 'use_emb_key'
+    is set to TRUE.
+
+  EXAMPLE
+    Let table t2 has an index defined on the columns a,b . Let's assume also
+    that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all
+    of the integer type. Then if the query
+      SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b  
+    is executed with a join cache in such a way that t1 is the driving
+    table then the key values to access table t2 can be read directly
+    from the join buffer.
+  
+  NOTES
+    In some cases key values could be read directly from the join buffer but
+    we still do not consider them embedded. In the future we'll expand
+    the class of keys which we identify as embedded.
+
+  NOTES
+    The function returns FALSE if no key is used to join the records
+    from join_tab.
+
+  RETURN VALUE
+    TRUE    key values will be considered as embedded,
+    FALSE   otherwise.
+*/
+
+bool JOIN_CACHE::check_emb_key_usage()
+{
+  /* Without key access no keys are built, so there is nothing to embed */
+  if (!is_key_access())
+    return FALSE;
+
+  uint i;
+  Item *item;
+  KEY_PART_INFO *key_part;
+  CACHE_FIELD *copy;
+  CACHE_FIELD *copy_end;
+  uint len= 0;
+  TABLE_REF *ref= &join_tab->ref;
+  KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key);
+
+  /*
+    If some of the key arguments are not from the local cache the key
+    is not considered as embedded.
+    TODO:
+    Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0.
+  */
+  if (external_key_arg_fields != 0)
+    return FALSE;
+  /*
+    If the number of the local key arguments is not equal to the number
+    of key parts the key value cannot be read directly from the join buffer.
+  */
+  if (local_key_arg_fields != ref->key_parts)
+    return FALSE;
+
+  /*
+    A key is not considered embedded if one of the following is true:
+    - one of its key parts is not equal to a field
+    - it is a partial key
+    - definition of the argument field does not coincide with the
+      definition of the corresponding key component
+    - some of the key components are nullable
+  */
+  for (i=0; i < ref->key_parts; i++)
+  {
+    item= ref->items[i]->real_item();
+    if (item->type() != Item::FIELD_ITEM)
+      return FALSE;
+    key_part= keyinfo->key_part+i;
+    if (key_part->key_part_flag & HA_PART_KEY_SEG)
+      return FALSE;
+    if (!key_part->field->eq_def(((Item_field *) item)->field))
+      return FALSE;
+    if (key_part->field->maybe_null())
+      return FALSE;
+  }
+
+  copy= field_descr+flag_fields;
+  copy_end= copy+local_key_arg_fields;
+  for ( ; copy < copy_end; copy++)
+  {
+    /*
+      If some of the key arguments are of variable length the key
+      is not considered as embedded.
+    */
+    if (copy->type != 0)
+      return FALSE;
+    /*
+      If some of the key arguments are bit fields whose bits are partially
+      stored with null bits the key is not considered as embedded.
+    */
+    if (copy->field->type() == MYSQL_TYPE_BIT &&
+        ((Field_bit*) (copy->field))->bit_len)
+      return FALSE;
+    len+= copy->length;
+  }
+
+  emb_key_length= len;
+
+  /*
+    Re-order the descriptors of the key fields to follow the order of the
+    key components: the descriptor matching component i, found at some
+    position j >= i, is swapped into position i ('i' must stay untouched).
+  */
+  for (i= 0; i < ref->key_parts; i++)
+  {
+    uint j;
+    item= ref->items[i]->real_item();
+    Field *fld= ((Item_field *) item)->field;
+    CACHE_FIELD *init_copy= field_descr+flag_fields+i;
+    for (j= i, copy= init_copy; j < local_key_arg_fields;  j++, copy++)
+    {
+      if (fld->eq(copy->field))
+      {
+        if (j != i)
+        {
+          CACHE_FIELD key_part_copy= *copy;
+          *copy= *init_copy;
+          *init_copy= key_part_copy;
+        }
+        break;
+      }
+    }
+  }
+
+  return TRUE;
+}
+
+
+/* 
+  Write record fields and their required offsets into the join cache buffer
+
+  SYNOPSIS
+    write_record_data()
+      link        a reference to the associated info in the previous cache
+      is_full OUT true if it has been decided that no more records will be
+                  added to the join buffer
+
+  DESCRIPTION
+    This function puts into the cache buffer the following info that it reads
+    from the join record buffers or computes somehow:
+    (1) the length of all fields written for the record (optional)
+    (2) an offset to the associated info in the previous cache (if there is any)
+        determined by the link parameter
+    (3) all flag fields of the tables whose data field are put into the cache:
+        - match flag (optional),
+        - null bitmaps for all tables,
+        - null row flags for all tables
+    (4) values of all data fields including
+        - full images of those fixed length data fields that cannot have
+          trailing spaces
+        - significant part of fixed length fields that can have trailing spaces
+          with the prepended length
+        - data of non-blob variable length fields with the prepended data length
+        - blob data from blob fields with the prepended data length
+    (5) record offset values for the data fields that are referred to from 
+        other caches
+ 
+    The record is written at the current position stored in the field 'pos'.
+    At the end of the function 'pos' points at the position right after the 
+    written record data.
+    The function increments the number of records in the cache that is stored
+    in the 'records' field by 1. The function also modifies the values of
+    'curr_rec_pos' and 'last_rec_pos' to point to the written record.
+    The 'end_pos' cursor is modified accordingly.
+    The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data 
+    remains in the record buffers and not copied to the join buffer. It may
+    happen only to the blob data from the last record added into the cache.
+    If on_precond is attached to join_tab and it is not evaluated to TRUE
+    then MATCH_IMPOSSIBLE is placed in the match flag field of the record
+    written into the join buffer.
+       
+  RETURN VALUE
+    length of the written record data
+*/
+
+uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
+{
+  uint len;
+  bool last_record;
+  CACHE_FIELD *copy;
+  CACHE_FIELD *copy_end;
+  uchar *flags_pos;
+  uchar *cp= pos;
+  uchar *init_pos= cp;
+  uchar *rec_len_ptr= 0;
+  uint key_extra= extra_key_length();
+ 
+  records++;  /* Increment the counter of records in the cache */
+
+  len= pack_length + key_extra;
+
+  /* Make an adjustment for the size of the auxiliary buffer if there is any */
+  uint incr= aux_buffer_incr(records);
+  size_t rem= rem_space();
+  aux_buff_size+= len+incr < rem ? incr : rem;
+
+  /*
+    For each blob to be put into cache save its length and a pointer
+    to the value in the corresponding element of the blob_ptr array.
+    Blobs with null values are skipped.
+    Increment 'len' by the total length of all these blobs. 
+  */    
+  if (blobs)
+  {
+    CACHE_FIELD **copy_ptr= blob_ptr;
+    CACHE_FIELD **copy_ptr_end= copy_ptr+blobs;
+    for ( ; copy_ptr < copy_ptr_end; copy_ptr++)
+    {
+      Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field;
+      if (!blob_field->is_null())
+      {
+        uint blob_len= blob_field->get_length();
+        (*copy_ptr)->blob_length= blob_len;
+        len+= blob_len;
+        blob_field->get_ptr(&(*copy_ptr)->str);
+      }
+    }
+  }
+
+  /*
+    Check whether we won't be able to add any new record into the cache after
+    this one because the cache will be full. Set last_record to TRUE if it's so.
+    We assume that the cache will be full after the record has been written
+    into it if either the remaining space of the cache is not big enough for the 
+    record's blob values or if there is a chance that not all non-blob fields
+    of the next record can be placed there.
+    This function is called only in the case when there is enough space left in
+    the cache to store at least non-blob parts of the current record.
+  */
+  last_record= (len+pack_length_with_blob_ptrs+key_extra) > rem_space();
+  
+  /* 
+    Save the position for the length of the record in the cache if it's needed.
+    The length of the record will be inserted here when all fields of the record
+    are put into the cache.  
+  */
+  if (with_length)
+  {
+    rec_len_ptr= cp;   
+    cp+= size_of_rec_len;
+  }
+
+  /*
+    Put a reference to the fields of the record that are stored in the previous
+    cache if there is any. This reference is passed by the 'link' parameter.     
+  */
+  if (prev_cache)
+  {
+    cp+= prev_cache->get_size_of_rec_offset();
+    prev_cache->store_rec_ref(cp, link);
+  } 
+
+  curr_rec_pos= cp;
+  
+  /* If there is a match flag set its value to 0 */
+  copy= field_descr;
+  if (with_match_flag)
+    *copy[0].str= 0;
+
+  /* First put into the cache the values of all flag fields */
+  copy_end= field_descr+flag_fields;
+  flags_pos= cp;
+  for ( ; copy < copy_end; copy++)
+  {
+    memcpy(cp, copy->str, copy->length);
+    cp+= copy->length;
+  } 
+  
+  /* Now put the values of the remaining fields provided they are not nulls */ 
+  copy_end= field_descr+fields;
+  for ( ; copy < copy_end; copy++)
+  {
+    Field *field= copy->field;
+    if (field && field->maybe_null() && field->is_null())
+    {    
+      if (copy->referenced_field_no)
+        copy->offset= 0;
+      continue;              
+    }
+    /* Save the offset of the field to put it later at the end of the record */ 
+    if (copy->referenced_field_no)
+      copy->offset= cp-curr_rec_pos;
+
+    if (copy->type == CACHE_BLOB)
+    {
+      Field_blob *blob_field= (Field_blob *) copy->field;
+      if (last_record)
+      {
+        last_rec_blob_data_is_in_rec_buff= 1;
+        /* Put down the length of the blob and the pointer to the data */  
+	blob_field->get_image(cp, copy->length+sizeof(char*),
+                              blob_field->charset());
+	cp+= copy->length+sizeof(char*);
+      }
+      else
+      {
+        /* First put down the length of the blob and then copy the data */ 
+	blob_field->get_image(cp, copy->length, 
+			      blob_field->charset());
+	memcpy(cp+copy->length, copy->str, copy->blob_length);               
+	cp+= copy->length+copy->blob_length;
+      }
+    }
+    else
+    {
+      switch (copy->type) {
+      case CACHE_VARSTR1:
+        /* Copy the significant part of the short varstring field */ 
+        len= (uint) copy->str[0] + 1;     /* 'len' now serves as scratch */
+        memcpy(cp, copy->str, len);
+        cp+= len;
+        break;
+      case CACHE_VARSTR2:
+        /* Copy the significant part of the long varstring field */
+        len= uint2korr(copy->str) + 2;
+        memcpy(cp, copy->str, len);
+        cp+= len;
+        break;
+      case CACHE_STRIPPED:
+      {
+        /* 
+          Put down the field value stripping all trailing spaces off.
+          After this insert the length of the written sequence of bytes.
+        */ 
+	uchar *str, *end;
+	for (str= copy->str, end= str+copy->length;
+	     end > str && end[-1] == ' ';
+	     end--) ;
+	len=(uint) (end-str);
+        int2store(cp, len);
+	memcpy(cp+2, str, len);
+	cp+= len+2;
+        break;
+      }
+      case CACHE_ROWID:
+        if (!copy->length)
+	{
+          /*
+            This may happen only for ROWID fields of materialized
+            derived tables and views.
+	  */
+	  TABLE *table= (TABLE *) copy->str;
+          copy->str= table->file->ref;
+          copy->length= table->file->ref_length;
+        }
+        /* fall through */
+      default:      
+        /* Copy the entire image of the field from the record buffer */
+	memcpy(cp, copy->str, copy->length);
+	cp+= copy->length;
+      }
+    }
+  }
+  
+  /* Add the offsets of the fields that are referenced from other caches */ 
+  if (referenced_fields)
+  {
+    uint cnt= 0;
+    for (copy= field_descr+flag_fields; copy < copy_end ; copy++)
+    {
+      if (copy->referenced_field_no)
+      {
+        store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1),
+                         copy->offset);
+        cnt++;
+      }
+    }
+    cp+= size_of_fld_ofs*cnt;
+  }
+
+  if (rec_len_ptr)
+    store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len));
+  last_rec_pos= curr_rec_pos; 
+  end_pos= pos= cp;
+  *is_full= last_record;
+  /* Evaluate on_precond, if any, to mark records that cannot have matches */
+  last_written_is_null_compl= 0;   
+  if (!join_tab->first_unmatched && join_tab->on_precond)
+  { 
+    join_tab->found= 0;
+    join_tab->not_null_compl= 1;
+    if (!join_tab->on_precond->val_int())
+    {
+      flags_pos[0]= MATCH_IMPOSSIBLE;     
+      last_written_is_null_compl= 1;
+    }
+  } 
+      
+  return (uint) (cp-init_pos);
+}
+
+
+/* 
+  Reset the join buffer for reading/writing: default implementation
+
+  SYNOPSIS
+    reset()
+      for_writing  if it's TRUE the function resets the buffer for writing
+
+  DESCRIPTION
+    This default implementation of the virtual function reset() resets 
+    the join buffer for reading or writing.
+    If the buffer is reset for reading only the 'pos' value is reset
+    to point to the very beginning of the join buffer. If the buffer is
+    reset for writing additionally: 
+    - the counter of the records in the buffer is set to 0,
+    - the value of 'last_rec_pos' is set to point to the beginning of
+      the join buffer,
+    - 'end_pos' is set to point to the beginning of the join buffer,
+    - the size of the auxiliary buffer is reset to 0,
+    - the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0.
+    
+  RETURN VALUE
+    none
+*/
+void JOIN_CACHE::reset(bool for_writing)
+{
+  /* Reading and writing both start from the beginning of the buffer */
+  curr_rec_link= 0;
+  pos= buff;
+  if (!for_writing)
+    return;
+  /* For writing the buffer is additionally marked as containing no records */
+  end_pos= last_rec_pos= buff;
+  records= 0;
+  aux_buff_size= 0;
+  last_rec_blob_data_is_in_rec_buff= 0;
+}
+
+
+/* 
+  Add a record into the join buffer: the default implementation
+
+  SYNOPSIS
+    put_record()
+
+  DESCRIPTION
+    This default implementation of the virtual function put_record writes
+    the next matching record into the join buffer.
+    It also links the record having been written into the join buffer with
+    the matched record in the previous cache if there is any.
+    The implementation assumes that the function get_curr_link() 
+    will return exactly the pointer to this matched record.
+
+  RETURN VALUE
+    TRUE    if it has been decided that it should be the last record
+            in the join buffer,
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE::put_record()
+{
+  /* Link to the matched record in the previous cache, 0 if there is none */
+  uchar *prev_rec_link= prev_cache ? prev_cache->get_curr_rec_link() : 0;
+  bool no_more_records;
+  /* The returned length is not needed, only the 'buffer is full' flag is */
+  write_record_data(prev_rec_link, &no_more_records);
+  return no_more_records;
+}
+  
+
+/* 
+  Read the next record from the join buffer: the default implementation
+
+  SYNOPSIS
+    get_record()
+
+  DESCRIPTION
+    This default implementation of the virtual function get_record
+    reads fields of the next record from the join buffer of this cache.
+    The function also reads all other fields associated with this record
+    from the join buffers of the previous caches. The fields are read
+    into the corresponding record buffers.
+    It is supposed that 'pos' points to the position in the buffer 
+    right after the previous record when the function is called.
+    When the function returns the 'pos' value is updated to point
+    to the position after the read record.
+    The value of 'curr_rec_pos' is also updated by the function to
+    point to the beginning of the first field of the record in the
+    join buffer.    
+
+  RETURN VALUE
+    TRUE    there are no more records to read from the join buffer
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE::get_record()
+{
+  uchar *linked_rec= 0;
+  if (with_length)
+    pos+= size_of_rec_len;
+  /* Step over the link to the previous cache and fetch the reference */
+  if (prev_cache)
+  {
+    pos+= prev_cache->get_size_of_rec_offset();
+    linked_rec= prev_cache->get_rec_ref(pos);
+  }
+  curr_rec_pos= pos;
+  if (read_all_record_fields() == NO_MORE_RECORDS_IN_BUFFER)
+    return TRUE;
+  /* Step over the offsets of the referenced fields stored after the data */
+  pos+= referenced_fields*size_of_fld_ofs;
+  if (prev_cache)
+    prev_cache->get_record_by_pos(linked_rec);
+  return FALSE;
+}
+
+
+/* 
+  Read a positioned record from the join buffer: the default implementation
+
+  SYNOPSIS
+    get_record_by_pos()
+      rec_ptr  position of the first field of the record in the join buffer
+
+  DESCRIPTION
+    This default implementation of the virtual function get_record_by_pos
+    reads the fields of the record positioned at 'rec_ptr' from the join buffer.
+    The function also reads all other fields associated with this record 
+    from the join buffers of the previous caches. The fields are read
+    into the corresponding record buffers.
+
+  RETURN VALUE
+    none
+*/
+
+void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
+{
+  /* Read from rec_ptr, leaving the current read position untouched */
+  uchar *orig_pos= pos;
+  pos= rec_ptr;
+  read_all_record_fields();
+  pos= orig_pos;
+  if (!prev_cache)
+    return;
+  /* Continue with the linked record kept in the previous cache */
+  prev_cache->get_record_by_pos(prev_cache->get_rec_ref(rec_ptr));
+}
+
+
+/* 
+  Get the match flag from the referenced record: the default implementation
+
+  SYNOPSIS
+    get_match_flag_by_pos()
+      rec_ptr  position of the first field of the record in the join buffer
+
+  DESCRIPTION
+    This default implementation of the virtual function get_match_flag_by_pos
+    gets the match flag for the record pointed by the reference at the position
+    rec_ptr. If the match flag is placed in one of the previous buffers the
+    function first reaches the linked record fields in this buffer.
+
+  RETURN VALUE
+    match flag for the record at the position rec_ptr
+*/
+
+enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
+{
+  /* The flag, when kept in this buffer, is the first byte of the record */
+  if (with_match_flag)
+    return (enum Match_flag) rec_ptr[0];
+  /* Otherwise follow the reference into the previous cache */
+  if (prev_cache)
+  {
+    uchar *linked_rec= prev_cache->get_rec_ref(rec_ptr);
+    return prev_cache->get_match_flag_by_pos(linked_rec);
+  }
+
+  /* No cache in the chain holds a match flag: must not happen */
+  DBUG_ASSERT(0);
+  return MATCH_NOT_FOUND;
+}
+
+
+/* 
+  Calculate the increment of the auxiliary buffer for a record write
+
+  SYNOPSIS
+    aux_buffer_incr()
+      recno   the number of the record the increment to be calculated for
+
+  DESCRIPTION
+    This function calls the aux_buffer_incr method of the
+    companion member join_tab_scan to calculate the growth of the
+    auxiliary buffer when the recno-th record is added to the
+    join_buffer of this cache.
+
+  RETURN VALUE
+    the number of bytes in the increment 
+*/
+
+uint JOIN_CACHE::aux_buffer_incr(ulong recno)
+{ 
+  return join_tab_scan->aux_buffer_incr(recno);   /* pure delegation */
+}
+
+/* 
+  Read all flag and data fields of a record from the join buffer
+
+  SYNOPSIS
+    read_all_record_fields()
+
+  DESCRIPTION
+    The function reads all flag and data fields of a record from the join
+    buffer into the corresponding record buffers.
+    The fields are read starting from the position 'pos' which is
+    supposed to point to the beginning of the first record field.
+    The function increments the value of 'pos' by the length of the
+    read data. 
+
+  RETURN VALUE
+    (-1)   if there are no more records in the join buffer
+    length of the data read from the join buffer - otherwise
+*/
+
+uint JOIN_CACHE::read_all_record_fields()
+{
+  uchar *start_pos= pos;
+
+  /* Nothing to read: the buffer is empty or 'pos' is past the last record */
+  if (!records || pos > last_rec_pos)
+    return NO_MORE_RECORDS_IN_BUFFER;
+
+  /* The flag fields come first in the record */
+  read_flag_fields();
+
+  /* The data fields follow; their descriptors start after the flag ones */
+  bool blobs_in_rec_buff= blob_data_is_in_rec_buff(start_pos);
+  CACHE_FIELD *descr_end= field_descr+fields;
+  for (CACHE_FIELD *descr= field_descr+flag_fields; descr < descr_end; descr++)
+    read_record_field(descr, blobs_in_rec_buff);
+
+  return (uint) (pos-start_pos);
+}
+
+
+/* 
+  Read all flag fields of a record from the join buffer
+
+  SYNOPSIS
+    read_flag_fields()
+
+  DESCRIPTION
+    The function reads all flag fields of a record from the join
+    buffer into the corresponding record buffers.
+    The fields are read starting from the position 'pos'.
+    The function increments the value of 'pos' by the length of the
+    read data. 
+
+  RETURN VALUE
+    length of the data read from the join buffer
+*/
+
+uint JOIN_CACHE::read_flag_fields()
+{
+  uchar *start_pos= pos;
+  CACHE_FIELD *flag= field_descr;
+  CACHE_FIELD *flags_end= field_descr+flag_fields;
+  if (with_match_flag)
+  {
+    /* The match flag is delivered as a boolean: has a match been found? */
+    flag->str[0]= test((Match_flag) pos[0] == MATCH_FOUND);
+    pos+= flag->length;
+    flag++;
+  }
+  /* All other flag fields are copied as they are */
+  for ( ; flag < flags_end; pos+= flag->length, flag++)
+    memcpy(flag->str, pos, flag->length);
+  /* Number of bytes consumed from the join buffer */
+  return (pos-start_pos);
+}
+
+
+/* 
+  Read a data record field from the join buffer
+
+  SYNOPSIS
+    read_record_field()
+      copy             the descriptor of the data field to be read
+      blob_in_rec_buff indicates whether this is the field from the record
+                       whose blob data are in record buffers
+
+  DESCRIPTION
+    The function reads the data field specified by the parameter copy
+    from the join buffer into the corresponding record buffer. 
+    The field is read starting from the position 'pos'.
+    The data of blob values is not copied from the join buffer.
+    The function increments the value of 'pos' by the length of the
+    read data. 
+
+  RETURN VALUE
+    length of the data read from the join buffer
+*/
+
+uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff)
+{
+  uint len;
+  /* A null value consumes nothing from the join buffer: 'pos' stays put */
+  if (copy->field && copy->field->maybe_null() && copy->field->is_null())
+    return 0;           
+  if (copy->type == CACHE_BLOB)
+  {
+    Field_blob *blob_field= (Field_blob *) copy->field;
+    /* 
+      Copy the length and the pointer to data but not the blob data 
+      itself to the record buffer
+    */ 
+    if (blob_in_rec_buff)
+    {
+      blob_field->set_image(pos, copy->length+sizeof(char*),
+			    blob_field->charset());
+      len= copy->length+sizeof(char*);          /* length + data pointer */
+    }
+    else
+    {
+      blob_field->set_ptr(pos, pos+copy->length);
+      len= copy->length+blob_field->get_length();  /* length + blob data */
+    }
+  }
+  else
+  {
+    switch (copy->type) {
+    case CACHE_VARSTR1:
+      /* Copy the significant part of the short varstring field */
+      len= (uint) pos[0] + 1;                   /* 1 length byte + data */
+      memcpy(copy->str, pos, len);
+      break;
+    case CACHE_VARSTR2:
+      /* Copy the significant part of the long varstring field */
+      len= uint2korr(pos) + 2;                  /* 2 length bytes + data */
+      memcpy(copy->str, pos, len);
+      break;
+    case CACHE_STRIPPED:
+      /* Restore the trailing spaces that have been stripped off */
+      len= uint2korr(pos);
+      memcpy(copy->str, pos+2, len);
+      memset(copy->str+len, ' ', copy->length-len);
+      len+= 2;                                  /* count the length prefix */
+      break;
+    default:
+      /* Copy the entire image of the field from the join buffer */
+      len= copy->length;
+      memcpy(copy->str, pos, len);
+    }
+  }
+  pos+= len;                                    /* move past the field */
+  return len;
+}
+
+
+/* 
+  Read a referenced field from the join buffer
+
+  SYNOPSIS
+    read_referenced_field()
+      copy         pointer to the descriptor of the referenced field
+      rec_ptr      pointer to the record that may contain this field
+      len  IN/OUT  total length of the record fields 
+
+  DESCRIPTION
+    The function checks whether copy points to a data field descriptor
+    for this cache object. If it does not then the function returns
+    FALSE. Otherwise the function reads the field of the record in
+    the join buffer pointed by 'rec_ptr' into the corresponding record
+    buffer and returns TRUE.
+    If the value of *len is 0 then the function sets it to the total
+    length of the record fields including possible trailing offset
+    values. Otherwise *len is supposed to provide this value that
+    has been obtained earlier. 
+
+  NOTE
+    If the value of the referenced field is null then the offset
+    for the value is set to 0. If the value of a field can be null
+    then the value of flag_fields is always positive. So the offset
+    for any non-null value cannot be 0 in this case. 
+
+  RETURN VALUE
+    TRUE   'copy' points to a data descriptor of this join cache
+    FALSE  otherwise
+*/
+
+bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
+                                       uchar *rec_ptr, 
+                                       uint *len)
+{
+  uchar *ptr;
+  uint offset;
+  /* Only descriptors belonging to this cache object are handled here */
+  if (copy < field_descr || copy >= field_descr+fields)
+    return FALSE;
+  if (!*len)
+  {
+    /* Get the total length of the record fields */ 
+    uchar *len_ptr= rec_ptr;
+    if (prev_cache)
+      len_ptr-= prev_cache->get_size_of_rec_offset();
+    *len= get_rec_length(len_ptr-size_of_rec_len);
+  }
+  /* The offsets of the referenced fields are read from the record's tail */
+  ptr= rec_ptr-(prev_cache ? prev_cache->get_size_of_rec_offset() : 0);  
+  offset= get_fld_offset(ptr+ *len - 
+                         size_of_fld_ofs*
+                         (referenced_fields+1-copy->referenced_field_no));  
+  bool is_null= FALSE;
+  Field *field= copy->field;
+  if (offset == 0 && flag_fields)          /* offset 0 marks a null value */
+    is_null= TRUE;
+  if (is_null)
+  {
+    field->set_null();
+    if (!field->real_maybe_null())
+      field->table->null_row= 1;
+  }
+  else
+  {
+    uchar *save_pos= pos;                  /* 'pos' is used only temporarily */
+    field->set_notnull(); 
+    if (!field->real_maybe_null())
+      field->table->null_row= 0;
+    pos= rec_ptr+offset;
+    read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
+    pos= save_pos;
+  }
+  return TRUE;
+}
+   
+
+/* 
+  Skip record from join buffer if it's already matched: default implementation
+
+  SYNOPSIS
+    skip_if_matched()
+
+  DESCRIPTION
+    This default implementation of the virtual function skip_if_matched
+    skips the next record from the join buffer if its  match flag is set to 
+    MATCH_FOUND.
+    If the record is skipped the value of 'pos' is set to point to the position
+    right after the record.
+
+  RETURN VALUE
+    TRUE   the match flag is set to MATCH_FOUND and the record has been skipped
+    FALSE  otherwise
+*/
+
+bool JOIN_CACHE::skip_if_matched()
+{
+  DBUG_ASSERT(with_length);
+  /* The record fields start after the length and the previous-cache link */
+  uint fields_ofs= size_of_rec_len +
+                   (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
+
+  /* A record without a found match is left in place */
+  if (get_match_flag_by_pos(pos+fields_ofs) != MATCH_FOUND)
+    return FALSE;
+  /* Jump over the whole record: its length field plus its contents */
+  pos+= size_of_rec_len + get_rec_length(pos);
+  return TRUE;
+}
+
+
+/* 
+  Skip record from join buffer if the match isn't needed: default implementation
+
+  SYNOPSIS
+    skip_if_not_needed_match()
+
+  DESCRIPTION
+    This default implementation of the virtual function skip_if_not_needed_match
+    skips the next record from the join buffer if its match flag is not 
+    MATCH_NOT_FOUND, and, either its value is MATCH_FOUND and join_tab is the
+    first inner table of an inner join, or, its value is MATCH_IMPOSSIBLE
+    and join_tab is the first inner table of an outer join.
+    If the record is skipped the value of 'pos' is set to point to the position
+    right after the record.
+
+  RETURN VALUE
+    TRUE    the record has to be skipped
+    FALSE   otherwise 
+*/
+
+bool JOIN_CACHE::skip_if_not_needed_match()
+{
+  DBUG_ASSERT(with_length);
+  uint fields_ofs= size_of_rec_len +
+                   (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
+  enum Match_flag flag= get_match_flag_by_pos(pos+fields_ofs);
+
+  /* Records still waiting for a match are never skipped */
+  if (flag == MATCH_NOT_FOUND)
+    return FALSE;
+  /* Skip a found match only if first matches suffice, else the reverse */
+  if (join_tab->check_only_first_match() != (flag == MATCH_FOUND))
+    return FALSE;
+  pos+= size_of_rec_len + get_rec_length(pos);
+  return TRUE;
+}
+
+
+/* 
+  Restore the fields of the last record from the join buffer
+ 
+  SYNOPSIS
+    restore_last_record()
+
+  DESCRIPTION
+    This function restores the values of the fields of the last record put
+    into join buffer in record buffers. The values most probably have been
+    overwritten by the field values from other records when they were read
+    from the join buffer into the record buffer in order to check pushdown
+    predicates.
+
+  RETURN
+    none
+*/
+
+void JOIN_CACHE::restore_last_record()
+{
+  if (records)                       /* an empty buffer has no last record */
+    get_record_by_pos(last_rec_pos);
+}
+
+
+/*
+  Join records from the join buffer with records from the next join table    
+
+  SYNOPSIS
+    join_records()
+      skip_last    do not find matches for the last record from the buffer
+
+  DESCRIPTION
+    The function extends all records from the join buffer by the matched
+    records from join_tab. In the case of outer join operation it also
+    adds null complementing extensions for the records from the join buffer
+    that have no match. 
+    No extensions are generated for the last record from the buffer if
+    skip_last is true.  
+
+  NOTES
+    The function must make sure that if linked join buffers are used then
+    a join buffer cannot be refilled again until all extensions in the
+    buffers chained to this one are generated.
+    Currently an outer join operation with several inner tables always uses
+    at least two linked buffers with the match join flags placed in the
+    first buffer. Any record composed of rows of the inner tables that
+    matches a record in this buffer must refer to the position of the
+    corresponding match flag.
+
+  IMPLEMENTATION
+    When generating extensions for outer tables of an outer join operation
+    first we generate all extensions for those records from the join buffer
+    that have matches, after which null complementing extension for all
+    unmatched records from the join buffer are generated.  
+      
+  RETURN VALUE
+    return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
+*/ 
+
+enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
+{
+  JOIN_TAB *tab;
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
+
+  if (outer_join_first_inner && !join_tab->first_unmatched)
+    join_tab->not_null_compl= TRUE;        /* the matching phase comes first */
+
+  if (!join_tab->first_unmatched)
+  {
+    /* Find all records from join_tab that match records from join buffer */
+    rc= join_matching_records(skip_last);   
+    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+      goto finish;
+    if (outer_join_first_inner)
+    {
+      if (next_cache)
+      {
+        /* 
+          Ensure that all matches for outer records from join buffer are to be
+          found. Now we ensure that all full records are found for records from
+          join buffer. Generally this is an overkill.
+          TODO: Ensure that only matches of the inner table records have to be
+          found for the records from join buffer.
+	*/ 
+        rc= next_cache->join_records(skip_last);
+        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+          goto finish;
+      }
+      join_tab->not_null_compl= FALSE;
+      /* Prepare for generation of null complementing extensions */
+      for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
+        tab->first_unmatched= join_tab->first_inner;
+    }
+  }
+  if (join_tab->first_unmatched)
+  {
+    if (is_key_access())
+      restore_last_record();
+
+    /* 
+      Generate all null complementing extensions for the records from
+      join buffer that don't have any matching rows from the inner tables.
+    */
+    reset(FALSE);
+    rc= join_null_complements(skip_last);   
+    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+      goto finish;
+  }
+  if(next_cache)
+  {
+    /* 
+      When using linked caches we must ensure the records in the next caches
+      that refer to the records in the join buffer are fully extended.
+      Otherwise we could have references to the records that have been
+      already erased from the join buffer and replaced for new records. 
+    */ 
+    rc= next_cache->join_records(skip_last);
+    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+      goto finish;
+  }
+  if (outer_join_first_inner)
+  {
+    /* 
+      All null complemented rows have been already generated for all
+      outer records from join buffer. Restore the state of the
+      first_unmatched values to 0 to avoid another null complementing.
+    */
+    for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
+      tab->first_unmatched= 0;
+  } 
+ 
+  if (skip_last)
+  {
+    DBUG_ASSERT(!is_key_access());
+    /*
+       Restore the last record from the join buffer to generate
+       all extensions for it.
+    */
+    get_record();		               
+  }
+
+finish:
+  /* Reached both on success and on every early exit above */
+  restore_last_record();
+  reset(TRUE);
+  return rc;
+}
+
+
+/*   
+  Find matches from the next table for records from the join buffer 
+
+  SYNOPSIS
+    join_matching_records()
+      skip_last    do not look for matches for the last partial join record 
+
+  DESCRIPTION
+    The function retrieves rows of the join_tab table and checks whether they
+    match partial join records from the join buffer. If a match is found
+    the function will call the sub_select function trying to look for matches
+    for the remaining join operations.
+    This function currently is called only from the function join_records.    
+    If the value of skip_last is true the function writes the partial join
+    record from the record buffer into the join buffer to save its value for
+    the future processing in the caller function.
+
+  NOTES
+    If employed by BNL or BNLH join algorithms the function performs a full
+    scan of join_tab for each refill of the join buffer. If BKA or BKAH
+    algorithms are used then the function iterates only over those records
+    from join_tab that can be accessed by keys built over records in the join
+    buffer. To apply a proper method of iteration the function just calls
+    virtual iterator methods (open, next, close) of the member join_tab_scan.
+    The member can be either of the JOIN_TAB_SCAN or JOIN_TAB_SCAN_MRR type.
+    The class JOIN_TAB_SCAN provides the iterator methods for BNL/BNLH join
+    algorithms. The class JOIN_TAB_SCAN_MRR provides the iterator methods
+    for BKA/BKAH join algorithms.
+    When the function looks for records from the join buffer that would
+    match a record from join_tab it iterates either over all records in
+    the buffer or only over selected records. If BNL join operation is
+    performed all records are checked for the match. If BNLH or BKAH
+    algorithm is employed to join join_tab then the function looks only
+    through the records with the same join key as the record from join_tab.
+    With the BKA join algorithm only one record from the join buffer is checked
+    for a match for any record from join_tab. To iterate over the candidates
+    for a match the virtual function get_next_candidate_for_match is used,
+    while the virtual function prepare_look_for_matches is called to prepare
+    for such iteration process.
+
+  NOTES
+    The function produces all matching extensions for the records in the 
+    join buffer following the path of the employed blocked algorithm. 
+    When an outer join operation is performed all unmatched records from
+    the join buffer must be extended by null values. The function 
+    'join_null_complements' serves this purpose.  
+      
+  RETURN VALUE
+    return one of enum_nested_loop_state
+*/ 
+
+enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
+{
+  int error;
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  join_tab->table->null_row= 0;
+  bool check_only_first_match= join_tab->check_only_first_match();
+  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
+
+  /* Return at once if there are no records in the join buffer */
+  if (!records)     
+    return NESTED_LOOP_OK;   
+ 
+  /* 
+    When joining we read records from the join buffer back into record buffers.
+    If matches for the last partial join record are found through a call to
+    the sub_select function then this partial join record must be saved in the
+    join buffer in order to be restored just before the sub_select call.
+  */             
+  if (skip_last)     
+    put_record();     
+ 
+  if (join_tab->use_quick == 2 && join_tab->select->quick)
+  { 
+    /* A dynamic range access was used last. Clean up after it */
+    delete join_tab->select->quick;
+    join_tab->select->quick= 0;
+  }
+
+  if ((rc= join_tab_execution_startup(join_tab)) < 0)
+    goto finish2;          /* 'error' is not set yet: bypass the check of it */
+
+  /* Prepare to retrieve all records of the joined table */
+  if ((error= join_tab_scan->open()))
+  { 
+    /* 
+      TODO: if we get here, we will assert in net_send_statement(). Add test
+      coverage and fix.
+    */
+    goto finish;
+  }
+  /* A non-zero result of next() ends the loop; 'finish' maps it to rc */
+  while (!(error= join_tab_scan->next()))   
+  {
+    if (join->thd->killed)
+    {
+      /* The user has aborted the execution of the query */
+      join->thd->send_kill_message();
+      rc= NESTED_LOOP_KILLED;
+      goto finish; 
+    }
+
+    if (join_tab->keep_current_rowid)
+      join_tab->table->file->position(join_tab->table->record[0]);
+    
+    /* Prepare to read matching candidates from the join buffer */
+    if (prepare_look_for_matches(skip_last))
+      continue;
+
+    uchar *rec_ptr;
+    /* Read each possible candidate from the buffer and look for matches */
+    while ((rec_ptr= get_next_candidate_for_match()))
+    { 
+      /* 
+        If only the first match is needed, and, it has been already found for
+        the next record read from the join buffer, then the record is skipped.
+        Also those records that must be null complemented are not considered
+        as candidates for matches.
+      */
+      if ((!check_only_first_match && !outer_join_first_inner) ||
+          !skip_next_candidate_for_match(rec_ptr))
+      {
+	read_next_candidate_for_match(rec_ptr);
+        rc= generate_full_extensions(rec_ptr);
+        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+	  goto finish;   
+      }
+    }
+  }
+
+finish: 
+  if (error)                 /* negative: end of scan, positive: failure */
+    rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
+finish2:    
+  join_tab_scan->close();
+  return rc;
+}
+
+
+/*
+  Set match flag for a record in join buffer if it has not been set yet    
+
+  SYNOPSIS
+    set_match_flag_if_none()
+      first_inner     the join table to which this flag is attached
+      rec_ptr         pointer to the record in the join buffer 
+
+  DESCRIPTION
+    If the records of the table are accumulated in a join buffer the function
+    sets the match flag for the record in the buffer that is referred to by
+    the record from this cache positioned at 'rec_ptr'. 
+    The function also sets the match flag 'found' of the table first inner
+    if it has not been set before. 
+
+  NOTES
+    The function assumes that the match flag for any record in any cache
+    is placed in the first byte occupied by the record fields. 
+
+  RETURN VALUE
+    TRUE   the match flag is set by this call for the first time
+    FALSE  the match flag has been set before this call
+*/ 
+
+bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner,
+                                        uchar *rec_ptr)
+{
+  if (!first_inner->cache)
+  {
+    /*
+      No join buffer is used for the records of first_inner, so the
+      only match flag to maintain is first_inner->found.
+    */
+    bool first_time= !first_inner->found;
+    if (first_time)
+      first_inner->found= 1;
+    return first_time;
+  }
+
+  /*
+    Walk back through the chain of linked caches until the cache of
+    first_inner is reached, following the record references on the way.
+  */
+  for (JOIN_CACHE *cache= this; cache->join_tab != first_inner; )
+  {
+    cache= cache->prev_cache;
+    DBUG_ASSERT(cache);
+    rec_ptr= cache->get_rec_ref(rec_ptr);
+  }
+  /* The match flag is the first byte of the record reached */
+  if ((Match_flag) rec_ptr[0] == MATCH_FOUND)
+    return FALSE;
+  rec_ptr[0]= MATCH_FOUND;
+  first_inner->found= 1;
+  return TRUE;
+}
+
+
+/*
+  Generate all full extensions for a partial join record in the buffer    
+
+  SYNOPSIS
+    generate_full_extensions()
+      rec_ptr     pointer to the record from join buffer to generate extensions 
+
+  DESCRIPTION
+    The function first checks whether the current record of 'join_tab' matches
+    the partial join record from join buffer located at 'rec_ptr'. If it is the
+    case the function calls the join_tab->next_select method to generate
+    all full extension for this partial join match.
+      
+  RETURN VALUE
+    return one of enum_nested_loop_state.
+*/ 
+
+enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
+{
+  /*
+    No extensions are produced unless the current record of join_tab
+    matches the partial join record at rec_ptr.
+  */
+  if (!check_match(rec_ptr))
+    return NESTED_LOOP_OK;
+
+  if (join_tab->check_weed_out_table)
+  {
+    /* A weed-out table is attached to join_tab: consult it first */
+    int weedout_res= do_sj_dups_weedout(join->thd,
+                                        join_tab->check_weed_out_table);
+    if (weedout_res == -1)
+      return NESTED_LOOP_ERROR;
+    /* Any other non-zero result means: produce nothing for this match */
+    if (weedout_res)
+      return NESTED_LOOP_OK;
+  }
+
+  /*
+    Remember the record being extended and pass control to the next
+    join operation.
+  */
+  set_curr_rec_link(rec_ptr);
+  enum_nested_loop_state rc= (join_tab->next_select)(join, join_tab+1, 0);
+  if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+    reset(TRUE);
+  return rc;
+}
+
+
+/*
+  Check matching to a partial join record from the join buffer    
+
+  SYNOPSIS
+    check_match()
+      rec_ptr     pointer to the record from join buffer to check matching to 
+
+  DESCRIPTION
+    The function checks whether the current record of 'join_tab' matches
+    the partial join record from join buffer located at 'rec_ptr'. If this is
+    the case and 'join_tab' is the last inner table of a semi-join or an outer
+    join the function turns on the match flag for the 'rec_ptr' record unless
+    it has been already set.
+
+  NOTES
+    Setting the match flag on can trigger re-evaluation of pushdown conditions
+    for the record when join_tab is the last inner table of an outer join.
+      
+  RETURN VALUE
+    TRUE   there is a match
+    FALSE  there is no match
+*/ 
+
+inline bool JOIN_CACHE::check_match(uchar *rec_ptr)
+{
+  /* Check whether pushdown conditions are satisfied */
+  if (join_tab->select && join_tab->select->skip_record(join->thd) <= 0)
+    return FALSE;
+
+  if (!join_tab->is_last_inner_table())
+    return TRUE;                         /* no match flag to maintain here */
+
+  /* 
+     This is the last inner table of an outer join,
+     and maybe of other embedding outer joins, or
+     this is the last inner table of a semi-join.
+  */
+  JOIN_TAB *first_inner= join_tab->get_first_inner_table();
+  do
+  {
+    set_match_flag_if_none(first_inner, rec_ptr);
+    if (first_inner->check_only_first_match() &&
+        !join_tab->first_inner)
+      return TRUE;
+    /* 
+      This is the first match for the outer table row.
+      The function set_match_flag_if_none has turned the flag
+      first_inner->found on. The pushdown predicates for
+      inner tables must be re-evaluated with this flag on.
+      Note that, if first_inner is the first inner table 
+      of a semi-join, but is not an inner table of an outer join
+      such that 'not exists' optimization can  be applied to it, 
+      the re-evaluation of the pushdown predicates is not needed.
+    */      
+    for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
+    {
+      if (tab->select && tab->select->skip_record(join->thd) <= 0)
+        return FALSE;
+    }
+  }
+  while ((first_inner= first_inner->first_upper) &&
+         first_inner->last_inner == join_tab);
+  /* None of the re-evaluated pushdown conditions rejected the record */
+  return TRUE;
+} 
+
+
+/*
+  Add null complements for unmatched outer records from join buffer    
+
+  SYNOPSIS
+    join_null_complements()
+      skip_last    do not add null complements for the last record 
+
+  DESCRIPTION
+    This function is called only for inner tables of outer joins.
+    The function retrieves all rows from the join buffer and adds null
+    complements for those of them that do not have matches for outer
+    table records.
+    If the 'join_tab' is the last inner table of the embedding outer 
+    join and the null complemented record satisfies the outer join
+    condition then the corresponding match flag is turned on
+    unless it has been set earlier. This setting may trigger
+    re-evaluation of pushdown conditions for the record. 
+
+  NOTES
+    The same implementation of the virtual method join_null_complements
+    is used for BNL/BNLH/BKA/BKAH join algorithms.
+      
+  RETURN VALUE
+    return one of enum_nested_loop_state.
+*/ 
+
+enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
+{
+  ulonglong cnt; 
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  bool is_first_inner= join_tab == join_tab->first_unmatched;
+ 
+  /* Return at once if there are no records in the join buffer */
+  if (!records)
+    return NESTED_LOOP_OK;
+  /* With skip_last the last record is left out, unless key access is used */
+  cnt= records - (is_key_access() ? 0 : test(skip_last));
+
+  /* This function may be called only for inner tables of outer joins */ 
+  DBUG_ASSERT(join_tab->first_inner);
+
+  for ( ; cnt; cnt--)
+  {
+    if (join->thd->killed)
+    {
+      /* The user has aborted the execution of the query */
+      join->thd->send_kill_message();
+      rc= NESTED_LOOP_KILLED;
+      goto finish;
+    }
+    /* Just skip the whole record if a match for it has been already found */
+    if (!is_first_inner || !skip_if_matched())
+    {
+      get_record();
+      /* The outer row is complemented by nulls for each inner table */
+      restore_record(join_tab->table, s->default_values);
+      mark_as_null_row(join_tab->table);  
+      rc= generate_full_extensions(get_curr_rec());
+      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+        goto finish;
+    }
+  }
+
+finish:
+  return rc;
+}
+
+
+/*
+  Add a comment on the join algorithm employed by the join cache 
+
+  SYNOPSIS
+    print_explain_comment()
+      str  string to add the comment on the employed join algorithm to
+
+  DESCRIPTION
+    This function adds info on the type of the used join buffer (flat or
+    incremental) and on the type of the employed join algorithm (BNL,
+    BNLH, BKA or BKAH) to the end of the string str.
+
+  RETURN VALUE
+    none
+*/ 
+
+void JOIN_CACHE::print_explain_comment(String *str)
+{
+  /*
+    The buffer is reported as incremental when this cache is linked
+    to a previous one, and as flat otherwise.
+  */
+  str->append(STRING_WITH_LEN(" ("));
+  str->append(prev_cache ? "incremental" : "flat");
+  str->append(STRING_WITH_LEN(", "));
+
+  const char *alg_name;
+  switch (get_join_alg()) {
+  case BNL_JOIN_ALG:
+    alg_name= "BNL"; break;
+  case BNLH_JOIN_ALG:
+    alg_name= "BNLH"; break;
+  case BKA_JOIN_ALG:
+    alg_name= "BKA"; break;
+  case BKAH_JOIN_ALG:
+    alg_name= "BKAH"; break;
+  default:
+    /* Unexpected value: the algorithm name is left empty */
+    alg_name= "";
+    DBUG_ASSERT(0);
+  }
+  str->append(alg_name);
+  str->append(STRING_WITH_LEN(" join"));
+  str->append(STRING_WITH_LEN(")"));
+}
+
+
+static void add_mrr_explain_info(String *str, uint mrr_mode, handler *file)
+{
+  char mrr_info[128]={0};
+  int info_len= file->multi_range_read_explain_info(mrr_mode, mrr_info,
+                                                    sizeof(mrr_info));
+
+  /* Nothing is appended when the handler has provided no info */
+  if (info_len <= 0)
+    return;
+  str->append(STRING_WITH_LEN("; "));
+  str->append(mrr_info, info_len);
+}
+
+
+void JOIN_CACHE_BKA::print_explain_comment(String *str)
+{
+  JOIN_CACHE::print_explain_comment(str);    /* buffer type and algorithm */
+  add_mrr_explain_info(str, mrr_mode, join_tab->table->file);  /* MRR info */
+}
+
+
+void JOIN_CACHE_BKAH::print_explain_comment(String *str)
+{
+  JOIN_CACHE::print_explain_comment(str);    /* buffer type and algorithm */
+  add_mrr_explain_info(str, mrr_mode, join_tab->table->file);  /* MRR info */
+}
+
+
+/* 
+  Initialize a hashed join cache       
+
+  SYNOPSIS
+    init()
+
+  DESCRIPTION
+    The function initializes the cache structure with a hash table in it.
+    The hash table will be used to store key values for the records from
+    the join buffer.
+    The function allocates memory for the join buffer and for descriptors of
+    the record fields stored in the buffer.
+    The function also initializes a hash table for record keys within the join
+    buffer space.
+
+  NOTES
+    The function is supposed to be called by the init methods of the classes 
+    derived from JOIN_CACHE_HASHED.
+  
+  RETURN VALUE
+    0   initialization with buffer allocations has been succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_HASHED::init()
+{
+  int rc= 0;
+  TABLE_REF *ref= &join_tab->ref;
+
+  DBUG_ENTER("JOIN_CACHE_HASHED::init");
+
+  /* No hash table exists until init_hash_table() places one in the buffer */
+  hash_table= 0;
+  key_entries= 0;
+
+  key_length= ref->key_length;
+
+  /* Let the base class initialize first; give up if it reports a failure */
+  if ((rc= JOIN_CACHE::init()))
+    DBUG_RETURN (rc);
+
+  /* Buffer of key_length bytes used to build key values for lookups */
+  if (!(key_buff= (uchar*) sql_alloc(key_length)))
+    DBUG_RETURN(1);
+
+  /* Take into account a reference to the next record in the key chain */
+  pack_length+= get_size_of_rec_offset(); 
+  pack_length_with_blob_ptrs+= get_size_of_rec_offset();
+
+  ref_key_info= join_tab->get_keyinfo_by_key_no(join_tab->ref.key);
+  ref_used_key_parts= join_tab->ref.key_parts;
+
+  /*
+    Start with the byte-wise hash and comparison functions. Switch to the
+    'complex' pair as soon as one of the used key parts has a field for
+    which eq_cmp_as_binary() is false.
+  */
+  hash_func= &JOIN_CACHE_HASHED::get_hash_idx_simple;
+  hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_simple;
+
+  KEY_PART_INFO *key_part= ref_key_info->key_part;
+  KEY_PART_INFO *key_part_end= key_part+ref_used_key_parts;
+  for ( ; key_part < key_part_end; key_part++)
+  {
+    if (!key_part->field->eq_cmp_as_binary())
+    {
+      hash_func= &JOIN_CACHE_HASHED::get_hash_idx_complex;
+      hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_complex;
+      break;
+    }
+  }
+      
+  /* The return value is ignored: init_hash_table() always returns 0 */
+  init_hash_table();
+
+  /*
+    Distance from the start of a record's chain link to the record fields:
+    the chain link, the record length and, for an incremental cache, the
+    reference into the previous cache.
+  */
+  rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
+                     (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
+
+  /*
+    With embedded keys data_fields_offset is the total length of the
+    first flag_fields field descriptors; otherwise it stays 0.
+  */
+  data_fields_offset= 0;
+  if (use_emb_key)
+  {
+    CACHE_FIELD *copy= field_descr;
+    CACHE_FIELD *copy_end= copy+flag_fields;
+    for ( ; copy < copy_end; copy++)
+      data_fields_offset+= copy->length;
+  } 
+
+  DBUG_RETURN(rc);
+}
+
+
+/* 
+  Initialize the hash table of a hashed join cache 
+
+  SYNOPSIS
+    init_hash_table()
+
+  DESCRIPTION
+    The function estimates the number of hash table entries in the hash
+    table to be used and initializes this hash table within the join buffer
+    space.
+
+  RETURN VALUE
+    Currently the function always returns 0;
+*/
+
+int JOIN_CACHE_HASHED::init_hash_table()
+{
+  hash_table= 0;
+  key_entries= 0;
+
+  /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
+  uint max_size_of_key_ofs= max(2, get_size_of_rec_offset());  
+  /*
+    Try offset sizes 2, 4, ... and stop at the first one that is wide enough
+    to address all key entries that could ever be placed in the buffer.
+  */
+  for (size_of_key_ofs= 2;
+       size_of_key_ofs <= max_size_of_key_ofs;
+       size_of_key_ofs+= 2)
+  {    
+    key_entry_length= get_size_of_rec_offset() + // key chain header
+                      size_of_key_ofs +          // reference to the next key 
+                      (use_emb_key ?  get_size_of_rec_offset() : key_length);
+
+    /* Expected buffer space per record, its key entry and hash slot included */
+    ulong space_per_rec= avg_record_length +
+                         avg_aux_buffer_incr +
+                         key_entry_length+size_of_key_ofs;
+    uint n= buff_size / space_per_rec;
+
+    /*
+      TODO: Make a better estimate for this upper bound of
+            the number of records in the join buffer.
+    */
+    uint max_n= buff_size / (pack_length-length+
+                             key_entry_length+size_of_key_ofs);
+
+    /* Size the table for about 70% occupancy, with at least one entry */
+    hash_entries= (uint) (n / 0.7);
+    set_if_bigger(hash_entries, 1);
+    
+    if (offset_size(max_n*key_entry_length) <=
+        size_of_key_ofs)
+      break;
+  }
+   
+  /* Initialize the hash table */ 
+  /* The table occupies the last hash_entries*size_of_key_ofs bytes of buff */
+  hash_table= buff + (buff_size-hash_entries*size_of_key_ofs);
+  cleanup_hash_table();
+  curr_key_entry= hash_table;
+
+  return 0;
+}
+
+
+/*
+  Reallocate the join buffer of a hashed join cache
+ 
+  SYNOPSIS
+    realloc_buffer()
+
+  DESCRIPTION
+    The function reallocates the join buffer of the hashed join cache.
+    After this it initializes a hash table within the buffer space and
+    resets the join cache for writing.
+
+  NOTES
+    The function assumes that buff_size contains the new value for the join
+    buffer size.  
+
+  RETURN VALUE
+    0   if the buffer has been successfully reallocated
+    1   otherwise
+*/
+
+int JOIN_CACHE_HASHED::realloc_buffer()
+{
+  /* Drop the current buffer, then ask for one of the new buff_size */
+  free();
+  if (!(buff= (uchar*) my_malloc(buff_size, MYF(0))))
+  {
+    /*
+      The allocation failed. The hash table lives inside the join buffer,
+      so it cannot be set up: init_hash_table() would compute hash_table
+      relative to a NULL buff and cleanup_hash_table() would then zero-fill
+      through that pointer. Forget every pointer into the hash table
+      instead; reset() skips the hash table clean-up when hash_table is 0.
+    */
+    hash_table= 0;
+    last_key_entry= 0;
+    key_entries= 0;
+    reset(TRUE);
+    return 1;
+  }
+
+  /* Place a fresh hash table in the new buffer and prepare for writing */
+  init_hash_table();
+  reset(TRUE);
+  return 0;
+}
+
+
+/*
+  Get maximum size of the additional space per record used for record keys
+
+  SYNOPSIS
+    get_max_key_addon_space_per_record()
+  
+  DESCRIPTION
+    The function returns the size of the space occupied by one key entry
+    and one hash table entry.
+
+  RETURN VALUE
+    maximum size of the additional space per record that is used to store
+    record keys in the hash table
+*/
+
+uint JOIN_CACHE_HASHED::get_max_key_addon_space_per_record()
+{
+  TABLE_REF *ref= &join_tab->ref;
+
+  /* The key itself, or a reference to it when it is embedded in the record */
+  ulong len= use_emb_key ? get_size_of_rec_offset() : ref->key_length;
+
+  len+= size_of_rec_ofs;     /* size of the key chain header */
+  len+= size_of_rec_ofs;     /* >= size of the reference to the next key */
+  /*
+    The number of hash entries is bounded by ceiling(N/0.7), N being the
+    maximum number of records in the buffer, hence two hash table entries
+    per record are accounted for.
+  */
+  len+= 2*size_of_rec_ofs;   /* >= 2*(size of hash table entry) */
+
+  return len;
+}
+
+
+/* 
+  Reset the buffer of a hashed join cache for reading/writing
+
+  SYNOPSIS
+    reset()
+      for_writing  if it's TRUE the function reset the buffer for writing
+
+  DESCRIPTION
+    This implementation of the virtual function reset() resets the join buffer
+    of the JOIN_CACHE_HASHED class for reading or writing.
+    Additionally to what the default implementation does this function
+    cleans up the hash table allocated within the buffer.  
+    
+  RETURN VALUE
+    none
+*/
+ 
+void JOIN_CACHE_HASHED::reset(bool for_writing)
+{
+  /* Generic part of the reset first */
+  JOIN_CACHE::reset(for_writing);
+
+  /* An existing hash table is emptied only when the buffer is to be refilled */
+  if (hash_table && for_writing)
+    cleanup_hash_table();
+
+  /* Restart the iteration over key entries from the hash table position */
+  curr_key_entry= hash_table;
+}
+
+
+/* 
+  Add a record into the buffer of a hashed join cache
+
+  SYNOPSIS
+    put_record()
+
+  DESCRIPTION
+    This implementation of the virtual function put_record writes the next
+    matching record into the join buffer of the JOIN_CACHE_HASHED class.
+    Additionally to what the default implementation does this function
+    performs the following. 
+    It extracts from the record the key value used in lookups for matching
+    records and searches for this key in the hash tables from the join cache.
+    If it finds the key in the hash table it joins the record to the chain
+    of records with this key. If the key is not found in the hash table the
+    key is placed into it and a chain containing only the newly added record 
+    is attached to the key entry. The key value is either placed in the hash 
+    element added for the key or, if the use_emb_key flag is set, remains in
+    the record from the partial join.
+    If the match flag field of a record contains MATCH_IMPOSSIBLE the key is
+    not created for this record. 
+    
+  RETURN VALUE
+    TRUE    if it has been decided that it should be the last record
+            in the join buffer,
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE_HASHED::put_record()
+{
+  bool is_full;
+  uchar *key;
+  uint key_len= key_length;
+  uchar *key_ref_ptr;
+  uchar *link= 0;
+  TABLE_REF *ref= &join_tab->ref;
+  /* The chain link of the new record is stored right before the record */
+  uchar *next_ref_ptr= pos;
+
+  /* Reserve the space for the chain link */
+  pos+= get_size_of_rec_offset();
+  /* Write the record into the join buffer */  
+  if (prev_cache)
+    link= prev_cache->get_curr_rec_link();
+  write_record_data(link, &is_full);
+
+  /* No key is created when last_written_is_null_compl is set */
+  if (last_written_is_null_compl)
+    return is_full;    
+
+  if (use_emb_key)
+    key= get_curr_emb_key();
+  else
+  {
+    /* Build the key over the fields read into the record buffers */ 
+    cp_buffer_from_ref(join->thd, join_tab->table, ref);
+    key= ref->key_buff;
+  }
+
+  /* Look for the key in the hash table */
+  if (key_search(key, key_len, &key_ref_ptr))
+  {
+    uchar *last_next_ref_ptr;
+    /* 
+      The key is found in the hash table. 
+      Add the record to the circular list of the records attached to this key.
+      Below 'rec' is the record to be added into the record chain for the found
+      key (its chain link is at next_ref_ptr), 'key_entry' is the key entry
+      found by key_search(): key_ref_ptr points to its reference to the next
+      key, which is followed by the reference to the last record of the chain.
+    */
+    last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset());
+    /* rec->next_rec= key_entry->last_rec->next_rec */
+    memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset());
+    /* key_entry->last_rec->next_rec= rec */ 
+    store_next_rec_ref(last_next_ref_ptr, next_ref_ptr);
+    /* key_entry->last_rec= rec */
+    store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr);
+  }
+  else
+  {
+    /* 
+      The key is not found in the hash table.
+      Put the key into the join buffer linking it with the keys for the
+      corresponding hash entry. Create a circular list with one element
+      referencing the record and attach the list to the key in the buffer.
+    */
+    /* Key entries grow downwards from last_key_entry */
+    uchar *cp= last_key_entry;
+    cp-= get_size_of_rec_offset()+get_size_of_key_offset();
+    /* Hook the new entry to the end of the key chain, then terminate it */
+    store_next_key_ref(key_ref_ptr, cp);
+    store_null_key_ref(cp);
+    /* A one-element circular list: the record refers to itself */
+    store_next_rec_ref(next_ref_ptr, next_ref_ptr);
+    store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr);
+    if (use_emb_key)
+    {
+      /* Store only a reference to the key kept in the record */
+      cp-= get_size_of_rec_offset();
+      store_emb_key_ref(cp, key);
+    }
+    else
+    {
+      /* Store a copy of the key value itself */
+      cp-= key_len;
+      memcpy(cp, key, key_len);
+    }
+    last_key_entry= cp;
+    DBUG_ASSERT(last_key_entry >= end_pos);
+    /* Increment the counter of key_entries in the hash table */ 
+    key_entries++;
+  }  
+  return is_full;
+}
+
+
+/*
+  Read the next record from the buffer of a hashed join cache
+
+  SYNOPSIS
+    get_record()
+
+  DESCRIPTION
+    Additionally to what the default implementation of the virtual 
+    function get_record does this implementation skips the link element
+    used to connect the records with the same key into a chain. 
+
+  RETURN VALUE
+    TRUE    there are no more records to read from the join buffer
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE_HASHED::get_record()
+{
+  /* Step over the link element that chains records with the same key */
+  pos+= get_size_of_rec_offset();
+
+  /* The rest is the regular record read of the base class */
+  return JOIN_CACHE::get_record();
+}
+
+
+/* 
+  Skip record from a hashed join buffer if its match flag is set to MATCH_FOUND
+
+  SYNOPSIS
+    skip_if_matched()
+
+  DESCRIPTION
+    This implementation of the virtual function skip_if_matched does
+    the same as the default implementation does, but it takes into account
+    the link element used to connect the records with the same key into a chain. 
+
+  RETURN VALUE
+    TRUE    the match flag is MATCH_FOUND  and the record has been skipped
+    FALSE   otherwise 
+*/
+
+bool JOIN_CACHE_HASHED::skip_if_matched()
+{
+  uchar *rec_start= pos;
+
+  /* Let the base class see the record without the leading chain link */
+  pos= rec_start + get_size_of_rec_offset();
+  if (JOIN_CACHE::skip_if_matched())
+    return TRUE;
+
+  /* Nothing has been skipped: put the cursor back where it was */
+  pos= rec_start;
+  return FALSE;
+}
+
+
+/* 
+  Skip record from a hashed join buffer if its match flag dictates to do so
+
+  SYNOPSIS
+    skip_if_not_needed_match()
+
+  DESCRIPTION
+    This implementation of the virtual function skip_if_not_needed_match does
+    the same as the default implementation does, but it takes into account
+    the link element used to connect the records with the same key into a chain. 
+
+  RETURN VALUE
+    TRUE    the match flag dictates to skip the record
+    FALSE   the match flag is off 
+*/
+
+bool JOIN_CACHE_HASHED::skip_if_not_needed_match()
+{
+  uchar *rec_start= pos;
+
+  /* Let the base class see the record without the leading chain link */
+  pos= rec_start + get_size_of_rec_offset();
+  if (JOIN_CACHE::skip_if_not_needed_match())
+    return TRUE;
+
+  /* Nothing has been skipped: put the cursor back where it was */
+  pos= rec_start;
+  return FALSE;
+}
+
+
+/* 
+  Search for a key in the hash table of the join buffer
+
+  SYNOPSIS
+    key_search()
+      key             pointer to the key value
+      key_len         key value length
+      key_ref_ptr OUT position of the reference to the next key from 
+                      the hash element for the found key, or
+                      a position where the reference to the hash 
+                      element for the key is to be added in the
+                      case when the key has not been found
+      
+  DESCRIPTION
+    The function looks for a key in the hash table of the join buffer.
+    If the key is found the function returns the position of the reference
+    to the next key from the hash element for the given key. 
+    Otherwise the function returns the position where the reference to the
+    newly created hash element for the given key is to be added.  
+
+  RETURN VALUE
+    TRUE    the key is found in the hash table
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len,
+                                   uchar **key_ref_ptr) 
+{
+  /* Start from the hash table slot selected by the hash value of the key */
+  uint slot= (this->*hash_func)(key, key_length);
+  uchar *cur_ref= hash_table + size_of_key_ofs*slot;
+  bool found= FALSE;
+
+  /* Follow the chain of key entries until a match or a null reference */
+  while (!found && !is_null_key_ref(cur_ref))
+  {
+    cur_ref= get_next_key_ref(cur_ref);
+
+    /* The key (or the reference to an embedded key) precedes cur_ref */
+    uchar *stored_key;
+    if (use_emb_key)
+      stored_key= get_emb_key(cur_ref - get_size_of_rec_offset());
+    else
+      stored_key= cur_ref - key_length;
+
+    found= (this->*hash_cmp_func)(stored_key, key, key_len);
+  }
+
+  /* Either the matching entry or the place to hook a new entry to */
+  *key_ref_ptr= cur_ref;
+  return found;
+} 
+
+
+/* 
+  Hash function that considers a key in the hash table as byte array
+
+  SYNOPSIS
+    get_hash_idx_simple()
+      key             pointer to the key value
+      key_len         key value length
+      
+  DESCRIPTION
+    The function calculates an index of the hash entry in the hash table
+    of the join buffer for the given key. It considers the key just as
+    a sequence of bytes of the length key_len.
+
+  RETURN VALUE
+    the calculated index of the hash entry for the given key  
+*/
+
+inline
+uint JOIN_CACHE_HASHED::get_hash_idx_simple(uchar* key, uint key_len)
+{
+  ulong hash= 1;
+  ulong step= 4;
+
+  /* Mix the key into the hash value one byte at a time */
+  for (uchar *cur= key, *key_end= key + key_len; cur < key_end; cur++)
+  {
+    ulong mixed= (ulong) ((((uint) hash & 63) + step) * ((uint) *cur));
+    hash^= mixed + (hash << 8);
+    step+= 3;
+  }
+
+  /* Reduce the hash value to an index of a hash table entry */
+  return hash % hash_entries;
+}
+
+
+/* 
+  Hash function that takes into account collations of the components of the key  
+
+  SYNOPSIS
+    get_hash_idx_complex()
+      key             pointer to the key value
+      key_len         key value length
+      
+  DESCRIPTION
+    The function calculates an index of the hash entry in the hash table
+    of the join buffer for the given key. It takes into account that the
+    components of the key may be of a varchar type with different collations.
+    The function guarantees that the same hash value for any two equal
+    keys that may differ as byte sequences.
+    The function takes the info about the components of the key, their
+    types and used collations from the class member ref_key_info containing
+    a pointer to the descriptor of the index that can be used for the join
+    operation.
+
+  RETURN VALUE
+    the calculated index of the hash entry for the given key  
+*/
+
+inline
+uint JOIN_CACHE_HASHED::get_hash_idx_complex(uchar *key, uint key_len)
+{
+  /*
+    The hash value is computed by key_hashnr() over the first
+    ref_used_key_parts parts of ref_key_info and reduced modulo the number
+    of hash entries. The parameter key_len is not used here.
+  */
+  return 
+    (uint) (key_hashnr(ref_key_info, ref_used_key_parts, key) % hash_entries);
+}
+
+
+/* 
+  Compare two key entries in the hash table as sequence of bytes
+
+  SYNOPSIS
+    equal_keys_simple()
+      key1            pointer to the first key entry
+      key2            pointer to the second key entry 
+      key_len         the length of the key values
+      
+  DESCRIPTION
+    The function compares two key entries in the hash table key1 and key2
+    as two sequences bytes of the length key_len
+
+  RETURN VALUE
+    TRUE       key1 coincides with key2
+    FALSE      otherwise
+*/
+
+inline
+bool JOIN_CACHE_HASHED::equal_keys_simple(uchar *key1, uchar *key2,
+                                          uint key_len)
+{
+  /* Keys are equal when their first key_len bytes coincide */
+  return !memcmp(key1, key2, key_len);
+}
+
+
+/* 
+  Compare two key entries taking into account the used collation
+
+  SYNOPSIS
+    equal_keys_complex()
+      key1            pointer to the first key entry
+      key2            pointer to the second key entry 
+      key_len         the length of the key values
+      
+  DESCRIPTION
+    The function checks whether two key entries in the hash table
+    key1 and key2 are equal as, possibly, compound keys of a certain
+    structure whose components may be of a varchar type and may
+    employ different collations.
+    The descriptor of the key structure is taken from the class
+    member ref_key_info.
+
+  RETURN VALUE
+    TRUE       key1 is equal to key2
+    FALSE      otherwise
+*/
+
+inline
+bool JOIN_CACHE_HASHED::equal_keys_complex(uchar *key1, uchar *key2,
+                                          uint key_len)
+{
+  /*
+    The comparison is delegated to key_buf_cmp() over the used parts of
+    ref_key_info; key_len is not needed for it.
+  */
+  return !key_buf_cmp(ref_key_info, ref_used_key_parts, key1, key2);
+}
+
+
+/* 
+  Clean up the hash table of the join buffer
+
+  SYNOPSIS
+    cleanup_hash_table()
+      
+  DESCRIPTION
+    The function cleans up the hash table in the join buffer removing all
+    hash elements from the table. 
+
+  RETURN VALUE
+    none  
+*/
+
+void JOIN_CACHE_HASHED:: cleanup_hash_table()
+{
+  /* Zero everything from the start of the hash table to the buffer end */
+  size_t table_size= (buff+buff_size)-hash_table;
+  bzero(hash_table, table_size);
+
+  /* No key entries are left: the next one goes right below the table */
+  key_entries= 0;
+  last_key_entry= hash_table;
+}
+
+
+/*
+  Check whether all records in a key chain have their match flags set on   
+
+  SYNOPSIS
+    check_all_match_flags_for_key()
+      key_chain_ptr     
+
+  DESCRIPTION
+    This function retrieves records in the given circular chain and checks
+    whether their match flags are set on. The parameter key_chain_ptr shall
+    point to the position in the join buffer storing the reference to the
+    last element of this chain. 
+            
+  RETURN VALUE
+    TRUE   if each retrieved record has its match flag set to MATCH_FOUND
+    FALSE  otherwise 
+*/
+
+bool JOIN_CACHE_HASHED::check_all_match_flags_for_key(uchar *key_chain_ptr)
+{
+  /* The chain is circular: the walk starts and ends at its last element */
+  uchar *chain_last= get_next_rec_ref(key_chain_ptr);
+  uchar *chain_cur= chain_last;
+
+  for (;;)
+  {
+    chain_cur= get_next_rec_ref(chain_cur);
+    if (get_match_flag_by_pos(chain_cur + rec_fields_offset) != MATCH_FOUND)
+      return FALSE;
+    /* Back at the last element: every record has been checked */
+    if (chain_cur == chain_last)
+      return TRUE;
+  }
+}
+  
+
+/* 
+  Get the next key built for the records from the buffer of a hashed join cache
+
+  SYNOPSIS
+    get_next_key()
+      key    pointer to the buffer where the key value is to be placed
+
+  DESCRIPTION
+    The function reads the next key value stored in the hash table of the
+    join buffer. Depending on the value of the use_emb_key flag of the
+    join cache the value is read either from the table itself or from
+    the record field where it occurs. 
+
+  RETURN VALUE
+    length of the key value - if the starting value of 'curr_key_entry'
+    differs from the value of 'last_key_entry', i.e. a key entry is left,
+    0 - otherwise.     
+*/
+
+uint JOIN_CACHE_HASHED::get_next_key(uchar ** key)
+{  
+  /* All key entries have been iterated over */
+  if (curr_key_entry == last_key_entry)
+    return 0;
+
+  /* Move the cursor down by one key entry */
+  uchar *entry= curr_key_entry - key_entry_length;
+  curr_key_entry= entry;
+
+  /* The entry holds either the key or a reference to an embedded key */
+  if (use_emb_key)
+    *key= get_emb_key(entry);
+  else
+    *key= entry;
+
+  DBUG_ASSERT(*key >= buff && *key < hash_table);
+
+  return key_length;
+}
+
+
+/* 
+  Initiate an iteration process over records in the joined table
+
+  SYNOPSIS
+    open()
+
+  DESCRIPTION
+    The function initiates the process of iteration over records from the 
+    joined table recurrently performed by the BNL/BNLH join algorithm.  
+
+  RETURN VALUE   
+    0            the initiation is a success 
+    error code   otherwise     
+*/
+
+int JOIN_TAB_SCAN::open()
+{
+  /* The first call of next() must not advance the scan */
+  is_first_record= TRUE;
+
+  /* Process the status of the preceding tables (save argument is FALSE) */
+  save_or_restore_used_tabs(join_tab, FALSE);
+
+  int rc= join_init_read_record(join_tab);
+  return rc;
+}
+
+
+/* 
+  Read the next record that can match while scanning the joined table
+
+  SYNOPSIS
+    next()
+
+  DESCRIPTION
+    The function reads the next record from the joined table that can
+    match some records in the buffer of the join cache 'cache'. To do
+    this the function calls the function that scans table records and
+    looks for the next one that meets the condition pushed to the
+    joined table join_tab.
+
+  NOTES
+    The function catches the signal that kills the query.
+
+  RETURN VALUE   
+    0            the next record exists and has been successfully read 
+    error code   otherwise     
+*/
+
+int JOIN_TAB_SCAN::next()
+{
+  READ_RECORD *reader= &join_tab->read_record;
+  SQL_SELECT *pushed_cond= join_tab->cache_select;
+  int err= 0;
+
+  /* No read is issued on the very first call after open() */
+  if (is_first_record)
+    is_first_record= FALSE;
+  else
+    err= reader->read_record(reader);
+
+  for (;;)
+  {
+    if (err)
+      return err;
+    update_virtual_fields(join->thd, join_tab->table);
+
+    /* Without a pushed condition every record is a candidate */
+    if (!pushed_cond)
+      return err;
+
+    int skip_rc= pushed_cond->skip_record(join->thd);
+    if (skip_rc > 0)
+      return err;
+    if (join->thd->killed || skip_rc < 0) 
+      return 1;
+
+    /* 
+      The record does not meet the condition pushed to the table join_tab:
+      move to the next one.
+    */
+    err= reader->read_record(reader);
+  }
+}
+
+
+/*
+  Move the table status of one join tab between tab->table->status and
+  tab->status.
+
+  @param tab   the join tab to process
+  @param save  TRUE   copy tab->status into tab->table->status
+               FALSE  copy tab->table->status into tab->status and
+                      set tab->table->status to 0
+*/
+
+static inline void save_or_restore_tab_status(JOIN_TAB *tab, bool save)
+{
+  if (save)
+    tab->table->status= tab->status;
+  else
+  {
+    tab->status= tab->table->status;
+    tab->table->status= 0;
+  }
+}
+
+
+/*
+  Walk back in join order from join_tab until we encounter a join tab with
+  tab->cache!=NULL, and save/restore tab->table->status along the way.
+  For a join tab that has bush children the children are processed as well.
+
+  The walk starts at the tab preceding join_tab and stops before the first
+  tab of the current nest (the first child of bush_root_tab, or the first
+  non-constant tab of the join); that first tab itself is not processed.
+  NOTE(review): confirm that skipping the first tab is intended.
+
+  @param join_tab  the join tab the walk starts from (not processed itself)
+  @param save      TRUE   copy the status kept in each join tab into the
+                          status of its table
+                   FALSE  copy the status of each table into its join tab
+                          and set the table status to 0
+*/
+
+static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save)
+{
+  JOIN_TAB *first= join_tab->bush_root_tab?
+                     join_tab->bush_root_tab->bush_children->start :
+                     join_tab->join->join_tab + join_tab->join->const_tables;
+
+  for (JOIN_TAB *tab= join_tab-1; tab != first && !tab->cache; tab--)
+  {
+    if (tab->bush_children)
+    {
+      /*
+        Each child is processed through its own status members. The FALSE
+        branch used to operate on 'tab' here, which left the children
+        untouched and overwrote the status kept in 'tab' with 0.
+      */
+      for (JOIN_TAB *child= tab->bush_children->start;
+           child != tab->bush_children->end;
+           child++)
+        save_or_restore_tab_status(child, save);
+    }
+
+    save_or_restore_tab_status(tab, save);
+  }
+}
+
+
+/* 
+  Perform finalizing actions for a scan over the table records
+
+  SYNOPSIS
+    close()
+
+  DESCRIPTION
+    The function performs the necessary restoring actions after
+    the table scan over the joined table has been finished.
+
+  RETURN VALUE   
+    none      
+*/
+
+void JOIN_TAB_SCAN::close()
+{
+  /*
+    Counterpart of the call made in open(): with the second argument TRUE
+    the status kept in each preceding join tab is copied back into the
+    status of its table.
+  */
+  save_or_restore_used_tabs(join_tab, TRUE);
+}
+
+
+/*
+  Prepare to iterate over the BNL join cache buffer to look for matches 
+
+  SYNOPSIS
+    prepare_look_for_matches()
+      skip_last   <-> ignore the last record in the buffer
+
+  DESCRIPTION
+    The function prepares the join cache for an iteration over the
+    records in the join buffer. The iteration is performed when looking
+    for matches for the record from the joined table join_tab that 
+    has been placed into the record buffer of the joined table.
+    If the value of the parameter skip_last is TRUE then the last
+    record from the join buffer is ignored.
+    The function initializes the counter of the records that have been
+    not iterated over yet.
+    
+  RETURN VALUE   
+    TRUE    there are no records in the buffer to iterate over 
+    FALSE   otherwise
+*/
+    
+bool JOIN_CACHE_BNL::prepare_look_for_matches(bool skip_last)
+{
+  /* An empty buffer: nothing to iterate over */
+  if (records == 0)
+    return TRUE;
+
+  /* Position the cache for reading from its first record */
+  reset(FALSE);
+
+  /* All records are to be visited, except the last one if so requested */
+  rem_records= skip_last ? records-1 : records;
+  return !rem_records;
+}
+
+
+/*
+  Get next record from the BNL join cache buffer when looking for matches 
+
+  SYNOPSIS
+    get_next_candidate_for_match
+
+  DESCRIPTION
+    This method is used for iterations over the records from the join
+    cache buffer when looking for matches for records from join_tab.
+    The methods performs the necessary preparations to read the next record
+    from the join buffer into the record buffer by the method
+    read_next_candidate_for_match, or, to skip the next record from the join 
+    buffer by the method skip_next_candidate_for_match.    
+    This implementation of the virtual method get_next_candidate_for_match
+    just  decrements the counter of the records that are to be iterated over
+    and returns the current value of the cursor 'pos' as the position of 
+    the record to be processed. 
+    
+  RETURN VALUE    
+    pointer to the position right after the prefix of the current record
+    in the join buffer if there is another record to iterate over,
+    0 - otherwise.  
+*/
+
+uchar *JOIN_CACHE_BNL::get_next_candidate_for_match()
+{
+  /* Every record has been iterated over already */
+  if (rem_records == 0)
+    return 0;
+
+  --rem_records;
+  /* The candidate is identified by the position right after its prefix */
+  uchar *candidate= pos + base_prefix_length;
+  return candidate;
+} 
+
+
+/*
+  Check whether the matching record from the BNL cache is to be skipped 
+
+  SYNOPSIS
+    skip_next_candidate_for_match
+    rec_ptr  pointer to the position in the join buffer right after the prefix 
+             of the current record
+
+  DESCRIPTION
+    This implementation of the virtual function just calls the
+    method skip_if_not_needed_match to check whether the record referenced by
+    rec_ptr has its match flag set either to MATCH_FOUND and join_tab is the
+    first inner table of a semi-join, or it's set to MATCH_IMPOSSIBLE and
+    join_tab is the first inner table of an outer join.
+    If so, the function just skips this record setting the value of the
+    cursor 'pos' to the position right after it.
+
+  RETURN VALUE    
+    TRUE   the record referenced by rec_ptr has been skipped
+    FALSE  otherwise  
+*/
+
+bool JOIN_CACHE_BNL::skip_next_candidate_for_match(uchar *rec_ptr)
+{
+  /* Step back over the prefix to the very beginning of the record */
+  uchar *rec_start= rec_ptr - base_prefix_length;
+  pos= rec_start;
+
+  bool skipped= skip_if_not_needed_match();
+  return skipped;
+}
+
+
+/*
+  Read next record from the BNL join cache buffer when looking for matches 
+
+  SYNOPSIS
+    read_next_candidate_for_match
+    rec_ptr  pointer to the position in the join buffer right after the prefix
+             the current record.
+
+  DESCRIPTION
+    This implementation of the virtual method read_next_candidate_for_match
+    calls the method get_record to read the record referenced by rec_ptr from
+    the join buffer into the record buffer. If this record refers to the
+    fields in the other join buffers the call of get_record ensures that
+    these fields are read into the corresponding record buffers as well.
+    This function is supposed to be called after a successful call of
+    the method get_next_candidate_for_match.
+    
+  RETURN VALUE   
+    none
+*/
+
+void JOIN_CACHE_BNL::read_next_candidate_for_match(uchar *rec_ptr)
+{
+  /* Step back over the prefix to the very beginning of the record */
+  uchar *rec_start= rec_ptr - base_prefix_length;
+  pos= rec_start;
+
+  /* Read the record at the cursor; the result of get_record() is ignored */
+  get_record();
+} 
+
+
+/*
+  Initialize the BNL join cache 
+
+  SYNOPSIS
+    init
+
+  DESCRIPTION
+    The function initializes the cache structure. It is supposed to be called
+    right after a constructor for the JOIN_CACHE_BNL.
+
+  NOTES
+    The function first constructs a companion object of the type JOIN_TAB_SCAN,
+    then it calls the init method of the parent class.
+    
+  RETURN VALUE  
+    0   initialization with buffer allocations has been succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_BNL::init()
+{
+  DBUG_ENTER("JOIN_CACHE_BNL::init");
+
+  /* The companion object that scans the joined table */
+  join_tab_scan= new JOIN_TAB_SCAN(join, join_tab);
+  if (!join_tab_scan)
+    DBUG_RETURN(1);
+
+  /* The remaining initialization is done by the parent class */
+  DBUG_RETURN(JOIN_CACHE::init());
+}
+
+
+/*
+  Get the chain of records from buffer matching the current candidate for join
+
+  SYNOPSIS
+    get_matching_chain_by_join_key()
+
+  DESCRIPTION
+    This function first build a join key for the record of join_tab that
+    currently is in the join buffer for this table. Then it looks for
+    the key entry with this key in the hash table of the join cache.
+    If such a key entry is found the function returns the pointer to
+    the head of the chain of records in the join_buffer that match this
+    key.
+
+  RETURN VALUE
+    The pointer to the corresponding circular list of records if
+    the key entry with the join key is found, 0 - otherwise.
+*/  
+
+uchar *JOIN_CACHE_BNLH::get_matching_chain_by_join_key()
+{
+  TABLE_REF *ref= &join_tab->ref;
+  KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key);
+
+  /* Build the join key value in key_buff from the current table record */
+  key_copy(key_buff, join_tab->table->record[0], keyinfo, key_length, TRUE);
+
+  /* A found key entry gives access to the chain of records with this key */
+  uchar *key_ref_ptr;
+  if (key_search(key_buff, key_length, &key_ref_ptr))
+    return key_ref_ptr + get_size_of_key_offset();
+
+  return 0;
+}
+
+
+/*
+  Prepare to iterate over the BNLH join cache buffer to look for matches 
+
+  SYNOPSIS
+    prepare_look_for_matches()
+      skip_last   <-> ignore the last record in the buffer
+
+  DESCRIPTION
+    The function prepares the join cache for an iteration over the
+    records in the join buffer. The iteration is performed when looking
+    for matches for the record from the joined table join_tab that 
+    has been placed into the record buffer of the joined table.
+    If the value of the parameter skip_last is TRUE then the last
+    record from the join buffer is ignored.
+    The function builds the hashed key from the join fields of join_tab
+    and uses this key to look in the hash table of the join cache for
+    the chain of matching records in the join buffer. If it finds
+    such a chain it sets the member last_matching_rec_ref_ptr to point to the
+    last link of the chain while setting next_matching_rec_ref_ptr to 0.
+    
+  RETURN VALUE    
+    TRUE    there are no matching records in the buffer to iterate over 
+    FALSE   otherwise
+*/
+    
+bool JOIN_CACHE_BNLH::prepare_look_for_matches(bool skip_last)
+{
+  /* Forget any previous iteration; note that skip_last is not consulted */
+  next_matching_rec_ref_ptr= 0;
+  last_matching_rec_ref_ptr= 0;
+
+  uchar *matching_chain= get_matching_chain_by_join_key();
+  if (!matching_chain)
+    return TRUE;
+
+  /* Remember the last link of the chain; the iteration starts after it */
+  last_matching_rec_ref_ptr= get_next_rec_ref(matching_chain);
+  return FALSE;
+}
+
+
+/*
+  Get next record from the BNLH join cache buffer when looking for matches 
+
+  SYNOPSIS
+    get_next_candidate_for_match
+
+  DESCRIPTION
+    This method is used for iterations over the records from the join
+    cache buffer when looking for matches for records from join_tab.
+    The methods performs the necessary preparations to read the next record
+    from the join buffer into the record buffer by the method
+    read_next_candidate_for_match, or, to skip the next record from the join 
+    buffer by the method skip_next_candidate_for_match.    
+    This implementation of the virtual method moves to the next record
+    in the chain of all records from the join buffer that are to be
+    equi-joined with the current record from join_tab.
+    
+  RETURN VALUE   
+    pointer to the beginning of the record fields in the join buffer
+    if there is another record to iterate over, 0 - otherwise.  
+*/
+
+uchar *JOIN_CACHE_BNLH::get_next_candidate_for_match()
+{
+  /* The last link has been visited, or there is no chain at all */
+  if (next_matching_rec_ref_ptr == last_matching_rec_ref_ptr)
+    return 0;
+
+  /* The very first step goes from the last link of the circular chain */
+  uchar *from_ref= next_matching_rec_ref_ptr;
+  if (!from_ref)
+    from_ref= last_matching_rec_ref_ptr;
+
+  next_matching_rec_ref_ptr= get_next_rec_ref(from_ref);
+  return next_matching_rec_ref_ptr + rec_fields_offset;
+} 
+
+
+/*
+  Check whether the matching record from the BNLH cache is to be skipped 
+
+  SYNOPSIS
+    skip_next_candidate_for_match
+    rec_ptr  pointer to the position in the join buffer right after 
+             the previous record
+
+  DESCRIPTION
+    This implementation of the virtual function just calls the
+    method get_match_flag_by_pos to check whether the record referenced
+    by rec_ptr has its match flag set to MATCH_FOUND.
+
+  RETURN VALUE    
+    TRUE   the record referenced by rec_ptr has its match flag set to 
+           MATCH_FOUND
+    FALSE  otherwise  
+*/
+
+bool JOIN_CACHE_BNLH::skip_next_candidate_for_match(uchar *rec_ptr)
+{
+  bool first_match_only= join_tab->check_only_first_match();
+  return first_match_only && get_match_flag_by_pos(rec_ptr) == MATCH_FOUND;
+}
+
+
+/*
+  Read next record from the BNLH join cache buffer when looking for matches 
+
+  SYNOPSIS
+    read_next_candidate_for_match
+    rec_ptr  pointer to the position in the join buffer right after 
+             the previous record
+
+  DESCRIPTION
+    This implementation of the virtual method read_next_candidate_for_match
+    calls the method get_record_by_pos to read the record referenced by rec_ptr
+    from the join buffer into the record buffer. If this record refers to
+    fields in the other join buffers the call of get_record_by_pos ensures that
+    these fields are read into the corresponding record buffers as well.
+    This function is supposed to be called after a successful call of
+    the method get_next_candidate_for_match.
+    
+  RETURN VALUE   
+    none
+*/
+
+void JOIN_CACHE_BNLH::read_next_candidate_for_match(uchar *rec_ptr)
+{
+  get_record_by_pos(rec_ptr);        /* unpack the record found at rec_ptr */
+} 
+
+
+/*
+  Initialize the BNLH join cache 
+
+  SYNOPSIS
+    init
+
+  DESCRIPTION
+    The function initializes the cache structure. It is supposed to be called
+    right after a constructor for the JOIN_CACHE_BNLH.
+
+  NOTES
+    The function first constructs a companion object of the type JOIN_TAB_SCAN,
+    then it calls the init method of the parent class.
+    
+  RETURN VALUE  
+    0   initialization with buffer allocations has succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_BNLH::init()
+{
+  DBUG_ENTER("JOIN_CACHE_BNLH::init");
+  /* Create the companion scan object, then initialize the parent part */
+  join_tab_scan= new JOIN_TAB_SCAN(join, join_tab);
+  if (!join_tab_scan)
+    DBUG_RETURN(1);
+  DBUG_RETURN(JOIN_CACHE_HASHED::init());
+}
+
+
+/* 
+  Calculate the increment of the MRR buffer for a record write       
+
+  SYNOPSIS
+    aux_buffer_incr()
+
+  DESCRIPTION
+    This implementation of the virtual function aux_buffer_incr determines
+    for how much the size of the MRR buffer should be increased when another
+    record is added to the cache.   
+
+  RETURN VALUE
+    the increment of the size of the MRR buffer for the next record
+*/
+
+uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(ulong recno)
+{
+  TABLE *table= join_tab->table;
+  TABLE_REF *tref= &join_tab->ref;
+  uint per_key= table->key_info[tref->key].rec_per_key[tref->key_parts-1];
+
+  set_if_bigger(per_key, 1);              /* never use a value below 1 */
+  /* Only the first record adds the key length and the handler ref length */
+  uint incr= (recno == 1) ? tref->key_length + table->file->ref_length : 0;
+  incr+= table->file->stats.mrr_length_per_rec * per_key;
+  return incr;
+}
+
+
+/* 
+  Initiate iteration over records returned by MRR for the current join buffer
+
+  SYNOPSIS
+    open()
+
+  DESCRIPTION
+    The function initiates the process of iteration over the records from 
+    join_tab returned by the MRR interface functions for records from
+    the join buffer. Such an iteration is performed by the BKA/BKAH join
+    algorithm for each new refill of the join buffer.
+    The function calls the MRR handler function multi_range_read_init to
+    initiate this process.
+
+  RETURN VALUE   
+    0            the initiation is a success 
+    error code   otherwise     
+*/
+
+int JOIN_TAB_SCAN_MRR::open()
+{
+  int error;
+  handler *file= join_tab->table->file;
+
+  join_tab->table->null_row= 0;
+
+  /* Dynamic range access is never used with BKA */
+  DBUG_ASSERT(join_tab->use_quick != 2);
+
+  save_or_restore_used_tabs(join_tab, FALSE);
+
+  init_mrr_buff();
+
+  /*
+    Prepare to iterate over keys from the join buffer and to get
+    matching candidates obtained with MRR handler functions.
+  */
+  if (!file->inited && (error= file->ha_index_init(join_tab->ref.key, 1)))
+    return error;              /* do not start MRR on an unusable index */
+  ranges= cache->get_number_of_ranges_for_mrr();
+  if (!join_tab->cache_idx_cond)
+    range_seq_funcs.skip_index_tuple= 0;  /* no index condition to check */
+  return file->multi_range_read_init(&range_seq_funcs, (void*) cache,
+                                     ranges, mrr_mode, &mrr_buff);
+}
+
+
+/* 
+  Read the next record returned by MRR for the current join buffer
+
+  SYNOPSIS
+    next()
+
+  DESCRIPTION
+    The function reads the next record from the joined table join_tab
+    returned by the MRR handler function multi_range_read_next for
+    the current refill of the join buffer. The record is read into
+    the record buffer used for join_tab records in join operations.
+
+  RETURN VALUE   
+    0            the next record exists and has been successfully read 
+    error code   otherwise     
+*/
+
+int JOIN_TAB_SCAN_MRR::next()
+{
+  char **assoc_ptr= (char **) cache->get_curr_association_ptr();
+  handler *file= join_tab->table->file;
+
+  DBUG_ASSERT(sizeof(range_id_t) == sizeof(*assoc_ptr));
+  /* Any non-zero result of the MRR read is reported to the caller as -1 */
+  if (file->multi_range_read_next((range_id_t*) assoc_ptr))
+    return -1;
+  /*
+    If a record in an incremental cache contains no fields then the
+    association for the last record in cache will be equal to cache->end_pos
+  */
+  DBUG_ASSERT(cache->buff <= (uchar *) (*assoc_ptr) &&
+              (uchar *) (*assoc_ptr) <= cache->end_pos);
+  update_virtual_fields(join->thd, join_tab->table);
+  return 0;
+}
+
+
+static
+void bka_range_seq_key_info(void *init_params, uint *length,
+                            key_part_map *map)
+{
+  JOIN_CACHE *join_cache= (JOIN_CACHE *) init_params;
+  *length= join_cache->join_tab->ref.key_length;
+  *map= (key_part_map(1) << join_cache->join_tab->ref.key_parts) - 1;
+}
+
+
+/*
+  Initialize retrieval of range sequence for BKA join algorithm
+    
+  SYNOPSIS
+    bka_range_seq_init()
+     init_params   pointer to the BKA join cache object
+     n_ranges      the number of ranges obtained 
+     flags         combination of MRR flags
+
+  DESCRIPTION
+    The function interprets init_param as a pointer to a JOIN_CACHE_BKA
+    object. The function prepares for an iteration over the join keys
+    built for all records from the cache join buffer.
+
+  NOTE
+    This function is used only as a callback function.
+
+  RETURN VALUE
+    init_param value that is to be used as a parameter of bka_range_seq_next()
+*/    
+
+static
+range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  DBUG_ENTER("bka_range_seq_init");
+  /* n_ranges and flags are not used here */
+  ((JOIN_CACHE_BKA *) init_param)->reset(0);
+  DBUG_RETURN((range_seq_t) init_param);
+}
+
+
+/*
+  Get the next range/key over records from the join buffer used by a BKA cache
+    
+  SYNOPSIS
+    bka_range_seq_next()
+      seq        the value returned by  bka_range_seq_init
+      range  OUT reference to the next range
+  
+  DESCRIPTION
+    The function interprets seq as a pointer to a JOIN_CACHE_BKA
+    object. The function returns a pointer to the range descriptor
+    for the key built over the next record from the join buffer.
+
+  NOTE
+    This function is used only as a callback function.
+   
+  RETURN VALUE
+    FALSE   ok, the range structure filled with info about the next range/key
+    TRUE    no more ranges
+*/    
+
+static
+bool bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  DBUG_ENTER("bka_range_seq_next");
+  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
+  TABLE_REF *ref= &cache->join_tab->ref;
+  key_range *start_key= &range->start_key;
+  /* A zero key length means there are no more keys in the join buffer */
+  if (!(start_key->length= cache->get_next_key((uchar **) &start_key->key)))
+    DBUG_RETURN(1);
+  /* Shift in key_part_map width: a plain int shift may overflow */
+  start_key->keypart_map= (key_part_map(1) << ref->key_parts) - 1;
+  start_key->flag= HA_READ_KEY_EXACT;
+  range->end_key= *start_key;
+  range->end_key.flag= HA_READ_AFTER_KEY;
+  range->ptr= (char *) cache->get_curr_rec();
+  range->range_flag= EQ_RANGE;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Check whether range_info orders to skip the next record from BKA buffer
+
+  SYNOPSIS
+    bka_range_seq_skip_record()
+      seq              value returned by bka_range_seq_init()
+      range_info       information about the next range
+      rowid [NOT USED] rowid of the record to be checked 
+
+    
+  DESCRIPTION
+    The function interprets seq as a pointer to a JOIN_CACHE_BKA object.
+    The function returns TRUE if the record with this range_info 
+    is to be filtered out from the stream of records returned by 
+    multi_range_read_next(). 
+
+  NOTE
+    This function is used only as a callback function.
+
+  RETURN VALUE
+    1    record with this range_info is to be filtered out from the stream
+         of records returned by multi_range_read_next()
+    0    the record is to be left in the stream
+*/ 
+
+static
+bool bka_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid)
+{
+  DBUG_ENTER("bka_range_seq_skip_record");
+  uchar *rec_ptr= (uchar *) range_info;             /* rowid is not used */
+  bool skip= ((JOIN_CACHE_BKA *) rseq)->get_match_flag_by_pos(rec_ptr) ==
+             JOIN_CACHE::MATCH_FOUND;
+  DBUG_RETURN(skip);
+}
+
+
+/*
+  Check if the record combination from BKA cache matches the index condition
+
+  SYNOPSIS
+    bka_skip_index_tuple()
+      rseq             value returned by bka_range_seq_init()
+      range_info       record chain for the next range/key returned by MRR
+    
+  DESCRIPTION
+    This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method,
+    see comments there.
+
+  NOTE
+    This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
+ 
+  RETURN VALUE
+    0    The record combination satisfies the index condition
+    1    Otherwise
+*/
+
+static
+bool bka_skip_index_tuple(range_seq_t rseq, range_id_t range_info)
+{
+  DBUG_ENTER("bka_skip_index_tuple");
+  /* rseq is interpreted as a pointer to the BKA cache object */
+  bool skip= ((JOIN_CACHE_BKA *) rseq)->skip_index_tuple(range_info);
+  DBUG_RETURN(skip);
+}
+
+
+/*
+  Prepare to read the record from BKA cache matching the current joined record   
+
+  SYNOPSIS
+    prepare_look_for_matches()
+      skip_last <-> ignore the last record in the buffer (always unused here)
+
+  DESCRIPTION
+    The function prepares to iterate over records in the join cache buffer
+    matching the record loaded into the record buffer for join_tab when
+    performing join operation by BKA join algorithm. With BKA algorithms the
+    record loaded into the record buffer for join_tab always has a direct
+    reference to the matching records from the join buffer. When the regular
+    BKA join algorithm is employed the record from join_tab can refer to
+    only one such record.   
+    The function sets the counter of the remaining records from the cache 
+    buffer that would match the current join_tab record to 1.
+    
+  RETURN VALUE   
+    TRUE    there are no records in the buffer to iterate over 
+    FALSE   otherwise
+*/
+    
+bool JOIN_CACHE_BKA::prepare_look_for_matches(bool skip_last)
+{
+  const bool buffer_is_empty= (records == 0);
+  if (!buffer_is_empty)
+    rem_records= 1;          /* exactly one candidate will be iterated */
+  return buffer_is_empty;
+}
+
+
+/*
+  Get the record from the BKA cache matching the current joined record   
+
+  SYNOPSIS
+    get_next_candidate_for_match
+
+  DESCRIPTION
+    This method is used for iterations over the records from the join
+    cache buffer when looking for matches for records from join_tab.
+    The method performs the necessary preparations to read the next record
+    from the join buffer into the record buffer by the method
+    read_next_candidate_for_match, or, to skip the next record from the join 
+    buffer by the method skip_next_candidate_for_match.
+    This implementation of the virtual method get_next_candidate_for_match
+    just  decrements the counter of the records that are to be iterated over
+    and returns the value of curr_association as a reference to the position
+    of the beginning of the record fields in the buffer.
+    
+  RETURN VALUE   
+    pointer to the start of the record fields in the join buffer
+    if there is another record to iterate over, 0 - otherwise.
+*/
+
+uchar *JOIN_CACHE_BKA::get_next_candidate_for_match()
+{
+  if (rem_records == 0)
+    return 0;                            /* no candidate is left */
+  rem_records-= 1;
+  return curr_association;
+}
+
+
+/*
+  Check whether the matching record from the BKA cache is to be skipped 
+
+  SYNOPSIS
+    skip_next_candidate_for_match
+    rec_ptr  pointer to the position in the join buffer right after 
+             the previous record
+
+  DESCRIPTION
+    This implementation of the virtual function just calls the
+    method get_match_flag_by_pos to check whether the record referenced
+    by rec_ptr has its match flag set to MATCH_FOUND.
+
+  RETURN VALUE   
+    TRUE   the record referenced by rec_ptr has its match flag set to
+           MATCH_FOUND
+    FALSE  otherwise  
+*/
+
+bool JOIN_CACHE_BKA::skip_next_candidate_for_match(uchar *rec_ptr)
+{
+  bool first_match_only= join_tab->check_only_first_match();
+  return first_match_only && get_match_flag_by_pos(rec_ptr) == MATCH_FOUND;
+}
+
+
+/*
+  Read the next record from the BKA join cache buffer when looking for matches 
+
+  SYNOPSIS
+    read_next_candidate_for_match
+    rec_ptr  pointer to the position in the join buffer right after 
+             the previous record
+
+  DESCRIPTION
+    This implementation of the virtual method read_next_candidate_for_match
+    calls the method get_record_by_pos to read the record referenced by rec_ptr
+    from the join buffer into the record buffer. If this record refers to
+    fields in the other join buffers the call of get_record_by_pos ensures that
+    these fields are read into the corresponding record buffers as well.
+    This function is supposed to be called after a successful call of
+    the method get_next_candidate_for_match.
+    
+  RETURN VALUE   
+    none
+*/
+
+void JOIN_CACHE_BKA::read_next_candidate_for_match(uchar *rec_ptr)
+{
+  get_record_by_pos(rec_ptr);        /* unpack the record found at rec_ptr */
+} 
+
+
+/*
+  Initialize the BKA join cache 
+
+  SYNOPSIS
+    init
+
+  DESCRIPTION
+    The function initializes the cache structure. It is supposed to be called
+    right after a constructor for the JOIN_CACHE_BKA.
+
+  NOTES
+    The function first constructs a companion object of the type 
+    JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class.
+    
+  RETURN VALUE   
+    0   initialization with buffer allocations has succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_BKA::init()
+{
+  /* The skip-record callback is installed only for first-match joins */
+  bool first_match_only= join_tab->check_only_first_match();
+
+  RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info,
+                           bka_range_seq_init,
+                           bka_range_seq_next,
+                           first_match_only ? bka_range_seq_skip_record : 0,
+                           bka_skip_index_tuple };
+
+  DBUG_ENTER("JOIN_CACHE_BKA::init");
+
+  JOIN_TAB_SCAN_MRR *mrr_scan= new JOIN_TAB_SCAN_MRR(join, join_tab,
+                                                     mrr_mode, rs_funcs);
+  join_tab_scan= mrr_scan;
+  if (!mrr_scan)
+    DBUG_RETURN(1);
+
+  int rc= JOIN_CACHE::init();
+  if (rc)
+    DBUG_RETURN(rc);
+  /* use_emb_key is examined only after the base class initialization */
+  if (use_emb_key)
+    mrr_scan->mrr_mode|= HA_MRR_MATERIALIZED_KEYS;
+  DBUG_RETURN(0);
+}
+
+
+/* 
+  Get the key built over the next record from BKA join buffer
+
+  SYNOPSIS
+    get_next_key()
+      key    pointer to the buffer where the key value is to be placed
+
+  DESCRIPTION
+    The function reads key fields from the current record in the join buffer.
+    and builds the key value out of these fields that will be used to access
+    the 'join_tab' table. Some of key fields may belong to previous caches.
+    They are accessed via record references to the record parts stored in the
+    previous join buffers. The other key fields always are placed right after
+    the flag fields of the record.
+    If the key is embedded, which means that its value can be read directly
+    from the join buffer, then *key is set to the beginning of the key in
+    this buffer. Otherwise the key is built in the join_tab->ref->key_buff.
+    The function returns the length of the key if it succeeds to read it.
+    It is assumed that the function starts reading at the position of
+    the record length which is provided for each record in a BKA cache.
+    After the key is built the 'pos' value points to the first position after
+    the current record.
+    The function just skips the records with MATCH_IMPOSSIBLE in the
+    match flag field if there is any. 
+    The function returns 0 if the initial position is after the beginning
+    of the record fields for last record from the join buffer. 
+
+  RETURN VALUE
+    length of the key value - if the starting value of 'pos' points to
+    the position before the fields for the last record,
+    0 - otherwise.     
+*/
+
+uint JOIN_CACHE_BKA::get_next_key(uchar ** key)
+{
+  uint len;
+  uint32 rec_len;
+  uchar *init_pos;
+  JOIN_CACHE *cache;
+  
+start:
+
+  /* Any record in a BKA cache is prepended with its length */
+  DBUG_ASSERT(with_length);
+   
+  if ((pos+size_of_rec_len) > last_rec_pos || !records)
+    return 0;                      /* no further record to build a key for */
+
+  /* Read the length of the record */
+  rec_len= get_rec_length(pos);
+  pos+= size_of_rec_len; 
+  init_pos= pos;                   /* rec_len is counted from this position */
+
+  /* Read a reference to the previous cache if any */
+  if (prev_cache)
+    pos+= prev_cache->get_size_of_rec_offset();
+
+  curr_rec_pos= pos;
+
+  /* Read all flag fields of the record */
+  read_flag_fields();
+
+  if (with_match_flag && 
+      (Match_flag) curr_rec_pos[0] == MATCH_IMPOSSIBLE )
+  {
+    pos= init_pos+rec_len;         /* jump over the whole record */
+    goto start;                    /* and try the record that follows it */
+  }
+ 
+  if (use_emb_key)
+  {
+    /* An embedded key is taken directly from the join buffer */
+    *key= pos;
+    len= emb_key_length;
+  }
+  else
+  {
+    /* Read key arguments from previous caches if there are any such fields */
+    if (external_key_arg_fields)
+    {
+      uchar *rec_ptr= curr_rec_pos;
+      uint key_arg_count= external_key_arg_fields;
+      CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count;
+      for (cache= prev_cache; key_arg_count; cache= cache->prev_cache)
+      { 
+        uint len= 0;               /* scratch value; shadows the outer 'len' */
+        DBUG_ASSERT(cache);
+        rec_ptr= cache->get_rec_ref(rec_ptr);
+        while (!cache->referenced_fields)
+        {
+          cache= cache->prev_cache;
+          DBUG_ASSERT(cache);
+          rec_ptr= cache->get_rec_ref(rec_ptr);
+        }
+        while (key_arg_count && 
+               cache->read_referenced_field(*copy_ptr, rec_ptr, &len))
+        {
+          copy_ptr++;
+          --key_arg_count;
+        }
+      }
+    }
+    
+    /* 
+      Read the other key arguments from the current record. The fields for
+      these arguments are always first in the sequence of the record's fields.
+    */     
+    CACHE_FIELD *copy= field_descr+flag_fields;
+    CACHE_FIELD *copy_end= copy+local_key_arg_fields;
+    bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos);
+    for ( ; copy < copy_end; copy++)
+      read_record_field(copy, blob_in_rec_buff);
+    
+    /* Build the key over the fields read into the record buffers */ 
+    TABLE_REF *ref= &join_tab->ref;
+    cp_buffer_from_ref(join->thd, join_tab->table, ref);
+    *key= ref->key_buff;
+    len= ref->key_length;
+  }
+
+  pos= init_pos+rec_len;           /* leave pos right after the current record */
+
+  return len;
+} 
+
+
+/*
+  Check the index condition of the joined table for a record from the BKA cache
+
+  SYNOPSIS
+    skip_index_tuple()
+      range_info       pointer to the record returned by MRR 
+    
+  DESCRIPTION
+    This function is invoked from MRR implementation to check if an index
+    tuple matches the index condition. It is used in the case where the index
+    condition actually depends on both columns of the used index and columns
+    from previous tables.
+   
+  NOTES 
+    Accessing columns of the previous tables requires special handling with
+    BKA. The idea of BKA is to collect record combinations in a buffer and 
+    then do a batch of ref access lookups, i.e. by the time we're doing a
+    lookup its previous-records-combination is not in prev_table->record[0]
+    but somewhere in the join buffer.    
+    We need to get it from there back into prev_table(s)->record[0] before we
+    can evaluate the index condition, and that's why we need this function
+    instead of regular IndexConditionPushdown.
+
+  NOTES
+    Possible optimization:
+    Before we unpack the record from a previous table
+    check if this table is used in the condition.
+    If so then unpack the record otherwise skip the unpacking.
+    This should be done by a special virtual method
+    get_partial_record_by_pos().
+
+  RETURN VALUE
+    1    the record combination does not satisfy the index condition
+    0    otherwise
+*/
+
+bool JOIN_CACHE_BKA::skip_index_tuple(range_id_t range_info)
+{
+  DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple");
+  get_record_by_pos((uchar *) range_info);  /* unpack before evaluating */
+  DBUG_RETURN(join_tab->cache_idx_cond->val_int() == 0);
+}
+
+
+
+/*
+  Initialize retrieval of range sequence for the BKAH join algorithm
+    
+  SYNOPSIS
+    bkah_range_seq_init()
+      init_params   pointer to the BKAH join cache object
+      n_ranges      the number of ranges obtained 
+      flags         combination of MRR flags
+
+  DESCRIPTION
+    The function interprets init_param as a pointer to a JOIN_CACHE_BKAH
+    object. The function prepares for an iteration over distinct join keys
+    built over the records from the cache join buffer.
+
+  NOTE
+    This function is used only as a callback function.
+
+  RETURN VALUE
+    init_param    value that is to be used as a parameter of 
+                  bkah_range_seq_next()
+*/    
+
+static
+range_seq_t bkah_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  DBUG_ENTER("bkah_range_seq_init");
+  /* n_ranges and flags are not used here */
+  ((JOIN_CACHE_BKAH *) init_param)->reset(0);
+  DBUG_RETURN((range_seq_t) init_param);
+}
+
+
+/*
+  Get the next range/key over records from the join buffer of a BKAH cache  
+    
+  SYNOPSIS
+    bkah_range_seq_next()
+      seq        value returned by  bkah_range_seq_init()
+      range  OUT reference to the next range
+  
+  DESCRIPTION
+    The function interprets seq as a pointer to a JOIN_CACHE_BKAH 
+    object. The function returns a pointer to the range descriptor
+    for the next unique key built over records from the join buffer.
+
+  NOTE
+    This function is used only as a callback function.
+   
+  RETURN VALUE
+    FALSE  ok, the range structure filled with info about the next range/key
+    TRUE   no more ranges
+*/    
+
+static
+bool bkah_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  DBUG_ENTER("bkah_range_seq_next");
+  JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
+  TABLE_REF *ref= &cache->join_tab->ref;
+  key_range *start_key= &range->start_key;
+  /* A zero key length means there are no more keys in the join buffer */
+  if (!(start_key->length= cache->get_next_key((uchar **) &start_key->key)))
+    DBUG_RETURN(1);
+  /* Shift in key_part_map width: a plain int shift may overflow */
+  start_key->keypart_map= (key_part_map(1) << ref->key_parts) - 1;
+  start_key->flag= HA_READ_KEY_EXACT;
+  range->end_key= *start_key;
+  range->end_key.flag= HA_READ_AFTER_KEY;
+  range->ptr= (char *) cache->get_curr_key_chain();
+  range->range_flag= EQ_RANGE;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Check whether range_info orders to skip the next record from BKAH join buffer
+
+  SYNOPSIS
+    bkah_range_seq_skip_record()
+      seq              value returned by bkah_range_seq_init()
+      range_info       information about the next range/key returned by MRR
+      rowid [NOT USED] rowid of the record to be checked (not used)
+    
+  DESCRIPTION
+    The function interprets seq as a pointer to a JOIN_CACHE_BKAH
+    object. The function returns TRUE if the record with this range_info
+    is to be filtered out from the stream of records returned by
+    multi_range_read_next(). 
+
+  NOTE
+    This function is used only as a callback function.
+
+  RETURN VALUE
+    1    record with this range_info is to be filtered out from the stream
+         of records returned by multi_range_read_next()
+    0    the record is to be left in the stream
+*/ 
+
+static
+bool bkah_range_seq_skip_record(range_seq_t rseq, range_id_t range_info,
+                                uchar *rowid)
+{
+  DBUG_ENTER("bkah_range_seq_skip_record");
+  JOIN_CACHE_BKAH *bkah= (JOIN_CACHE_BKAH *) rseq;  /* rowid is not used */
+  bool skip= bkah->check_all_match_flags_for_key((uchar *) range_info);
+  DBUG_RETURN(skip);
+}
+
+ 
+/*
+  Check if the record combination from BKAH cache matches the index condition
+
+  SYNOPSIS
+    bkah_skip_index_tuple()
+      rseq             value returned by bkah_range_seq_init()
+      range_info       record chain for the next range/key returned by MRR
+    
+  DESCRIPTION
+    This is a wrapper for the JOIN_CACHE_BKAH::skip_index_tuple method,
+    see comments there.
+
+  NOTE
+    This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
+ 
+  RETURN VALUE
+    0    some records from the chain satisfy the index condition
+    1    otherwise
+*/
+
+static
+bool bkah_skip_index_tuple(range_seq_t rseq, range_id_t range_info)
+{
+  DBUG_ENTER("bkah_skip_index_tuple");   /* trace tag matches the function */
+  JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
+  DBUG_RETURN(cache->skip_index_tuple(range_info));
+}
+
+
+/*
+  Prepare to read record from BKAH cache matching the current joined record   
+
+  SYNOPSIS
+    prepare_look_for_matches()
+      skip_last <-> ignore the last record in the buffer (always unused here)
+
+  DESCRIPTION
+    The function prepares to iterate over records in the join cache buffer
+    matching the record loaded into the record buffer for join_tab when
+    performing join operation by BKAH join algorithm. With BKAH algorithm, if
+    association labels are used, then record loaded into the record buffer 
+    for join_tab always has a direct reference to the chain of the matching
+    records from the join buffer. If association labels are not used then
+    the chain of the matching records is obtained by the call of the
+    get_matching_chain_by_join_key function.
+    
+  RETURN VALUE   
+    TRUE    there are no records in the buffer to iterate over 
+    FALSE   otherwise
+*/
+    
+bool JOIN_CACHE_BKAH::prepare_look_for_matches(bool skip_last)
+{
+  last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0;
+  if (no_association &&            /* the chain must be looked up by key */
+      !(curr_matching_chain= get_matching_chain_by_join_key()))
+    return 1;                      /* no chain: nothing to iterate over */
+  last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain);
+  return 0;
+}
+
+/*
+  Initialize the BKAH join cache 
+
+  SYNOPSIS
+    init
+
+  DESCRIPTION
+    The function initializes the cache structure. It is supposed to be called
+    right after a constructor for the JOIN_CACHE_BKAH.
+
+  NOTES
+    The function first constructs a companion object of the type 
+    JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class.
+    
+  RETURN VALUE   
+    0   initialization with buffer allocations has succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_BKAH::init()
+{
+  bool first_match_only= join_tab->check_only_first_match();
+  no_association= test(mrr_mode & HA_MRR_NO_ASSOCIATION);
+  /* The skip-record callback is not installed when associations are off */
+  bool filter_matched= first_match_only && !no_association;
+
+  RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info,
+                           bkah_range_seq_init,
+                           bkah_range_seq_next,
+                           filter_matched ? bkah_range_seq_skip_record : 0,
+                           bkah_skip_index_tuple };
+
+  DBUG_ENTER("JOIN_CACHE_BKAH::init");
+
+  join_tab_scan= new JOIN_TAB_SCAN_MRR(join, join_tab, mrr_mode, rs_funcs);
+  if (!join_tab_scan)
+    DBUG_RETURN(1);
+
+  DBUG_RETURN(JOIN_CACHE_HASHED::init());
+}
+
+
+/*
+  Check the index condition of the joined table for a record from the BKA cache
+
+  SYNOPSIS
+    skip_index_tuple()
+      range_info       record chain returned by MRR 
+    
+  DESCRIPTION
+    See JOIN_CACHE_BKA::skip_index_tuple().
+    This function is the variant for use with the class JOIN_CACHE_BKAH.
+    The difference from JOIN_CACHE_BKA case is that there may be multiple
+    previous table record combinations that share the same key(MRR range).
+    As a consequence, we need to loop through the chain of all table record
+    combinations that match the given MRR range key range_info until we find
+    one that satisfies the index condition.
+
+  NOTE
+    Possible optimization:
+    Before we unpack the record from a previous table
+    check if this table is used in the condition.
+    If so then unpack the record otherwise skip the unpacking.
+    This should be done by a special virtual method
+    get_partial_record_by_pos().
+
+  RETURN VALUE
+    1    no record combination from the chain referred to by range_info
+         satisfies the index condition
+    0    otherwise
+
+
+*/
+
+bool JOIN_CACHE_BKAH::skip_index_tuple(range_id_t range_info)
+{
+  uchar *chain_end= get_next_rec_ref((uchar*) range_info);
+  uchar *cur_ref= chain_end;
+  DBUG_ENTER("JOIN_CACHE_BKAH::skip_index_tuple");
+  do
+  {
+    cur_ref= get_next_rec_ref(cur_ref);
+    get_record_by_pos(cur_ref + rec_fields_offset);
+    /* One satisfying combination is enough to keep the index tuple */
+    if (join_tab->cache_idx_cond->val_int())
+      DBUG_RETURN(FALSE);
+  } while (cur_ref != chain_end);
+  DBUG_RETURN(TRUE);
+}
diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h
new file mode 100644
index 00000000000..c153689bb99
--- /dev/null
+++ b/sql/sql_join_cache.h
@@ -0,0 +1,1412 @@
+/*
+  This file contains declarations for implementations
+  of block based join algorithms
+*/
+
+#define JOIN_CACHE_INCREMENTAL_BIT           1
+#define JOIN_CACHE_HASHED_BIT                2
+#define JOIN_CACHE_BKA_BIT                   4
+
+/* 
+  Categories of data fields of variable length written into join cache buffers.
+  The value of any of these fields is written into cache together with the
+  prepended length of the value.     
+*/
+#define CACHE_BLOB      1        /* blob field  */
+#define CACHE_STRIPPED  2        /* field stripped of trailing spaces */
+#define CACHE_VARSTR1   3        /* short string value (length takes 1 byte) */ 
+#define CACHE_VARSTR2   4        /* long string value (length takes 2 bytes) */
+#define CACHE_ROWID     5        /* ROWID field */
+
+/*
+  The CACHE_FIELD structure describes a field of a record that is copied
+  from a record buffer into a join cache buffer and back.
+*/
+typedef struct st_cache_field {
+  uchar *str;   /**< buffer from/to where the field is to be copied */ 
+  uint length;  /**< maximal number of bytes to be copied from/to str */
+  /* 
+    Field object for the moved field
+    (0 - for a flag field, see JOIN_CACHE::create_flag_fields).
+  */
+  Field *field;
+  uint type;    /**< category of the copied field (CACHE_BLOB et al.) */
+  /* 
+    The number of the record offset value for the field in the sequence
+    of offsets placed after the last field of the record. These
+    offset values are used to access fields referred to from other caches.
+    If the value is 0 then no offset for the field is saved in the
+    trailing sequence of offsets.
+  */ 
+  uint referenced_field_no; 
+  /* The remaining structure fields are used as containers for temp values */
+  uint blob_length; /**< length of the blob to be copied */
+  uint offset;      /**< field offset to be saved in cache buffer */
+} CACHE_FIELD;
+
+
+class JOIN_TAB_SCAN;
+
+
+/*
+  JOIN_CACHE is the base class to support the implementations of 
+  - Block Nested Loop (BNL) Join Algorithm,
+  - Block Nested Loop Hash (BNLH) Join Algorithm,
+  - Batched Key Access (BKA) Join Algorithm.
+  The first algorithm is supported by the derived class JOIN_CACHE_BNL,
+  the second algorithm is supported by the derived class JOIN_CACHE_BNLH,
+  while the third algorithm is implemented in two variants supported by
+  the classes JOIN_CACHE_BKA and JOIN_CACHE_BKAH.
+  These three algorithms have a lot in common. Each of them first accumulates
+  the records of the left join operand in a join buffer and then searches for
+  matching rows of the second operand for all accumulated records.
+  For the first two algorithms this strategy saves on logical I/O operations:
+  the entire set of records from the join buffer requires only one look-through
+  of the records provided by the second operand. 
+  For the third algorithm the accumulation of records allows to optimize
+  fetching rows of the second operand from disk for some engines (MyISAM, 
+  InnoDB), or to minimize the number of round-trips between the Server and
+  the engine nodes (NDB Cluster).        
+*/ 
+
+class JOIN_CACHE :public Sql_alloc
+{
+private:
+
+  /* Size of the offset of a record from the cache */
+  uint size_of_rec_ofs;
+  /* Size of the length of a record in the cache */
+  uint size_of_rec_len;
+  /* Size of the offset of a field within a record in the cache */
+  uint size_of_fld_ofs;
+
+protected:
+
+  /* 3 functions below actually do not use the hidden parameter 'this' */
+
+  /* Calculate the number of bytes used to store an offset value */
+  uint offset_size(uint len)
+  { return (len < 256 ? 1 : len < 256*256 ? 2 : 4); }
+
+  /* Get the offset value that takes ofs_sz bytes at the position ptr */
+  ulong get_offset(uint ofs_sz, uchar *ptr)
+  {
+    switch (ofs_sz) {
+    case 1: return uint(*ptr);
+    case 2: return uint2korr(ptr);
+    case 4: return uint4korr(ptr);
+    }
+    return 0;
+  }
+
+  /* Set the offset value ofs that takes ofs_sz bytes at the position ptr */
+  void store_offset(uint ofs_sz, uchar *ptr, ulong ofs)
+  {
+    switch (ofs_sz) {
+    case 1: *ptr= (uchar) ofs; return;
+    case 2: int2store(ptr, (uint16) ofs); return;
+    case 4: int4store(ptr, (uint32) ofs); return;
+    }
+  }
+
+  /*
+    The maximum total length of the fields stored for a record in the cache.
+    For blob fields only the sizes of the blob lengths are taken into account.
+  */
+  uint length;
+
+  /*
+    Representation of the executed multi-way join through which all needed
+    context can be accessed.
+  */
+  JOIN *join;
+
+  /*
+    JOIN_TAB of the first table that can have its fields in the join cache.
+    That is, tables in the [start_tab, tab) range can have their fields in the
+    join cache.
+    If a join tab in the range represents an SJM-nest, then all tables from the
+    nest can have their fields in the join cache, too.
+  */
+  JOIN_TAB *start_tab;
+
+  /*
+    The total number of flag and data fields that can appear in a record
+    written into the cache. Fields with null values are always skipped
+    to save space.
+  */
+  uint fields;
+
+  /*
+    The total number of flag fields in a record put into the cache. They are
+    used for table null bitmaps, table null row flags, and an optional match
+    flag. Flag fields go before other fields in a cache record with the match
+    flag field placed always at the very beginning of the record.
+  */
+  uint flag_fields;
+
+  /* The total number of blob fields that are written into the cache */
+  uint blobs;
+
+  /*
+    The total number of fields referenced from field descriptors for other join
+    caches. These fields are used to construct key values.
+    When BKA join algorithm is employed the constructed key values serve to
+    access matching rows with index lookups.
+    The key values are put into a hash table when the BNLH join algorithm
+    is employed and when BKAH is used for the join operation.
+  */
+  uint referenced_fields;
+
+  /*
+    The current number of already created data field descriptors.
+    This number can be useful for implementations of the init methods.
+  */
+  uint data_field_count;
+
+  /*
+    The current number of already created pointers to the data field
+    descriptors. This number can be useful for implementations of
+    the init methods.
+  */
+  uint data_field_ptr_count;
+
+  /*
+    Array of the descriptors of fields containing 'fields' elements.
+    These are all fields that are stored for a record in the cache.
+  */
+  CACHE_FIELD *field_descr;
+
+  /*
+    Array of pointers to the blob descriptors that contains 'blobs' elements.
+  */
+  CACHE_FIELD **blob_ptr;
+
+  /*
+    This flag indicates that records written into the join buffer contain
+    a match flag field. The flag must be set by the init method.
+  */
+  bool with_match_flag;
+  /*
+    This flag indicates that any record is prepended with the length of the
+    record which allows us to skip the record or part of it without reading.
+  */
+  bool with_length;
+
+  /*
+    The maximal number of bytes used for a record representation in
+    the cache excluding the space for blob data.
+    For future derived classes this representation may contain some
+    redundant info such as a key value associated with the record.
+  */
+  uint pack_length;
+  /*
+    The value of pack_length incremented by the total size of all
+    pointers of a record in the cache to the blob data.
+  */
+  uint pack_length_with_blob_ptrs;
+
+  /*
+    The total size of the record base prefix. The base prefix of record may
+    include the following components:
+     - the length of the record
+     - the link to a record in a previous buffer.
+    Each record in the buffer is supplied with the same set of the components.
+  */
+  uint base_prefix_length;
+
+  /*
+    The expected length of a record in the join buffer together with
+    all prefixes and postfixes
+  */
+  size_t avg_record_length;
+
+  /* The expected size of the space per record in the auxiliary buffer */
+  size_t avg_aux_buffer_incr;
+
+  /* Expected join buffer space used for one record */
+  size_t space_per_record;
+
+  /* Pointer to the beginning of the join buffer */
+  uchar *buff;
+  /*
+    Size of the entire memory allocated for the join buffer.
+    Part of this memory may be reserved for the auxiliary buffer.
+  */
+  size_t buff_size;
+  /* The minimal join buffer size when join buffer still makes sense to use */
+  size_t min_buff_size;
+  /* The maximum expected size of the join buffer to be used */
+  size_t max_buff_size;
+  /* Size of the auxiliary buffer */
+  size_t aux_buff_size;
+
+  /* The number of records put into the join buffer */
+  size_t records;
+  /*
+    The number of records in the fully refilled join buffer of
+    the minimal size equal to min_buff_size
+  */
+  size_t min_records;
+  /*
+    The maximum expected number of records to be put in the join buffer
+    at one refill
+  */
+  size_t max_records;
+
+  /*
+    Pointer to the current position in the join buffer.
+    This member is used both when writing to buffer and
+    when reading from it.
+  */
+  uchar *pos;
+  /*
+    Pointer to the first free position in the join buffer,
+    right after the last record put into it.
+  */
+  uchar *end_pos;
+
+  /*
+    Pointer to the beginning of the first field of the current read/write
+    record from the join buffer. The value is adjusted by the
+    get_record/put_record functions.
+  */
+  uchar *curr_rec_pos;
+  /*
+    Pointer to the beginning of the first field of the last record
+    from the join buffer.
+  */
+  uchar *last_rec_pos;
+
+  /*
+    Flag is set if the blob data for the last record in the join buffer
+    is in record buffers rather than in the join cache.
+  */
+  bool last_rec_blob_data_is_in_rec_buff;
+
+  /*
+    Pointer to the position to the current record link.
+    Record links are used only with linked caches. Record links allow to set
+    connections between parts of one join record that are stored in different
+    join buffers.
+    In the simplest case a record link is just a pointer to the beginning of
+    the record stored in the buffer.
+    In a more general case a link could be a reference to an array of pointers
+    to records in the buffer.
+  */
+  uchar *curr_rec_link;
+
+  /*
+    This flag is set to TRUE if join_tab is the first inner table of an outer
+    join and  the latest record written to the join buffer is detected to be
+    null complemented after checking on conditions over the outer tables for
+    this outer join operation
+  */
+  bool last_written_is_null_compl;
+
+  /*
+    The number of fields put in the join buffer of the join cache that are
+    used in building keys to access the table join_tab
+  */
+  uint local_key_arg_fields;
+  /*
+    The total number of the fields in the previous caches that are used
+    in building keys to access the table join_tab
+  */
+  uint external_key_arg_fields;
+
+  /*
+    This flag indicates that the key values will be read directly from the join
+    buffer. It will save us building key values in the key buffer.
+  */
+  bool use_emb_key;
+  /* The length of an embedded key value */
+  uint emb_key_length;
+
+  /*
+    This object provides the methods to iterate over records of
+    the joined table join_tab when looking for join matches between
+    records from join buffer and records from join_tab.
+    BNL and BNLH join algorithms retrieve all records from join_tab,
+    while BKA/BKAH algorithm iterates only over those records from
+    join_tab that can be accessed by look-ups with join keys built
+    from records in join buffer.
+  */
+  JOIN_TAB_SCAN *join_tab_scan;
+
+  void calc_record_fields();
+  void collect_info_on_key_args();
+  int alloc_fields();
+  void create_flag_fields();
+  void create_key_arg_fields();
+  void create_remaining_fields();
+  void set_constants();
+  int alloc_buffer();
+
+  /* Shall reallocate the join buffer */
+  virtual int realloc_buffer();
+
+  /* Check the possibility to read the access keys directly from join buffer */
+  bool check_emb_key_usage();
+
+  uint get_size_of_rec_offset() { return size_of_rec_ofs; }
+  uint get_size_of_rec_length() { return size_of_rec_len; }
+  uint get_size_of_fld_offset() { return size_of_fld_ofs; }
+
+  uchar *get_rec_ref(uchar *ptr)
+  {
+    return buff+get_offset(size_of_rec_ofs, ptr-size_of_rec_ofs);
+  }
+  ulong get_rec_length(uchar *ptr)
+  {
+    return (ulong) get_offset(size_of_rec_len, ptr);
+  }
+  ulong get_fld_offset(uchar *ptr)
+  {
+    return (ulong) get_offset(size_of_fld_ofs, ptr);
+  }
+
+  void store_rec_ref(uchar *ptr, uchar* ref)
+  {
+    store_offset(size_of_rec_ofs, ptr-size_of_rec_ofs, (ulong) (ref-buff));
+  }
+  void store_rec_length(uchar *ptr, ulong len)
+  {
+    store_offset(size_of_rec_len, ptr, len);
+  }
+  void store_fld_offset(uchar *ptr, ulong ofs)
+  {
+    store_offset(size_of_fld_ofs, ptr, ofs);
+  }
+
+  /* Write record fields and their required offsets into the join buffer */
+  uint write_record_data(uchar *link, bool *is_full);
+
+  /* Get the total length of all prefixes of a record in the join buffer */
+  virtual uint get_prefix_length() { return base_prefix_length; }
+  /* Get maximum total length of all affixes of a record in the join buffer */
+  virtual uint get_record_max_affix_length();
+
+  /*
+    Shall get maximum size of the additional space per record used for
+    record keys
+  */
+  virtual uint get_max_key_addon_space_per_record() { return 0; }
+
+  /*
+    This method must determine for how much the auxiliary buffer should be
+    incremented when a new record is added to the join buffer.
+    If no auxiliary buffer is needed the function should return 0.
+  */
+  virtual uint aux_buffer_incr(ulong recno);
+
+  /* Shall calculate how much space is remaining in the join buffer */
+  virtual size_t rem_space()
+  {
+    /* Compare before subtracting: unsigned operands must not wrap around */
+    size_t used= (size_t) (end_pos-buff) + aux_buff_size;
+    return buff_size > used ? buff_size-used : 0;
+  }
+
+  /*
+    Shall calculate how much space is taken by allocation of the key
+    for a record in the join buffer
+  */
+  virtual uint extra_key_length() { return 0; }
+
+  /*  Read all flag and data fields of a record from the join buffer */
+  uint read_all_record_fields();
+
+  /* Read all flag fields of a record from the join buffer */
+  uint read_flag_fields();
+
+  /* Read a data record field from the join buffer */
+  uint read_record_field(CACHE_FIELD *copy, bool last_record);
+
+  /* Read a referenced field from the join buffer */
+  bool read_referenced_field(CACHE_FIELD *copy, uchar *rec_ptr, uint *len);
+
+  /*
+    Shall skip record from the join buffer if its match flag
+    is set to MATCH_FOUND
+ */
+  virtual bool skip_if_matched();
+
+  /*
+    Shall skip record from the join buffer if its match flag
+    commands to do so
+  */
+  virtual bool skip_if_not_needed_match();
+
+  /*
+    True if rec_ptr points to the record whose blob data stay in
+    record buffers
+  */
+  bool blob_data_is_in_rec_buff(uchar *rec_ptr)
+  {
+    return rec_ptr == last_rec_pos && last_rec_blob_data_is_in_rec_buff;
+  }
+
+  /* Find matches from the next table for records from the join buffer */
+  virtual enum_nested_loop_state join_matching_records(bool skip_last);
+
+  /* Shall set an auxiliary buffer up (currently used only by BKA joins) */
+  virtual int setup_aux_buffer(HANDLER_BUFFER &aux_buff)
+  {
+    DBUG_ASSERT(0);
+    return 0;
+  }
+
+  /*
+    Shall get the number of ranges in the cache buffer passed
+    to the MRR interface
+  */
+  virtual uint get_number_of_ranges_for_mrr() { return 0; }
+
+  /*
+    Shall prepare to look for records from the join cache buffer that would
+    match the record of the joined table read into the record buffer
+  */
+  virtual bool prepare_look_for_matches(bool skip_last)= 0;
+  /*
+    Shall return a pointer to the record from join buffer that is checked
+    as the next candidate for a match with the current record from join_tab.
+    Each implementation of this virtual function should bear in mind
+    that the record position it returns shall be exactly the position
+    passed as the parameter to the implementations of the virtual functions
+    skip_next_candidate_for_match and read_next_candidate_for_match.
+  */
+  virtual uchar *get_next_candidate_for_match()= 0;
+  /*
+    Shall check whether the given record from the join buffer has its match
+    flag set so that it commands to skip the record in the buffer.
+  */
+  virtual bool skip_next_candidate_for_match(uchar *rec_ptr)= 0;
+  /*
+    Shall read the given record from the join buffer into
+    the corresponding record buffer
+  */
+  virtual void read_next_candidate_for_match(uchar *rec_ptr)= 0;
+
+  /*
+    Shall return the location of the association label returned by
+    the multi_read_range_next function for the current record loaded
+    into join_tab's record buffer
+  */
+  virtual uchar **get_curr_association_ptr() { return 0; }
+
+  /* Add null complements for unmatched outer records from the join buffer */
+  virtual enum_nested_loop_state join_null_complements(bool skip_last);
+
+  /* Restore the fields of the last record from the join buffer */
+  virtual void restore_last_record();
+
+  /* Set match flag for a record in join buffer if it has not been set yet */
+  bool set_match_flag_if_none(JOIN_TAB *first_inner, uchar *rec_ptr);
+
+  enum_nested_loop_state generate_full_extensions(uchar *rec_ptr);
+
+  /* Check matching to a partial join record from the join buffer */
+  bool check_match(uchar *rec_ptr);
+
+  /*
+    This constructor creates an unlinked join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables
+    specified by the 'j' parameter.
+  */
+  JOIN_CACHE(JOIN *j, JOIN_TAB *tab)
+  {
+    join= j;
+    join_tab= tab;
+    prev_cache= next_cache= 0;
+    buff= 0;
+  }
+
+  /*
+    This constructor creates a linked join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked.
+  */
+  JOIN_CACHE(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev)
+  {
+    join= j;
+    join_tab= tab;
+    next_cache= 0;
+    prev_cache= prev;
+    buff= 0;
+    if (prev)
+      prev->next_cache= this;
+  }
+
+public:
+
+  /*
+    The enumeration type Join_algorithm includes a mnemonic constant for
+    each join algorithm that employs join buffers
+  */
+
+  enum Join_algorithm
+  {
+    BNL_JOIN_ALG,     /* Block Nested Loop Join algorithm                  */
+    BNLH_JOIN_ALG,    /* Block Nested Loop Hash Join algorithm             */
+    BKA_JOIN_ALG,     /* Batched Key Access Join algorithm                 */
+    BKAH_JOIN_ALG     /* Batched Key Access with Hash Table Join Algorithm */
+  };
+
+  /*
+    The enumeration type Match_flag describes possible states of the match flag
+    field  stored for the records of the first inner tables of outer joins and
+    semi-joins in the cases when the first match strategy is used for them.
+    When a record with match flag field is written into the join buffer the
+    state of the field usually is MATCH_NOT_FOUND unless this is a record of the
+    first inner table of the outer join for which the on precondition (the
+    condition from on expression over outer tables)  has turned out not to be
+    true. In the last case the state of the match flag is MATCH_IMPOSSIBLE.
+    The state of the match flag field is changed to MATCH_FOUND as soon as
+    the first full matching combination of inner tables of the outer join or
+    the semi-join is discovered.
+  */
+  enum Match_flag { MATCH_NOT_FOUND, MATCH_FOUND, MATCH_IMPOSSIBLE };
+
+  /* Table to be joined with the partial join records from the cache */
+  JOIN_TAB *join_tab;
+
+  /* Pointer to the previous join cache if there is any */
+  JOIN_CACHE *prev_cache;
+  /* Pointer to the next join cache if there is any */
+  JOIN_CACHE *next_cache;
+
+  /* Shall initialize the join cache structure */
+  virtual int init();
+
+  /* Get the current size of the cache join buffer */
+  size_t get_join_buffer_size() { return buff_size; }
+  /* Set the size of the cache join buffer to a new value */
+  void set_join_buffer_size(size_t sz) { buff_size= sz; }
+
+  /* Get the minimum possible size of the cache join buffer */
+  virtual ulong get_min_join_buffer_size();
+  /* Get the maximum possible size of the cache join buffer */
+  virtual ulong get_max_join_buffer_size(bool optimize_buff_size);
+
+  /* Shrink the size of the cache join buffer in a given ratio */
+  bool shrink_join_buffer_in_ratio(ulonglong n, ulonglong d);
+
+  /*  Shall return the type of the employed join algorithm */
+  virtual enum Join_algorithm get_join_alg()= 0;
+
+  /*
+    The function shall return TRUE only when there is a key access
+    to the join table
+  */
+  virtual bool is_key_access()= 0;
+
+  /* Shall reset the join buffer for reading/writing */
+  virtual void reset(bool for_writing);
+
+  /*
+    This function shall add a record into the join buffer and return TRUE
+    if it has been decided that it should be the last record in the buffer.
+  */
+  virtual bool put_record();
+
+  /*
+    This function shall read the next record into the join buffer and return
+    TRUE if there are no more records.
+  */
+  virtual bool get_record();
+
+  /*
+    This function shall read the record at the position rec_ptr
+    in the join buffer
+  */
+  virtual void get_record_by_pos(uchar *rec_ptr);
+
+  /* Shall return the value of the match flag for the positioned record */
+  virtual enum Match_flag get_match_flag_by_pos(uchar *rec_ptr);
+
+  /* Shall return the position of the current record */
+  virtual uchar *get_curr_rec() { return curr_rec_pos; }
+
+  /* Shall set the current record link */
+  virtual void set_curr_rec_link(uchar *link) { curr_rec_link= link; }
+
+  /* Shall return the current record link */
+  virtual uchar *get_curr_rec_link()
+  {
+    return (curr_rec_link ? curr_rec_link : get_curr_rec());
+  }
+
+  /* Join records from the join buffer with records from the next join table */
+  enum_nested_loop_state join_records(bool skip_last);
+
+  /* Add a comment on the join algorithm employed by the join cache */
+  virtual void print_explain_comment(String *str);
+
+  virtual ~JOIN_CACHE() {}
+  void reset_join(JOIN *j) { join= j; }
+  void free()
+  {
+    my_free(buff);
+    buff= 0;
+  }
+
+  friend class JOIN_CACHE_HASHED;
+  friend class JOIN_CACHE_BNL;
+  friend class JOIN_CACHE_BKA;
+  friend class JOIN_TAB_SCAN;
+  friend class JOIN_TAB_SCAN_MRR;
+};
+
+
+/*
+  The class JOIN_CACHE_HASHED is the base class for the classes
+  JOIN_CACHE_BNLH and JOIN_CACHE_BKAH. The first of them supports
+  an implementation of Block Nested Loop Hash (BNLH) Join Algorithm,
+  while the second is used for a variant of the BKA Join algorithm that performs
+  only one lookup for any records from join buffer with the same key value. 
+  For a join cache of this class the records from the join buffer that have
+  the same access key are linked into a chain attached to a key entry structure
+  that either itself contains the key value, or, in the case when the keys are
+  embedded, refers to its occurrence in one of the records from the chain.
+  To build the chains with the same keys a hash table is employed. It is placed
+  at the very end of the join buffer. The array of hash entries is allocated
+  first at the very bottom of the join buffer, while key entries are placed
+  before this array.
+  A hash entry contains a header of the list of the key entries with the same
+  hash value. 
+  Each key entry is a structure of the following type:
+    struct st_join_cache_key_entry {
+      union { 
+        uchar[] value;
+        cache_ref *value_ref; // offset from the beginning of the buffer
+      } hash_table_key;
+      key_ref next_key; // offset backward from the beginning of hash table
+      cache_ref *last_rec // offset from the beginning of the buffer
+    }
+  The references linking the records in a chain are always placed at the very
+  beginning of the record info stored in the join buffer. The records are 
+  linked in a circular list. A new record is always added to the end of this 
+  list.
+
+  The following picture represents a typical layout for the info stored in the
+  join buffer of a join cache object of the JOIN_CACHE_HASHED class.
+    
+  buff
+  V
+  +----------------------------------------------------------------------------+
+  |     |[*]record_1_1|                                                        |
+  |     ^ |                                                                    |
+  |     | +--------------------------------------------------+                 |
+  |     |                           |[*]record_2_1|          |                 |
+  |     |                           ^ |                      V                 |
+  |     |                           | +------------------+   |[*]record_1_2|   |
+  |     |                           +--------------------+-+   |               |
+  |+--+ +---------------------+                          | |   +-------------+ |
+  ||  |                       |                          V |                 | |
+  |||[*]record_3_1|         |[*]record_1_3|              |[*]record_2_2|     | |
+  ||^                       ^                            ^                   | |
+  ||+----------+            |                            |                   | |
+  ||^          |            |<---------------------------+-------------------+ |
+  |++          | | ... mrr  |   buffer ...           ... |     |               |
+  |            |            |                            |                     |
+  |      +-----+--------+   |                      +-----|-------+             |
+  |      V     |        |   |                      V     |       |             |
+  ||key_3|[/]|[*]|      |   |                |key_2|[/]|[*]|     |             |
+  |                   +-+---|-----------------------+            |             |
+  |                   V |   |                       |            |             |
+  |             |key_1|[*]|[*]|         |   | ... |[*]|   ...  |[*]|  ...  |   |
+  +----------------------------------------------------------------------------+
+                                        ^           ^            ^
+                                        |           i-th entry   j-th entry
+                                        hash table
+
+  i-th hash entry:
+    circular record chain for key_1:
+      record_1_1
+      record_1_2
+      record_1_3 (points to record_1_1)
+    circular record chain for key_3:
+      record_3_1 (points to itself)
+
+  j-th hash entry:
+    circular record chain for key_2:
+      record_2_1
+      record_2_2 (points to record_2_1)
+
+*/
+
+class JOIN_CACHE_HASHED: public JOIN_CACHE
+{
+
+  typedef uint (JOIN_CACHE_HASHED::*Hash_func) (uchar *key, uint key_len);
+  typedef bool (JOIN_CACHE_HASHED::*Hash_cmp_func) (uchar *key1, uchar *key2,
+                                                    uint key_len);
+  
+private:
+
+  /* Size of the offset of a key entry in the hash table */
+  uint size_of_key_ofs;
+
+  /* 
+    Length of the key entry in the hash table.
+    A key entry either contains the key value, or it contains a reference
+    to the key value if use_emb_key flag is set for the cache.
+  */ 
+  uint key_entry_length;
+ 
+  /* The beginning of the hash table in the join buffer */
+  uchar *hash_table;
+  /* Number of hash entries in the hash table */
+  uint hash_entries;
+
+
+  /* The position of the currently retrieved key entry in the hash table */
+  uchar *curr_key_entry;
+
+  /* The offset of the data fields from the beginning of the record fields */
+  uint data_fields_offset;
+
+  inline uint get_hash_idx_simple(uchar *key, uint key_len);
+  inline uint get_hash_idx_complex(uchar *key, uint key_len);
+
+  inline bool equal_keys_simple(uchar *key1, uchar *key2, uint key_len);
+  inline bool equal_keys_complex(uchar *key1, uchar *key2, uint key_len);
+
+  int init_hash_table();
+  void cleanup_hash_table();
+  
+protected:
+
+  /* 
+    Index info on the TABLE_REF object used by the hash join
+    to look for matching records
+  */    
+  KEY *ref_key_info;
+  /* 
+    Number of the key parts the TABLE_REF object used by the hash join
+    to look for matching records
+  */    
+  uint ref_used_key_parts;
+
+  /*
+    The hash function used in the hash table,
+    usually set by the init() method
+  */ 
+  Hash_func hash_func;
+  /*
+    The function to check whether two key entries in the hash table
+    are equal or not, usually set by the init() method
+  */ 
+  Hash_cmp_func hash_cmp_func;
+
+  /* 
+    Length of a key value.
+    It is assumed that all key values have the same length.
+  */
+  uint key_length;
+  /* Buffer to store key values for probing */
+  uchar *key_buff;
+
+  /* Number of key entries in the hash table (number of distinct keys) */
+  uint key_entries;
+
+  /* The position of the last key entry in the hash table */
+  uchar *last_key_entry;
+
+  /* 
+    The offset of the record fields from the beginning of the record
+    representation. The record representation starts with a reference to
+    the next record in the key record chain followed by the length of
+    the trailing record data followed by a reference to the record segment
+    in the previous cache, if any, followed by the record fields.
+  */ 
+  uint rec_fields_offset;
+
+  uint get_size_of_key_offset() { return size_of_key_ofs; }
+
+  /* 
+    Get the position of the next_key_ptr field pointed to by 
+    a linking reference stored at the position key_ref_ptr. 
+    This reference is actually the offset backward from the
+    beginning of hash table.
+  */  
+  uchar *get_next_key_ref(uchar *key_ref_ptr)
+  {
+    return hash_table-get_offset(size_of_key_ofs, key_ref_ptr);
+  }
+
+  /* 
+    Store the linking reference to the next_key_ptr field at 
+    the position key_ref_ptr. The position of the next_key_ptr
+    field is pointed to by ref. The stored reference is actually
+    the offset backward from the beginning of the hash table.
+  */  
+  void store_next_key_ref(uchar *key_ref_ptr, uchar *ref)
+  {
+    store_offset(size_of_key_ofs, key_ref_ptr, (ulong) (hash_table-ref));
+  }     
+  
+  /* 
+    Check whether the reference to the next_key_ptr field at the position
+    key_ref_ptr contains  a nil value.
+  */
+  bool is_null_key_ref(uchar *key_ref_ptr)
+  {
+    ulong nil= 0;
+    return memcmp(key_ref_ptr, &nil, size_of_key_ofs ) == 0;
+  } 
+
+  /* 
+    Set the reference to the next_key_ptr field at the position
+    key_ref_ptr equal to nil.
+  */
+  void store_null_key_ref(uchar *key_ref_ptr)
+  {
+    ulong nil= 0;
+    store_offset(size_of_key_ofs, key_ref_ptr, nil);
+  } 
+
+  uchar *get_next_rec_ref(uchar *ref_ptr)
+  {
+    return buff+get_offset(get_size_of_rec_offset(), ref_ptr);
+  }
+
+  void store_next_rec_ref(uchar *ref_ptr, uchar *ref)
+  {
+    store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff));
+  } 
+
+  /*
+    Get the position of the embedded key value for the current
+    record pointed to by get_curr_rec().
+  */ 
+  uchar *get_curr_emb_key()
+  {
+    return get_curr_rec()+data_fields_offset;
+  }
+
+  /*
+    Get the position of the embedded key value pointed to by a reference
+    stored at ref_ptr. The stored reference is actually the offset from
+    the beginning of the join buffer.
+  */  
+  uchar *get_emb_key(uchar *ref_ptr)
+  {
+    return buff+get_offset(get_size_of_rec_offset(), ref_ptr);
+  }
+
+  /* 
+    Store the reference to an embedded key at the position key_ref_ptr.
+    The position of the embedded key is pointed to by ref. The stored
+    reference is actually the offset from the beginning of the join buffer.
+  */  
+  void store_emb_key_ref(uchar *ref_ptr, uchar *ref)
+  {
+    store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff));
+  }
+  
+  /* Get the total length of all prefixes of a record in hashed join buffer */ 
+  uint get_prefix_length() 
+  { 
+    return base_prefix_length + get_size_of_rec_offset();
+  }
+
+  /* 
+    Get maximum size of the additional space per record used for
+    the hash table with record keys
+  */
+  uint get_max_key_addon_space_per_record();
+
+  /* 
+    Calculate how much space in the buffer would not be occupied by
+    records, key entries and additional memory for the MRR buffer.
+  */ 
+  size_t rem_space() 
+  { /* NOTE(review): if aux_buff_size is unsigned the subtraction wraps and max(..,0) cannot clamp - confirm its type */
+    return max(last_key_entry-end_pos-aux_buff_size,0);
+  }
+
+  /* 
+    Calculate how much space is taken by allocation of the key
+    entry for a record in the join buffer
+  */
+  uint extra_key_length() { return key_entry_length; }
+
+  /* 
+    Skip record from a hashed join buffer if its match flag
+    is set to MATCH_FOUND
+  */
+  bool skip_if_matched();
+
+  /*
+    Skip record from a hashed join buffer if its match flag setting 
+    commands to do so
+  */
+  bool skip_if_not_needed_match();
+
+  /* Search for a key in the hash table of the join buffer */
+  bool key_search(uchar *key, uint key_len, uchar **key_ref_ptr);
+
+  /* Reallocate the join buffer of a hashed join cache */
+  int realloc_buffer();
+
+  /* 
+    This constructor creates an unlinked hashed join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+  */   
+  JOIN_CACHE_HASHED(JOIN *j, JOIN_TAB *tab) :JOIN_CACHE(j, tab) {}
+
+  /* 
+    This constructor creates a linked hashed join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked.
+  */   
+  JOIN_CACHE_HASHED(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev) 
+		    :JOIN_CACHE(j, tab, prev) {}
+
+public:
+
+  /* Initialize a hashed join cache */       
+  int init();
+
+  /* Reset the buffer of a hashed join cache for reading/writing */
+  void reset(bool for_writing);
+
+  /* Add a record into the buffer of a hashed join cache */
+  bool put_record();
+
+  /* Read the next record from the buffer of a hashed join cache */
+  bool get_record();
+
+  /*
+    Shall check whether all records in a key chain have 
+    their match flags set on
+  */   
+  virtual bool check_all_match_flags_for_key(uchar *key_chain_ptr);
+
+  uint get_next_key(uchar **key); 
+  
+  /* Get the head of the record chain attached to the current key entry */ 
+  uchar *get_curr_key_chain()
+  {
+    return get_next_rec_ref(curr_key_entry+key_entry_length-
+                            get_size_of_rec_offset());
+  }
+  
+};
+
+
+/*
+  The class JOIN_TAB_SCAN is a companion class for the classes JOIN_CACHE_BNL
+  and JOIN_CACHE_BNLH. Actually the class implements the iterator over the
+  table joined by BNL/BNLH join algorithm.
+  The virtual functions open, next and close are called for any iteration over
+  the table. The function open is called to initiate the process of the 
+  iteration. The function next shall read the next record from the joined
+  table. The record is read into the record buffer of the joined table.
+  The record is to be matched with records from the join cache buffer. 
+  The function close shall perform the finalizing actions for the iteration.
+*/
+   
+class JOIN_TAB_SCAN: public Sql_alloc
+{
+
+private:
+  /*
+    TRUE if this is the first record from the joined table to iterate over.
+    NOTE(review): not set by the constructor below; presumably open() sets it - confirm
+  */
+  bool is_first_record;
+
+protected:
+
+  /* The joined table to be iterated over */
+  JOIN_TAB *join_tab;
+  /* The join cache used to join the table join_tab (copied from join_tab->cache) */
+  JOIN_CACHE *cache;
+  /* 
+    Representation of the executed multi-way join through which
+    all needed context can be accessed.  
+  */   
+  JOIN *join;
+
+public:
+  /* Remember the join and the table, and pick up the cache attached to the table */
+  JOIN_TAB_SCAN(JOIN *j, JOIN_TAB *tab)
+  {
+    join= j;
+    join_tab= tab;
+    cache= join_tab->cache;
+  }
+
+  virtual ~JOIN_TAB_SCAN() {}
+ 
+  /* 
+    Shall calculate the increment of the auxiliary buffer for a record
+    write if such a buffer is used by the table scan object.
+    This implementation ignores recno and always returns 0.
+  */
+  virtual uint aux_buffer_incr(ulong recno) { return 0; }
+
+  /* Initiate the process of iteration over the joined table */
+  virtual int open();
+  /* 
+    Shall read the next candidate for matches with records from 
+    the join buffer.
+  */
+  virtual int next();
+  /* 
+    Perform the finalizing actions for the process of iteration
+    over the joined table.
+  */ 
+  virtual void close();
+
+};
+
+/*
+  The class JOIN_CACHE_BNL is used when the BNL join algorithm is
+  employed to perform a join operation   
+*/
+
+class JOIN_CACHE_BNL :public JOIN_CACHE
+{
+private:
+  /* 
+    The number of the records in the join buffer that still have to be
+    checked for a match with the current record of join_tab 
+    read into the record buffer.
+  */
+  uint rem_records;
+
+protected:
+  /* NOTE(review): presumably these four implement match-lookup hooks of JOIN_CACHE - confirm */
+  bool prepare_look_for_matches(bool skip_last);
+
+  uchar *get_next_candidate_for_match();
+
+  bool skip_next_candidate_for_match(uchar *rec_ptr);
+
+  void read_next_candidate_for_match(uchar *rec_ptr);
+
+public:
+
+  /* 
+    This constructor creates an unlinked BNL join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. rem_records is not set here.
+  */   
+  JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab) :JOIN_CACHE(j, tab) {}
+
+  /* 
+    This constructor creates a linked BNL join cache. The cache is to be 
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked. rem_records is not set here.
+  */   
+  JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev) 
+    :JOIN_CACHE(j, tab, prev) {}
+
+  /* Initialize the BNL cache */       
+  int init();
+  /* Report BNL_JOIN_ALG as the join algorithm of this cache */
+  enum Join_algorithm get_join_alg() { return BNL_JOIN_ALG; }
+  /* Always FALSE for this cache class */
+  bool is_key_access() { return FALSE; }
+
+};
+
+
+/*
+  The class JOIN_CACHE_BNLH is used when the BNLH join algorithm is
+  employed to perform a join operation   
+*/
+
+class JOIN_CACHE_BNLH :public JOIN_CACHE_HASHED
+{
+
+protected:
+  /* Neither pointer below is set by the constructors of this class */
+  /* 
+    The pointer to the last record from the circular list of the records
+    that match the join key built out of the record in the join buffer for
+    the join_tab table
+  */
+  uchar *last_matching_rec_ref_ptr;
+  /*
+    The pointer to the next (current) record from the circular list of the
+    records that match the join key built out of the record in the join buffer
+    for the join_tab table. This pointer is used by the class method 
+    get_next_candidate_for_match to iterate over records from the circular
+    list.
+  */
+  uchar *next_matching_rec_ref_ptr;
+
+  /*
+    Get the chain of records from buffer matching the current candidate
+    record for join
+  */
+  uchar *get_matching_chain_by_join_key();
+
+  bool prepare_look_for_matches(bool skip_last);
+
+  uchar *get_next_candidate_for_match();
+
+  bool skip_next_candidate_for_match(uchar *rec_ptr);
+
+  void read_next_candidate_for_match(uchar *rec_ptr);
+
+public:
+
+  /* 
+    This constructor creates an unlinked BNLH join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+  */   
+  JOIN_CACHE_BNLH(JOIN *j, JOIN_TAB *tab) : JOIN_CACHE_HASHED(j, tab) {}
+
+  /* 
+    This constructor creates a linked BNLH join cache. The cache is to be 
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked.
+  */   
+  JOIN_CACHE_BNLH(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev) 
+    : JOIN_CACHE_HASHED(j, tab, prev) {}
+
+  /* Initialize the BNLH cache */       
+  int init();
+  /* Report BNLH_JOIN_ALG as the join algorithm of this cache */
+  enum Join_algorithm get_join_alg() { return BNLH_JOIN_ALG; }
+  /* Always TRUE for this cache class */
+  bool is_key_access() { return TRUE; }
+
+};
+
+
+/*
+  The class JOIN_TAB_SCAN_MRR is a companion class for the classes
+  JOIN_CACHE_BKA and JOIN_CACHE_BKAH. Actually the class implements the
+  iterator over the records from join_tab selected by BKA/BKAH join
+  algorithm as the candidates to be joined. 
+  The virtual functions open, next and close are called for any iteration over
+  join_tab record candidates. The function open is called to initiate the
+  process of the iteration. The function next shall read the next record from
+  the set of the record candidates. The record is read into the record buffer
+  of the joined table. The function close shall perform the finalizing actions
+  for the iteration.
+*/
+   
+class JOIN_TAB_SCAN_MRR: public JOIN_TAB_SCAN
+{
+  /* Interface object to generate key ranges for MRR */
+  RANGE_SEQ_IF range_seq_funcs;
+
+  /* Number of ranges to be processed by the MRR interface */
+  uint ranges;
+
+  /* Flag to be passed to the MRR interface */ 
+  uint mrr_mode;
+
+  /* MRR buffer associated with this join cache */
+  HANDLER_BUFFER mrr_buff;
+
+  /* Shall initialize the MRR buffer; here the cache fills mrr_buff in, any result is ignored */
+  virtual void init_mrr_buff()
+  {
+    cache->setup_aux_buffer(mrr_buff);
+  }
+
+public:
+  /* Stores rs_funcs and flags; 'ranges' and 'mrr_buff' are not explicitly initialized here */
+  JOIN_TAB_SCAN_MRR(JOIN *j, JOIN_TAB *tab, uint flags, RANGE_SEQ_IF rs_funcs)
+    :JOIN_TAB_SCAN(j, tab), range_seq_funcs(rs_funcs), mrr_mode(flags) {}
+
+  uint aux_buffer_incr(ulong recno);
+
+  int open();
+ 
+  int next();
+
+  friend class JOIN_CACHE_BKA; /* it needs to add an mrr_mode flag after JOIN_CACHE::init() call */
+};
+
+/*
+  The class JOIN_CACHE_BKA is used when the BKA join algorithm is
+  employed to perform a join operation   
+*/
+
+class JOIN_CACHE_BKA :public JOIN_CACHE
+{
+private:
+
+  /* Flag to be passed to the companion JOIN_TAB_SCAN_MRR object */
+  uint mrr_mode;
+
+  /* 
+    This value is set to 1 by the class prepare_look_for_matches method
+    and back to 0 by the class get_next_candidate_for_match method
+  */
+  uint rem_records;
+
+  /*
+    This field contains the current association label set by a call of
+    the multi_range_read_next handler function.
+    Its address is handed out by JOIN_CACHE_BKA::get_curr_association_ptr()
+  */
+  uchar *curr_association;
+
+protected:
+
+  /* 
+    Get the number of ranges in the cache buffer passed to the MRR
+    interface. For each record its own range is passed.
+  */
+  uint get_number_of_ranges_for_mrr() { return (uint)records; }
+
+ /*
+   Setup the MRR buffer as the space between the last record put
+   into the join buffer and the very end of the join buffer.
+   Always returns 0.
+ */
+  int setup_aux_buffer(HANDLER_BUFFER &aux_buff)
+  {
+    aux_buff.buffer= end_pos;
+    aux_buff.buffer_end= buff+buff_size;
+    return 0;
+  }
+
+  bool prepare_look_for_matches(bool skip_last);
+
+  uchar *get_next_candidate_for_match();
+
+  bool skip_next_candidate_for_match(uchar *rec_ptr);
+
+  void read_next_candidate_for_match(uchar *rec_ptr);
+
+public:
+
+  /* 
+    This constructor creates an unlinked BKA join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+    The MRR mode initially is set to 'flags'.
+  */   
+  JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags)
+    :JOIN_CACHE(j, tab), mrr_mode(flags) {}
+  /* 
+    This constructor creates a linked BKA join cache. The cache is to be 
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked.
+    The MRR mode initially is set to 'flags'.
+  */   
+  JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags, JOIN_CACHE *prev)
+    :JOIN_CACHE(j, tab, prev), mrr_mode(flags) {}
+  /* Return the address of the curr_association field */
+  uchar **get_curr_association_ptr() { return &curr_association; }
+
+  /* Initialize the BKA cache */       
+  int init();
+  /* Report BKA_JOIN_ALG as the join algorithm of this cache */
+  enum Join_algorithm get_join_alg() { return BKA_JOIN_ALG; }
+  /* Always TRUE for this cache class */
+  bool is_key_access() { return TRUE; }
+
+  /* Get the key built over the next record from the join buffer */
+  uint get_next_key(uchar **key);
+
+  /* Check index condition of the joined table for a record from BKA cache */
+  bool skip_index_tuple(range_id_t range_info);
+
+  void print_explain_comment(String *str);
+};
+
+
+
+/*
+  The class JOIN_CACHE_BKAH is used when the BKAH join algorithm is
+  employed to perform a join operation   
+*/
+
+class JOIN_CACHE_BKAH :public JOIN_CACHE_BNLH
+{
+
+private:
+  /* Flag to be passed to the companion JOIN_TAB_SCAN_MRR object */
+  uint mrr_mode;
+
+  /* 
+    This flag is set to TRUE if the implementation of the MRR interface cannot
+    handle range association labels and does not return them to the caller of
+    the multi_range_read_next handler function. E.g. the implementation of
+    the MRR interface for the Falcon engine could not return association
+    labels to the caller of multi_range_read_next.
+    The flag is set by JOIN_CACHE_BKA::init() (NOTE(review): BKAH::init()? - confirm); never changed later.
+  */       
+  bool no_association;
+
+  /* 
+    This field contains the association label returned by the 
+    multi_range_read_next function.
+    Its address is handed out by JOIN_CACHE_BKAH::get_curr_association_ptr()
+  */
+  uchar *curr_matching_chain;
+
+protected:
+  /* The number of ranges passed to the MRR interface is key_entries */
+  uint get_number_of_ranges_for_mrr() { return key_entries; }
+
+  /* 
+    Initialize the MRR buffer allocating some space within the join buffer.
+    The entire space between the last record put into the join buffer and the
+    last key entry added to the hash table is used for the MRR buffer.
+  */
+  int setup_aux_buffer(HANDLER_BUFFER &aux_buff)
+  {
+    aux_buff.buffer= end_pos;
+    aux_buff.buffer_end= last_key_entry;
+    return 0;
+  }
+
+  bool prepare_look_for_matches(bool skip_last);
+
+  /*
+    The implementations of the methods
+    - get_next_candidate_for_match
+    - skip_next_candidate_for_match
+    - read_next_candidate_for_match
+    are inherited from the JOIN_CACHE_BNLH class
+  */
+
+public:
+
+  /* 
+    This constructor creates an unlinked BKAH join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+    The MRR mode initially is set to 'flags'.
+  */   
+  JOIN_CACHE_BKAH(JOIN *j, JOIN_TAB *tab, uint flags) 
+    :JOIN_CACHE_BNLH(j, tab), mrr_mode(flags) {}
+
+  /* 
+    This constructor creates a linked BKAH join cache. The cache is to be 
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked.
+    The MRR mode initially is set to 'flags'.
+  */   
+  JOIN_CACHE_BKAH(JOIN *j, JOIN_TAB *tab, uint flags, JOIN_CACHE *prev)
+    :JOIN_CACHE_BNLH(j, tab, prev), mrr_mode(flags)  {}
+  /* Return the address of the curr_matching_chain field */
+  uchar **get_curr_association_ptr() { return &curr_matching_chain; }
+
+  /* Initialize the BKAH cache */       
+  int init();
+  /* Report BKAH_JOIN_ALG as the join algorithm of this cache */
+  enum Join_algorithm get_join_alg() { return BKAH_JOIN_ALG; }
+
+  /* Check index condition of the joined table for a record from BKAH cache */
+  bool skip_index_tuple(range_id_t range_info);
+
+  void print_explain_comment(String *str);
+};
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index df27362633f..f3dfd95815e 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -27,6 +27,7 @@
 #include <hash.h>
 #include "sp.h"
 #include "sp_head.h"
+#include "sql_select.h"
 
 static int lex_one_token(void *arg, void *yythd);
 
@@ -138,6 +139,87 @@ void lex_free(void)
   DBUG_VOID_RETURN;
 }
 
+/**
+  Initialize lex object for use in fix_fields and parsing.
+
+  SYNOPSIS init_lex_with_single_table()
+    @param thd                 The thread object; thd->lex is switched to lex
+    @param table               The table object
+    @param lex                 The lex object to initialize
+  @return Operation status
+    @retval TRUE                An error occurred, memory allocation error
+    @retval FALSE               Ok
+
+  DESCRIPTION
+    This function is used to initialize a lex object supplied by the
+    caller for use by fix_fields and for parsing. In order to
+    work properly it also needs to initialize the
+    Name_resolution_context object of the lexer.
+    Finally it sets a couple of variables of lex and table to ensure
+    proper functioning of fix_fields. The previous thd->lex is not saved here.
+*/
+
+int
+init_lex_with_single_table(THD *thd, TABLE *table, LEX *lex)
+{
+  TABLE_LIST *table_list;
+  Table_ident *table_ident;
+  SELECT_LEX *select_lex= &lex->select_lex;
+  Name_resolution_context *context= &select_lex->context;
+  /*
+    Callers parse an expression kept with the table (the original note
+    mentions the partition string stored in the frm file) using a
+    local lex object. The Name_resolution_context of that lex must be
+    set too: add_table_to_list adds the table we're working with and
+    name resolution is then limited to that table list element.
+    The error return below leaves thd->lex pointing at lex.
+  */
+  thd->lex= lex;
+  lex_start(thd);
+  context->init();
+  if ((!(table_ident= new Table_ident(thd,
+                                      table->s->table_name,
+                                      table->s->db, TRUE))) ||
+      (!(table_list= select_lex->add_table_to_list(thd,
+                                                   table_ident,
+                                                   NULL,
+                                                   0))))
+    return TRUE;
+  context->resolve_in_table_list_only(table_list);
+  lex->use_only_table_context= TRUE;
+  lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_VCOL_EXPR;
+  select_lex->cur_pos_in_select_list= UNDEF_POS;
+  table->map= 1; //To ensure correct calculation of const item
+  table->get_fields_in_item_tree= TRUE;
+  table_list->table= table;
+  return FALSE;
+}
+
+/**
+  End use of local lex with single table
+
+  SYNOPSIS
+    end_lex_with_single_table()
+    @param thd               The thread object
+    @param table             The table object
+    @param old_lex           The real lex object connected to THD
+
+  DESCRIPTION
+    This function makes old_lex the lex of the thread again after a call of
+    init_lex_with_single_table. It also resets table->map to 0 and
+    table->get_fields_in_item_tree to FALSE (fixed values, not saved ones).
+*/
+
+void
+end_lex_with_single_table(THD *thd, TABLE *table, LEX *old_lex)
+{
+  LEX *lex= thd->lex;              /* the local lex that is being retired */
+  table->map= 0;
+  table->get_fields_in_item_tree= FALSE;
+  lex_end(lex);                    /* end the local lex before switching back */
+  thd->lex= old_lex;
+}
+
 
 void
 st_parsing_options::reset()
@@ -392,7 +474,6 @@ void lex_start(THD *thd)
   lex->context_analysis_only= 0;
   lex->derived_tables= 0;
   lex->safe_to_cache_query= 1;
-  lex->leaf_tables_insert= 0;
   lex->parsing_options.reset();
   lex->empty_field_list_on_rset= 0;
   lex->select_lex.select_number= 1;
@@ -415,6 +496,7 @@ void lex_start(THD *thd)
   lex->reset_query_tables_list(FALSE);
   lex->expr_allows_subselect= TRUE;
   lex->use_only_table_context= FALSE;
+  lex->parse_vcol_expr= FALSE;
 
   lex->name.str= 0;
   lex->name.length= 0;
@@ -461,6 +543,8 @@ void lex_end(LEX *lex)
   delete lex->sphead;
   lex->sphead= NULL;
 
+  lex->mi.reset();
+
   DBUG_VOID_RETURN;
 }
 
@@ -916,7 +1000,7 @@ int MYSQLlex(void *arg, void *yythd)
 
 int lex_one_token(void *arg, void *yythd)
 {
-  reg1	uchar c= 0;
+  reg1	uchar c;
   bool comment_closed;
   int	tokval, result_state;
   uint length;
@@ -925,10 +1009,11 @@ int lex_one_token(void *arg, void *yythd)
   Lex_input_stream *lip= & thd->m_parser_state->m_lip;
   LEX *lex= thd->lex;
   YYSTYPE *yylval=(YYSTYPE*) arg;
-  CHARSET_INFO *cs= thd->charset();
-  uchar *state_map= cs->state_map;
-  uchar *ident_map= cs->ident_map;
+  CHARSET_INFO *const cs= thd->charset();
+  const uchar *const state_map= cs->state_map;
+  const uchar *const ident_map= cs->ident_map;
 
+  LINT_INIT(c);
   lip->yylval=yylval;			// The global state
 
   lip->start_token();
@@ -960,44 +1045,48 @@ int lex_one_token(void *arg, void *yythd)
 	yylval->lex_str.length=2;
 	return NULL_SYM;
       }
+      /* Fall through */
     case MY_LEX_CHAR:			// Unknown or single char token
     case MY_LEX_SKIP:			// This should not happen
-      if (c == '-' && lip->yyPeek() == '-' &&
+      if (c != ')')
+	lip->next_state= MY_LEX_START;	// Allow signed numbers
+      return((int) c);
+
+    case MY_LEX_MINUS_OR_COMMENT:
+      if (lip->yyPeek() == '-' &&
           (my_isspace(cs,lip->yyPeekn(1)) ||
            my_iscntrl(cs,lip->yyPeekn(1))))
       {
         state=MY_LEX_COMMENT;
         break;
       }
+      lip->next_state= MY_LEX_START;	// Allow signed numbers
+      return((int) c);
 
-      if (c != ')')
-	lip->next_state= MY_LEX_START;	// Allow signed numbers
-
-      if (c == ',')
-      {
-        /*
-          Warning:
-          This is a work around, to make the "remember_name" rule in
-          sql/sql_yacc.yy work properly.
-          The problem is that, when parsing "select expr1, expr2",
-          the code generated by bison executes the *pre* action
-          remember_name (see select_item) *before* actually parsing the
-          first token of expr2.
-        */
-        lip->restart_token();
-      }
-      else
-      {
-        /*
-          Check for a placeholder: it should not precede a possible identifier
-          because of binlogging: when a placeholder is replaced with
-          its value in a query for the binlog, the query must stay
-          grammatically correct.
-        */
-        if (c == '?' && lip->stmt_prepare_mode && !ident_map[lip->yyPeek()])
+    case MY_LEX_PLACEHOLDER:
+      /*
+        Check for a placeholder: it should not precede a possible identifier
+        because of binlogging: when a placeholder is replaced with
+        its value in a query for the binlog, the query must stay
+        grammatically correct.
+      */
+      lip->next_state= MY_LEX_START;	// Allow signed numbers
+      if (lip->stmt_prepare_mode && !ident_map[(uchar) lip->yyPeek()])
         return(PARAM_MARKER);
-      }
+      return((int) c);
 
+    case MY_LEX_COMMA:
+      lip->next_state= MY_LEX_START;	// Allow signed numbers
+      /*
+        Warning:
+        This is a work around, to make the "remember_name" rule in
+        sql/sql_yacc.yy work properly.
+        The problem is that, when parsing "select expr1, expr2",
+        the code generated by bison executes the *pre* action
+        remember_name (see select_item) *before* actually parsing the
+        first token of expr2.
+      */
+      lip->restart_token();
       return((int) c);
 
     case MY_LEX_IDENT_OR_NCHAR:
@@ -1062,7 +1151,10 @@ int lex_one_token(void *arg, void *yythd)
       else
 #endif
       {
-        for (result_state= c; ident_map[c= lip->yyGet()]; result_state|= c) ;
+        for (result_state= c;
+             ident_map[(uchar) (c= lip->yyGet())];
+             result_state|= c)
+          ;
         /* If there were non-ASCII characters, mark that we must convert */
         result_state= result_state & 0x80 ? IDENT_QUOTED : IDENT;
       }
@@ -1074,9 +1166,11 @@ int lex_one_token(void *arg, void *yythd)
           If we find a space then this can't be an identifier. We notice this
           below by checking start != lex->ptr.
         */
-        for (; state_map[c] == MY_LEX_SKIP ; c= lip->yyGet()) ;
+        for (; state_map[(uchar) c] == MY_LEX_SKIP ; c= lip->yyGet())
+          ;
       }
-      if (start == lip->get_ptr() && c == '.' && ident_map[lip->yyPeek()])
+      if (start == lip->get_ptr() && c == '.' &&
+          ident_map[(uchar) lip->yyPeek()])
 	lip->next_state=MY_LEX_IDENT_SEP;
       else
       {					// '(' must follow directly if function
@@ -1119,12 +1213,12 @@ int lex_one_token(void *arg, void *yythd)
 
       return(result_state);			// IDENT or IDENT_QUOTED
 
-    case MY_LEX_IDENT_SEP:		// Found ident and now '.'
+    case MY_LEX_IDENT_SEP:                  // Found ident and now '.'
       yylval->lex_str.str= (char*) lip->get_ptr();
       yylval->lex_str.length= 1;
-      c= lip->yyGet();                  // should be '.'
-      lip->next_state= MY_LEX_IDENT_START;// Next is an ident (not a keyword)
-      if (!ident_map[lip->yyPeek()])            // Probably ` or "
+      c= lip->yyGet();                          // should be '.'
+      lip->next_state= MY_LEX_IDENT_START;      // Next is ident (not keyword)
+      if (!ident_map[(uchar) lip->yyPeek()])    // Probably ` or "
 	lip->next_state= MY_LEX_START;
       return((int) c);
 
@@ -1147,7 +1241,8 @@ int lex_one_token(void *arg, void *yythd)
         }
         else if (c == 'b')
         {
-          while ((c= lip->yyGet()) == '0' || c == '1') ;
+          while ((c= lip->yyGet()) == '0' || c == '1')
+            ;
           if ((lip->yyLength() >= 3) && !ident_map[c])
           {
             /* Skip '0b' */
@@ -1206,11 +1301,12 @@ int lex_one_token(void *arg, void *yythd)
       else
 #endif
       {
-        for (result_state=0; ident_map[c= lip->yyGet()]; result_state|= c) ;
+        for (result_state=0; ident_map[c= lip->yyGet()]; result_state|= c)
+          ;
         /* If there were non-ASCII characters, mark that we must convert */
         result_state= result_state & 0x80 ? IDENT_QUOTED : IDENT;
       }
-      if (c == '.' && ident_map[lip->yyPeek()])
+      if (c == '.' && ident_map[(uchar) lip->yyPeek()])
 	lip->next_state=MY_LEX_IDENT_SEP;// Next is '.'
 
       yylval->lex_str= get_token(lip, 0, lip->yyLength());
@@ -1309,7 +1405,8 @@ int lex_one_token(void *arg, void *yythd)
 
     case MY_LEX_BIN_NUMBER:           // Found b'bin-string'
       lip->yySkip();                  // Accept opening '
-      while ((c= lip->yyGet()) == '0' || c == '1') ;
+      while ((c= lip->yyGet()) == '0' || c == '1')
+        ;
       if (c != '\'')
         return(ABORT_SYM);            // Illegal hex constant
       lip->yySkip();                  // Accept closing '
@@ -1320,8 +1417,8 @@ int lex_one_token(void *arg, void *yythd)
       return (BIN_NUM);
 
     case MY_LEX_CMP_OP:			// Incomplete comparison operator
-      if (state_map[lip->yyPeek()] == MY_LEX_CMP_OP ||
-          state_map[lip->yyPeek()] == MY_LEX_LONG_CMP_OP)
+      if (state_map[(uchar) lip->yyPeek()] == MY_LEX_CMP_OP ||
+          state_map[(uchar) lip->yyPeek()] == MY_LEX_LONG_CMP_OP)
         lip->yySkip();
       if ((tokval = find_keyword(lip, lip->yyLength() + 1, 0)))
       {
@@ -1332,11 +1429,11 @@ int lex_one_token(void *arg, void *yythd)
       break;
 
     case MY_LEX_LONG_CMP_OP:		// Incomplete comparison operator
-      if (state_map[lip->yyPeek()] == MY_LEX_CMP_OP ||
-          state_map[lip->yyPeek()] == MY_LEX_LONG_CMP_OP)
+      if (state_map[(uchar) lip->yyPeek()] == MY_LEX_CMP_OP ||
+          state_map[(uchar) lip->yyPeek()] == MY_LEX_LONG_CMP_OP)
       {
         lip->yySkip();
-        if (state_map[lip->yyPeek()] == MY_LEX_CMP_OP)
+        if (state_map[(uchar) lip->yyPeek()] == MY_LEX_CMP_OP)
           lip->yySkip();
       }
       if ((tokval = find_keyword(lip, lip->yyLength() + 1, 0)))
@@ -1555,7 +1652,7 @@ int lex_one_token(void *arg, void *yythd)
       }
       break;
     case MY_LEX_USER_END:		// end '@' of user@hostname
-      switch (state_map[lip->yyPeek()]) {
+      switch (state_map[(uchar) lip->yyPeek()]) {
       case MY_LEX_STRING:
       case MY_LEX_USER_VARIABLE_DELIMITER:
       case MY_LEX_STRING_OR_DELIMITER:
@@ -1580,7 +1677,7 @@ int lex_one_token(void *arg, void *yythd)
       yylval->lex_str.str=(char*) lip->get_ptr();
       yylval->lex_str.length=1;
       lip->yySkip();                                    // Skip '@'
-      lip->next_state= (state_map[lip->yyPeek()] ==
+      lip->next_state= (state_map[(uchar) lip->yyPeek()] ==
 			MY_LEX_USER_VARIABLE_DELIMITER ?
 			MY_LEX_OPERATOR_OR_IDENT :
 			MY_LEX_IDENT_OR_KEYWORD);
@@ -1592,7 +1689,8 @@ int lex_one_token(void *arg, void *yythd)
 	[(global | local | session) .]variable_name
       */
 
-      for (result_state= 0; ident_map[c= lip->yyGet()]; result_state|= c) ;
+      for (result_state= 0; ident_map[c= lip->yyGet()]; result_state|= c)
+        ;
       /* If there were non-ASCII characters, mark that we must convert */
       result_state= result_state & 0x80 ? IDENT_QUOTED : IDENT;
 
@@ -1722,6 +1820,8 @@ void st_select_lex_unit::init_query()
   item_list.empty();
   describe= 0;
   found_rows_for_union= 0;
+  insert_table_with_stored_vcol= 0;
+  derived= 0;
 }
 
 void st_select_lex::init_query()
@@ -1730,13 +1830,14 @@ void st_select_lex::init_query()
   table_list.empty();
   top_join_list.empty();
   join_list= &top_join_list;
-  embedding= leaf_tables= 0;
+  embedding= 0;
+  leaf_tables_prep.empty();
+  leaf_tables.empty();
   item_list.empty();
   join= 0;
   having= prep_having= where= prep_where= 0;
   olap= UNSPECIFIED_OLAP_TYPE;
   having_fix_field= 0;
-  group_fix_field= 0;
   context.select_lex= this;
   context.init();
   /*
@@ -1765,11 +1866,15 @@ void st_select_lex::init_query()
   link_next= 0;
   m_non_agg_field_used= false;
   m_agg_func_used= false;
+  is_prep_leaf_list_saved= FALSE;
+  bzero((char*) expr_cache_may_be_used, sizeof(expr_cache_may_be_used));
 }
 
 void st_select_lex::init_select()
 {
   st_select_lex_node::init_select();
+  sj_nests.empty();
+  sj_subselects.empty();
   group_list.empty();
   type= db= 0;
   having= 0;
@@ -1797,6 +1902,8 @@ void st_select_lex::init_select()
   inner_refs_list.empty();
   m_non_agg_field_used= false;
   m_agg_func_used= false;
+  insert_tables= 0;
+  merged_into= 0;
 }
 
 /*
@@ -1814,6 +1921,31 @@ void st_select_lex_node::include_down(st_select_lex_node *upper)
   slave= 0;
 }
 
+/* Attach slave_arg below this node unless the walk over the slave list finds it */
+void st_select_lex_node::add_slave(st_select_lex_node *slave_arg)
+{
+  for (; slave; slave= slave->next)
+    if (slave == slave_arg)
+      return;
+  /* NOTE(review): the loop advances the member 'slave' itself, so it is 0 here and the if-branch looks unreachable - confirm */
+  if (slave)
+  {
+    st_select_lex_node *slave_arg_slave= slave_arg->slave;
+    /* Insert in the front of list of slaves if any. */
+    slave_arg->include_neighbour(slave);
+    /* include_neighbour() sets slave_arg->slave=0, restore it. */
+    slave_arg->slave= slave_arg_slave;
+    /* Count on include_neighbour() setting the master. */
+    DBUG_ASSERT(slave_arg->master == this);
+  }
+  else
+  {
+    slave= slave_arg;        /* slave_arg becomes the only node reachable through 'slave' */
+    slave_arg->master= this;
+  }
+}
+
+
 /*
   include on level down (but do not link)
 
@@ -1865,17 +1997,29 @@ void st_select_lex_node::fast_exclude()
   
 }
 
+
 /*
-  excluding select_lex structure (except first (first select can't be
+  Exclude a node from the tree lex structure, but leave it in the global
+  list of nodes.
+*/
+
+void st_select_lex_node::exclude_from_tree()
+{ /* Unlink this node from its sibling list; its own next/prev are left untouched */
+  if ((*prev= next))     /* the link that pointed at this node now points at next */
+    next->prev= prev;    /* and next, if any, refers back to that same link */
+}
+
+
+/*
+  Exclude select_lex structure (except first (first select can't be
   deleted, because it is most upper select))
 */
 void st_select_lex_node::exclude()
 {
-  //exclude from global list
+  /* exclude from global list */
   fast_exclude();
-  //exclude from other structures
-  if ((*prev= next))
-    next->prev= prev;
+  /* exclude from other structures */
+  exclude_from_tree();
   /* 
      We do not need following statements, because prev pointer of first 
      list element point to master->slave
@@ -1974,32 +2118,43 @@ void st_select_lex_unit::exclude_tree()
     'last' should be reachable from this st_select_lex_node
 */
 
-void st_select_lex::mark_as_dependent(st_select_lex *last)
+bool st_select_lex::mark_as_dependent(THD *thd, st_select_lex *last, Item *dependency)
 {
+
+  DBUG_ASSERT(this != last);
+
   /*
     Mark all selects from resolved to 1 before select where was
     found table as depended (of select where was found table)
   */
-  for (SELECT_LEX *s= this;
-       s && s != last;
-       s= s->outer_select())
-    if (!(s->uncacheable & UNCACHEABLE_DEPENDENT))
+  SELECT_LEX *s= this;
+  do
+  {
+    if (!(s->uncacheable & UNCACHEABLE_DEPENDENT_GENERATED))
     {
       // Select is dependent of outer select
       s->uncacheable= (s->uncacheable & ~UNCACHEABLE_UNITED) |
-                       UNCACHEABLE_DEPENDENT;
+                       UNCACHEABLE_DEPENDENT_GENERATED;
       SELECT_LEX_UNIT *munit= s->master_unit();
       munit->uncacheable= (munit->uncacheable & ~UNCACHEABLE_UNITED) |
-                       UNCACHEABLE_DEPENDENT;
+                       UNCACHEABLE_DEPENDENT_GENERATED;
       for (SELECT_LEX *sl= munit->first_select(); sl ; sl= sl->next_select())
       {
         if (sl != s &&
-            !(sl->uncacheable & (UNCACHEABLE_DEPENDENT | UNCACHEABLE_UNITED)))
+            !(sl->uncacheable & (UNCACHEABLE_DEPENDENT_GENERATED |
+                                 UNCACHEABLE_UNITED)))
           sl->uncacheable|= UNCACHEABLE_UNITED;
       }
     }
+
+    Item_subselect *subquery_expr= s->master_unit()->item;
+    if (subquery_expr && subquery_expr->mark_as_dependent(thd, last, 
+                                                          dependency))
+      return TRUE;
+  } while ((s= s->outer_select()) != last && s != 0);
   is_correlated= TRUE;
   this->master_unit()->item->is_correlated= TRUE;
+  return FALSE;
 }
 
 bool st_select_lex_node::set_braces(bool value)      { return 1; }
@@ -2125,23 +2280,25 @@ ulong st_select_lex::get_table_join_options()
 
 bool st_select_lex::setup_ref_array(THD *thd, uint order_group_num)
 {
+  DBUG_ENTER("st_select_lex::setup_ref_array");
+
   if (ref_pointer_array)
-    return 0;
+    DBUG_RETURN(0);
 
   // find_order_in_list() may need some extra space, so multiply by two.
   order_group_num*= 2;
 
   /*
-    We have to create array in prepared statement memory if it is
+    We have to create array in prepared statement memory if it is a
     prepared statement
   */
-  Query_arena *arena= thd->stmt_arena;
-  return (ref_pointer_array=
-          (Item **)arena->alloc(sizeof(Item*) * (n_child_sum_items +
-                                                 item_list.elements +
-                                                 select_n_having_items +
-                                                 select_n_where_fields +
-                                                 order_group_num)*5)) == 0;
+  ref_pointer_array=
+    (Item **)thd->stmt_arena->alloc(sizeof(Item*) * (n_child_sum_items +
+                                                     item_list.elements +
+                                                     select_n_having_items +
+                                                     select_n_where_fields +
+                                                     order_group_num)*5);
+  DBUG_RETURN(ref_pointer_array == 0);
 }
 
 
@@ -2186,9 +2343,27 @@ void st_select_lex::print_order(String *str,
   {
     if (order->counter_used)
     {
-      char buffer[20];
-      size_t length= my_snprintf(buffer, 20, "%d", order->counter);
-      str->append(buffer, (uint) length);
+      if (query_type != QT_VIEW_INTERNAL)
+      {
+        char buffer[20];
+        size_t length= my_snprintf(buffer, 20, "%d", order->counter);
+        str->append(buffer, (uint) length);
+      }
+      else
+      {
+        /* replace numeric reference with expression */
+        if (order->item[0]->type() == Item::INT_ITEM &&
+            order->item[0]->basic_const_item())
+        {
+          char buffer[20];
+          size_t length= my_snprintf(buffer, 20, "%d", order->counter);
+          str->append(buffer, (uint) length);
+          /* make it expression instead of integer constant */
+          str->append(STRING_WITH_LEN("+0"));
+        }
+        else
+          (*order->item)->print(str, query_type);
+      }
     }
     else
       (*order->item)->print(str, query_type);
@@ -2206,16 +2381,17 @@ void st_select_lex::print_limit(THD *thd,
 {
   SELECT_LEX_UNIT *unit= master_unit();
   Item_subselect *item= unit->item;
-  if (item && unit->global_parameters == this &&
-      (item->substype() == Item_subselect::EXISTS_SUBS ||
-       item->substype() == Item_subselect::IN_SUBS ||
-       item->substype() == Item_subselect::ALL_SUBS))
+
+  if (item && unit->global_parameters == this)
   {
-    DBUG_ASSERT(!item->fixed ||
-                (select_limit->val_int() == LL(1) && offset_limit == 0));
-    return;
+    Item_subselect::subs_type subs_type= item->substype();
+    if (subs_type == Item_subselect::EXISTS_SUBS ||
+        subs_type == Item_subselect::IN_SUBS ||
+        subs_type == Item_subselect::ALL_SUBS)
+    {
+      return;
+    }
   }
-
   if (explicit_limit)
   {
     str->append(STRING_WITH_LEN(" limit "));
@@ -2228,6 +2404,7 @@ void st_select_lex::print_limit(THD *thd,
   }
 }
 
+
 /**
   @brief Restore the LEX and THD in case of a parse error.
 
@@ -2352,6 +2529,7 @@ LEX::LEX()
                          INITIAL_LEX_PLUGIN_LIST_SIZE, 
                          INITIAL_LEX_PLUGIN_LIST_SIZE);
   reset_query_tables_list(TRUE);
+  mi.init();
 }
 
 
@@ -2655,6 +2833,7 @@ void st_select_lex_unit::set_limit(st_select_lex *sl)
 void LEX::set_trg_event_type_for_tables()
 {
   uint8 new_trg_event_map= 0;
+  DBUG_ENTER("LEX::set_trg_event_type_for_tables");
 
   /*
     Some auxiliary operations
@@ -2774,6 +2953,7 @@ void LEX::set_trg_event_type_for_tables()
       tables->trg_event_map= new_trg_event_map;
     tables= tables->next_local;
   }
+  DBUG_VOID_RETURN;
 }
 
 
@@ -3021,10 +3201,14 @@ static void fix_prepare_info_in_table_list(THD *thd, TABLE_LIST *tbl)
   {
     if (tbl->on_expr)
     {
-      tbl->prep_on_expr= tbl->on_expr;
+      thd->check_and_register_item_tree(&tbl->prep_on_expr, &tbl->on_expr);
       tbl->on_expr= tbl->on_expr->copy_andor_structure(thd);
     }
-    fix_prepare_info_in_table_list(thd, tbl->merge_underlying_list);
+    if (tbl->is_view_or_derived() && tbl->is_merged_derived())
+    {
+      SELECT_LEX *sel= tbl->get_single_select();
+      fix_prepare_info_in_table_list(thd, sel->get_table_list());
+    }
   }
 }
 
@@ -3055,12 +3239,12 @@ void st_select_lex::fix_prepare_information(THD *thd, Item **conds,
     first_execution= 0;
     if (*conds)
     {
-      prep_where= *conds;
+      thd->check_and_register_item_tree(&prep_where, conds);
       *conds= where= prep_where->copy_andor_structure(thd);
     }
     if (*having_conds)
     {
-      prep_having= *having_conds;
+      thd->check_and_register_item_tree(&prep_having, having_conds);
       *having_conds= having= prep_having->copy_andor_structure(thd);
     }
     fix_prepare_info_in_table_list(thd, table_list.first);
@@ -3139,6 +3323,545 @@ bool st_select_lex::add_index_hint (THD *thd, char *str, uint length)
                                             str, length));
 }
 
+/* Optimize JOINs of inner subquery units; TRUE if any optimize() fails. */
+bool st_select_lex::optimize_unflattened_subqueries()
+{
+  for (SELECT_LEX_UNIT *un= first_inner_unit(); un; un= un->next_unit())
+  {
+    Item_subselect *subquery_predicate= un->item;
+    
+    if (subquery_predicate)
+    {
+      if (subquery_predicate->substype() == Item_subselect::IN_SUBS)
+      {
+        Item_in_subselect *in_subs=(Item_in_subselect*)subquery_predicate;
+        if (in_subs->is_jtbm_merged)
+          continue;                 /* skipped: flagged as jtbm-merged */
+      }
+
+      for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
+      {
+        JOIN *inner_join= sl->join;
+        if (!inner_join)
+          continue;                 /* this select has no JOIN object */
+        SELECT_LEX *save_select= un->thd->lex->current_select;
+        ulonglong save_options;
+        int res;
+        /* NOTE(review): presumably recomputes the unit's LIMIT counters. */
+        un->set_limit(un->global_parameters);
+        un->thd->lex->current_select= sl;
+        save_options= inner_join->select_options;
+        if (options & SELECT_DESCRIBE)
+        {
+          /* Optimize the subquery in the context of EXPLAIN. */
+          sl->set_explain_type();
+          sl->options|= SELECT_DESCRIBE;
+          inner_join->select_options|= SELECT_DESCRIBE;
+        }
+        res= inner_join->optimize();
+        inner_join->select_options= save_options;   /* undo temporary flag */
+        un->thd->lex->current_select= save_select;  /* restore before exit */
+        if (res)
+          return TRUE;
+      }
+    }
+  }
+  return FALSE;
+}
+
+
+
+/**
+  @brief Run derived-table processing phases over this select's tables.
+
+  @param lex    LEX passed through to TABLE_LIST::handle_derived()
+  @param phases phases passed through to TABLE_LIST::handle_derived()
+
+  @details
+  Walks table_list along next_local and, for every element that reports
+  is_view_or_derived(), calls its handle_derived(lex, phases).
+
+  @return FALSE all calls succeeded
+  @return TRUE  a call failed; the walk stops at the first failure
+*/
+
+bool st_select_lex::handle_derived(LEX *lex, uint phases)
+{
+  TABLE_LIST *tbl= (TABLE_LIST*) table_list.first;
+  while (tbl)
+  {
+    if (tbl->is_view_or_derived() && tbl->handle_derived(lex, phases))
+      return TRUE;
+    tbl= tbl->next_local;
+  }
+  return FALSE;
+}
+
+
+/**
+  @brief
+  Returns first unoccupied table map and table number
+
+  @param map     [out] return found map
+  @param tablenr [out] return found tablenr
+
+  @details
+  Finds the largest table map and table number among the tables of the
+  leaf_tables list and stores the next ones in *'map' and *'tablenr'.
+
+  @return TRUE  no free table map/table number
+  @return FALSE found free table map/table number
+*/
+
+bool st_select_lex::get_free_table_map(table_map *map, uint *tablenr)
+{
+  table_map top_map= 0;
+  uint top_tablenr= 0;
+  List_iterator<TABLE_LIST> leaf_it(leaf_tables);
+
+  /* Find the largest map and number currently in use. */
+  for (TABLE_LIST *leaf= leaf_it++; leaf; leaf= leaf_it++)
+  {
+    if (leaf->table->map > top_map)
+      top_map= leaf->table->map;
+    if (leaf->table->tablenr > top_tablenr)
+      top_tablenr= leaf->table->tablenr;
+  }
+  /* The free slot is the one right after the largest in use. */
+  *map= top_map << 1;
+  *tablenr= top_tablenr + 1;
+  return (*tablenr >= MAX_TABLES) ? TRUE : FALSE;
+}
+
+
+/**
+  @brief
+  Append given table to the end of a chain of leaf tables.
+
+  @param link  Pointer-to-member selecting which TABLE_LIST link to follow
+  @param table Table to append
+
+  @details
+  Starting at the head of leaf_tables, follows 'link' to the last element
+  of the chain and points its 'link' at 'table'. The head is dereferenced
+  unconditionally, so leaf_tables is expected to be non-empty.
+*/
+
+void st_select_lex::append_table_to_list(TABLE_LIST *TABLE_LIST::*link,
+                                         TABLE_LIST *table)
+{
+  TABLE_LIST *last= leaf_tables.head();
+  while (last->*link)
+    last= last->*link;
+  last->*link= table;
+}
+
+/**
+  @brief
+  Remove given table from the leaf_tables list.
+
+  @param table Table to remove
+
+  @details
+  Removes the first element of leaf_tables that is 'table'. Nothing
+  happens if 'table' is not in the list.
+*/
+
+void st_select_lex::remove_table_from_list(TABLE_LIST *table)
+{
+  List_iterator<TABLE_LIST> leaf_it(leaf_tables);
+
+  for (TABLE_LIST *leaf= leaf_it++; leaf; leaf= leaf_it++)
+  {
+    if (leaf != table)
+      continue;
+    /* Found it: drop the element the iterator stands on and stop. */
+    leaf_it.remove();
+    return;
+  }
+}
+
+
+/**
+  @brief
+  Assigns new table maps to tables in the leaf_tables list
+
+  @param derived    Derived table to take initial table map from
+  @param map        table map to begin with
+  @param tablenr    table number to begin with
+  @param parent_lex new parent select_lex
+
+  @details
+  Assign new table maps/table numbers to all tables in the leaf_tables list.
+  'map'/'tablenr' are used for the first table and shifted to left/
+  increased for each subsequent table in the leaf_tables list.
+  If the 'derived' table is given and has a TABLE, then its table map/number
+  is used for the first table in the list and 'map'/'tablenr' are used for
+  the second and all subsequent tables.
+  The 'parent_lex' is set as the new parent select_lex for all tables in the
+  list.
+*/
+
+void st_select_lex::remap_tables(TABLE_LIST *derived, table_map map,
+                                 uint tablenr, SELECT_LEX *parent_lex)
+{
+  bool first_table= TRUE;
+  TABLE_LIST *tl;
+  table_map first_map;
+  uint first_tablenr;
+
+  if (derived && derived->table)
+  {
+    first_map= derived->table->map;
+    first_tablenr= derived->table->tablenr;
+  }
+  else                            /* nothing to reuse: consume 'map'/'tablenr' */
+  {
+    first_map= map;
+    map<<= 1;
+    first_tablenr= tablenr++;
+  }
+  /*
+    Assign table bit/table number.
+    The first table of the subselect gets first_map/first_tablenr chosen
+    above. The rest of the tables are getting bits sequentially, starting
+    from the current values of 'map'/'tablenr'.
+  */
+  List_iterator<TABLE_LIST> ti(leaf_tables);
+  while ((tl= ti++))
+  {
+    if (first_table)
+    {
+      first_table= FALSE;
+      tl->table->set_table_map(first_map, first_tablenr);
+    }
+    else
+    {
+      tl->table->set_table_map(map, tablenr);
+      tablenr++;
+      map<<= 1;
+    }
+    SELECT_LEX *old_sl= tl->select_lex;  /* kept to recognize its nests below */
+    tl->select_lex= parent_lex;
+    for(TABLE_LIST *emb= tl->embedding;  /* re-parent nests of the old select */
+        emb && emb->select_lex == old_sl;
+        emb= emb->embedding)
+      emb->select_lex= parent_lex;
+  }
+}
+
+/**
+  @brief
+  Merge a subquery into this select.
+
+  @param thd         thread handle; its lex->sql_command is consulted
+  @param derived     derived table of the subquery to be merged
+  @param subq_select select_lex of the subquery
+  @param table_no    table number for assigning to merged tables from subquery
+  @param map         table map for assigning to merged tables from subquery
+
+  @details
+  This function merges a subquery into its parent select. In short the
+  merge operation appends the subquery FROM table list to the parent's
+  FROM table list. In more details:
+    .) the top_join_list of the subquery is wrapped into a join_nest
+       and attached to 'derived'
+    .) subquery's leaf_tables list is merged with the leaf_tables
+       list of this select_lex
+    .) the table maps and table numbers of the tables merged from
+       the subquery are adjusted to reflect their new binding to
+       this select
+
+  @return FALSE always; no failure path is visible in this function
+*/
+
+bool SELECT_LEX::merge_subquery(THD *thd, TABLE_LIST *derived,
+                                SELECT_LEX *subq_select,
+                                uint table_no, table_map map)
+{
+  derived->wrap_into_nested_join(subq_select->top_join_list);
+  /* Append the subquery's leaf tables to this select's leaf_tables. */
+  leaf_tables.concat(&subq_select->leaf_tables);
+
+  ftfunc_list->concat(subq_select->ftfunc_list);
+  if (join ||
+      thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
+      thd->lex->sql_command == SQLCOM_DELETE_MULTI)
+  {
+    List_iterator_fast<Item_in_subselect> li(subq_select->sj_subselects);
+    Item_in_subselect *in_subq;
+    while ((in_subq= li++))       /* take over the subquery's sj_subselects */
+    {
+      sj_subselects.push_back(in_subq);
+      if (in_subq->emb_on_expr_nest == NO_JOIN_NEST)
+         in_subq->emb_on_expr_nest= derived;
+    }
+  }
+  /*
+    Remove merged table from the leaf_tables list.
+    When merge_subquery is called at a subquery-to-semijoin transformation
+    the derived isn't in the leaf_tables list, so in this case the call of
+    remove_table_from_list does not cause any actions.
+  */
+  remove_table_from_list(derived);
+
+  /* Walk through child's tables and adjust table map, tablenr,
+   * parent_lex */
+  subq_select->remap_tables(derived, map, table_no, this);
+  subq_select->merged_into= this;
+  return FALSE;
+}
+
+
+/**
+  @brief
+  Mark tables from the leaf_tables list as belonging to a derived table.
+
+  @param derived   tables will be marked as belonging to this derived
+
+  @details
+  Runs through leaf_tables and marks every table as belonging to 'derived'.
+*/
+
+void SELECT_LEX::mark_as_belong_to_derived(TABLE_LIST *derived)
+{
+  List_iterator<TABLE_LIST> leaf_it(leaf_tables);
+
+  for (TABLE_LIST *leaf= leaf_it++; leaf; leaf= leaf_it++)
+  {
+    /* Record the owning derived table and set open_type to OT_BASE_ONLY. */
+    leaf->belong_to_derived= derived;
+    leaf->open_type= OT_BASE_ONLY;
+  }
+}
+
+
+/**
+  @brief Update used_tables cache for this select
+
+  @details
+  Calls update_used_tables() on the ON expressions reachable from
+  leaf_tables, on join->conds/having/fields_list, on inner_refs_list, on
+  GROUP BY and (outside a union) ORDER BY. 'join' must not be NULL here.
+*/
+
+void SELECT_LEX::update_used_tables()
+{
+  TABLE_LIST *tl;
+  List_iterator<TABLE_LIST> ti(leaf_tables);
+  while ((tl= ti++))
+  {
+    TABLE_LIST *embedding;
+    embedding= tl;
+    do                            /* look at tl, then at each nest around it */
+    {
+      bool maybe_null;
+      if ((maybe_null= test(embedding->outer_join)))
+      {
+	tl->table->maybe_null= maybe_null;
+        break;
+      }
+    }
+    while ((embedding= embedding->embedding));
+    if (tl->on_expr)
+    {
+      tl->on_expr->update_used_tables();
+      tl->on_expr->walk(&Item::eval_not_null_tables, 0, NULL);
+    }
+    embedding= tl->embedding;
+    while (embedding)             /* climb the enclosing nests */
+    {
+      if (embedding->on_expr && 
+          embedding->nested_join->join_list.head() == tl)
+      {
+        embedding->on_expr->update_used_tables();
+        embedding->on_expr->walk(&Item::eval_not_null_tables, 0, NULL);
+      }
+      tl= embedding;              /* tl is reloaded by ti++ on the next pass */
+      embedding= tl->embedding;
+    }
+  }
+  if (join->conds)
+  {
+    join->conds->update_used_tables();
+    join->conds->walk(&Item::eval_not_null_tables, 0, NULL);
+  }
+  if (join->having)
+  {
+    join->having->update_used_tables();
+  }
+
+  Item *item;
+  List_iterator_fast<Item> it(join->fields_list);
+  while ((item= it++))
+  {
+    item->update_used_tables();
+  }
+  Item_outer_ref *ref;
+  List_iterator_fast<Item_outer_ref> ref_it(inner_refs_list);
+  while ((ref= ref_it++))
+  {
+    item= ref->outer_ref;
+    item->update_used_tables();
+  }
+  for (ORDER *order= group_list.first; order; order= order->next)
+    (*order->item)->update_used_tables();
+  if (!master_unit()->is_union())
+  {
+    for (ORDER *order= order_list.first; order; order= order->next)
+      (*order->item)->update_used_tables();
+  }      
+}
+
+
+/**
+  Set 'type' (the select type string for EXPLAIN) and flag SELECT_DESCRIBE.
+*/
+
+void st_select_lex::set_explain_type()
+{
+  bool is_primary= FALSE;
+  if (next_select())              /* another select follows this one */
+    is_primary= TRUE;
+
+  if (!is_primary && first_inner_unit())
+  {
+    /*
+      An inner unit that is not a derived table, or is a materialized
+      derived table/view, makes this a PRIMARY select; otherwise SIMPLE.
+    */
+    for (SELECT_LEX_UNIT *un= first_inner_unit(); un; un= un->next_unit())
+    {
+      if ((!un->derived || un->derived->is_materialized_derived()))
+      {
+        is_primary= TRUE;
+        break;
+      }
+    }
+  }
+
+  SELECT_LEX *first= master_unit()->first_select();
+  /* drop UNCACHEABLE_EXPLAIN, because it is for internal usage only */
+  uint8 is_uncacheable= (uncacheable & ~UNCACHEABLE_EXPLAIN);
+
+  /* PRIMARY/SIMPLE apply only to the top-level select of the LEX. */
+  type= ((&master_unit()->thd->lex->select_lex == this) ?
+         (is_primary ? "PRIMARY" : "SIMPLE"):    
+         ((this == first) ?
+          ((linkage == DERIVED_TABLE_TYPE) ?
+           "DERIVED" :
+           ((is_uncacheable & UNCACHEABLE_DEPENDENT) ?
+            "DEPENDENT SUBQUERY" :
+            (is_uncacheable ? "UNCACHEABLE SUBQUERY" :
+             "SUBQUERY"))) :
+          ((is_uncacheable & UNCACHEABLE_DEPENDENT) ?
+           "DEPENDENT UNION":
+           is_uncacheable ? "UNCACHEABLE UNION":
+           "UNION")));
+  options|= SELECT_DESCRIBE;
+}
+
+
+/**
+  @brief
+  Increase estimated number of records for a derived table/view
+
+  @param records  number of records to increase estimate by
+
+  @details
+  Adds 'records' to the 'records' counter of the result object of the unit
+  this select belongs to. That unit must be a derived table (asserted).
+*/
+
+void SELECT_LEX::increase_derived_records(ha_rows records)
+{
+  SELECT_LEX_UNIT *owner= master_unit();
+  /* Only meaningful for a unit that has a derived table attached. */
+  DBUG_ASSERT(owner->derived);
+  select_union *sink= (select_union*) owner->result;
+  sink->records= sink->records + records;
+}
+
+
+/**
+  @brief
+  Mark select's derived table as a const one.
+
+  @param empty Whether select has an empty result set
+
+  @details
+  Does nothing when lex->describe is set or when the unit of this select
+  has no derived table. Otherwise the estimated number of rows is incremented
+  by one for a non-empty result, and the derived table gets fill_me set
+  unless the unit is a union or the derived table is a merged one.
+*/
+
+void SELECT_LEX::mark_const_derived(bool empty)
+{
+  TABLE_LIST *dt= master_unit()->derived;
+  if (join->thd->lex->describe || !dt)
+    return;
+  if (!empty)
+    increase_derived_records(1);
+  if (master_unit()->is_union() || dt->is_merged_derived())
+    return;
+  dt->fill_me= TRUE;
+}
+
+
+bool st_select_lex::save_leaf_tables(THD *thd)
+{
+  Query_arena *arena= thd->stmt_arena, backup;
+  if (arena->is_conventional())
+    arena= 0;                                  
+  else
+    thd->set_n_backup_active_arena(arena, &backup);
+
+  List_iterator_fast<TABLE_LIST> li(leaf_tables);
+  TABLE_LIST *table;
+  while ((table= li++))
+  {
+    if (leaf_tables_exec.push_back(table))
+      return 1;
+    table->tablenr_exec= table->table->tablenr;
+    table->map_exec= table->table->map;
+    if (join && (join->select_options & SELECT_DESCRIBE))
+      table->maybe_null_exec= 0;
+    else
+      table->maybe_null_exec= table->table->maybe_null;
+  }
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+
+  return 0;
+}
+
+
+bool st_select_lex::save_prep_leaf_tables(THD *thd)
+{
+  if (!thd->save_prep_leaf_list)
+    return 0;
+
+  Query_arena *arena= thd->stmt_arena, backup;
+  if (arena->is_conventional())
+    arena= 0;                                  
+  else
+    thd->set_n_backup_active_arena(arena, &backup);
+
+  List_iterator_fast<TABLE_LIST> li(leaf_tables);
+  TABLE_LIST *table;
+  while ((table= li++))
+  {
+    if (leaf_tables_prep.push_back(table))
+      return 1;
+  }
+  thd->lex->select_lex.is_prep_leaf_list_saved= TRUE; 
+  thd->save_prep_leaf_list= FALSE;
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+
+  return 0;
+}
+
+
 /**
   A routine used by the parser to decide whether we are specifying a full
   partitioning or if only partitions to add or to split.
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 542e8b42ae2..bbc91c2f987 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -190,6 +190,9 @@ enum enum_sql_command {
   SQLCOM_SHOW_PROFILE, SQLCOM_SHOW_PROFILES,
   SQLCOM_SIGNAL, SQLCOM_RESIGNAL,
   SQLCOM_SHOW_RELAYLOG_EVENTS, 
+  SQLCOM_SHOW_USER_STATS, SQLCOM_SHOW_TABLE_STATS, SQLCOM_SHOW_INDEX_STATS,
+  SQLCOM_SHOW_CLIENT_STATS,
+
   /*
     When a command is added here, be sure it's also added in mysqld.cc
     in "struct show_var_st status_vars[]= {" ...
@@ -275,24 +278,41 @@ typedef struct st_lex_server_options
   would better be renamed to st_lex_replication_info).  Some fields,
   e.g., delay, are saved in Relay_log_info, not in Master_info.
 */
-typedef struct st_lex_master_info
+struct LEX_MASTER_INFO
 {
+  DYNAMIC_ARRAY repl_ignore_server_ids;
   char *host, *user, *password, *log_file_name;
-  uint port, connect_retry;
-  float heartbeat_period;
+  char *ssl_key, *ssl_cert, *ssl_ca, *ssl_capath, *ssl_cipher;
+  char *relay_log_name;
   ulonglong pos;
+  ulong relay_log_pos;
   ulong server_id;
+  uint port, connect_retry;
+  float heartbeat_period;
   /*
     Enum is used for making it possible to detect if the user
     changed variable or if it should be left at old value
    */
   enum {LEX_MI_UNCHANGED, LEX_MI_DISABLE, LEX_MI_ENABLE}
     ssl, ssl_verify_server_cert, heartbeat_opt, repl_ignore_server_ids_opt;
-  char *ssl_key, *ssl_cert, *ssl_ca, *ssl_capath, *ssl_cipher;
-  char *relay_log_name;
-  ulong relay_log_pos;
-  DYNAMIC_ARRAY repl_ignore_server_ids;
-} LEX_MASTER_INFO;
+
+  void init()                     /* zero all fields, then set up the id array */
+  {
+    bzero(this, sizeof(*this));   /* enums become 0, i.e. LEX_MI_UNCHANGED */
+    my_init_dynamic_array(&repl_ignore_server_ids,
+                          sizeof(::server_id), 0, 16);
+  }
+  void reset()                    /* delete the id array, clear other fields */
+  {
+    /* NOTE(review): the array is not re-initialized here after deletion. */
+    delete_dynamic(&repl_ignore_server_ids);
+    host= user= password= log_file_name= ssl_key= ssl_cert= ssl_ca=
+      ssl_capath= ssl_cipher= relay_log_name= 0;
+    pos= relay_log_pos= server_id= port= connect_retry= 0;
+    heartbeat_period= 0;
+    ssl= ssl_verify_server_cert= heartbeat_opt=
+      repl_ignore_server_ids_opt= LEX_MI_UNCHANGED;
+  }
+};
 
 typedef struct st_lex_reset_slave
 {
@@ -499,7 +519,8 @@ public:
 
   /*
     result of this query can't be cached, bit field, can be :
-      UNCACHEABLE_DEPENDENT
+      UNCACHEABLE_DEPENDENT_GENERATED
+      UNCACHEABLE_DEPENDENT_INJECTED
       UNCACHEABLE_RAND
       UNCACHEABLE_SIDEEFFECT
       UNCACHEABLE_EXPLAIN
@@ -533,10 +554,12 @@ public:
   virtual void init_query();
   virtual void init_select();
   void include_down(st_select_lex_node *upper);
+  void add_slave(st_select_lex_node *slave_arg);
   void include_neighbour(st_select_lex_node *before);
   void include_standalone(st_select_lex_node *sel, st_select_lex_node **ref);
   void include_global(st_select_lex_node **plink);
   void exclude();
+  void exclude_from_tree();
 
   virtual st_select_lex_unit* master_unit()= 0;
   virtual st_select_lex* outer_select()= 0;
@@ -561,6 +584,11 @@ public:
   friend bool mysql_new_select(LEX *lex, bool move_down);
   friend bool mysql_make_view(THD *thd, File_parser *parser,
                               TABLE_LIST *table, uint flags);
+  friend bool mysql_derived_prepare(THD *thd, LEX *lex,
+                                  TABLE_LIST *orig_table_list);
+  friend bool mysql_derived_merge(THD *thd, LEX *lex,
+                                  TABLE_LIST *orig_table_list);
+  friend bool TABLE_LIST::init_derived(THD *thd, bool init_view);
 private:
   void fast_exclude();
 };
@@ -581,9 +609,6 @@ class st_select_lex_unit: public st_select_lex_node {
 protected:
   TABLE_LIST result_table_list;
   select_union *union_result;
-  TABLE *table; /* temporary table using for appending UNION results */
-
-  select_result *result;
   ulonglong found_rows_for_union;
   bool saved_error;
 
@@ -596,6 +621,9 @@ public:
   {
   }
 
+
+  TABLE *table; /* temporary table using for appending UNION results */
+  select_result *result;
   bool  prepared, // prepare phase already performed for UNION (unit)
     optimized, // optimize phase already performed for UNION (unit)
     executed, // already executed
@@ -622,6 +650,11 @@ public:
   ha_rows select_limit_cnt, offset_limit_cnt;
   /* not NULL if unit used in subselect, point to subselect item */
   Item_subselect *item;
+  /*
+    TABLE_LIST representing this union in the embedding select. Used for
+    derived tables/views handling.
+  */
+  TABLE_LIST *derived;
   /* thread handler */
   THD *thd;
   /*
@@ -634,6 +667,13 @@ public:
   bool describe; /* union exec() called for EXPLAIN */
   Procedure *last_procedure;	 /* Pointer to procedure, if such exists */
 
+  /* 
+    Insert table with stored virtual columns.
+    This is used only in those rare cases 
+    when the list of inserted values is empty.
+  */
+  TABLE *insert_table_with_stored_vcol;
+
   void init_query();
   st_select_lex_unit* master_unit();
   st_select_lex* outer_select();
@@ -651,6 +691,7 @@ public:
 
   /* UNION methods */
   bool prepare(THD *thd, select_result *result, ulong additional_options);
+  bool optimize();
   bool exec();
   bool cleanup();
   inline void unclean() { cleaned= 0; }
@@ -661,11 +702,14 @@ public:
   bool add_fake_select_lex(THD *thd);
   void init_prepare_fake_select_lex(THD *thd);
   inline bool is_prepared() { return prepared; }
-  bool change_result(select_subselect *result, select_subselect *old_result);
+  bool change_result(select_result_interceptor *result,
+                     select_result_interceptor *old_result);
   void set_limit(st_select_lex *values);
   void set_thd(THD *thd_arg) { thd= thd_arg; }
   inline bool is_union (); 
 
+  void set_unique_exclude();
+
   friend void lex_start(THD *thd);
   friend int subselect_union_engine::exec();
 
@@ -706,12 +750,30 @@ public:
   List<TABLE_LIST> top_join_list; /* join list of the top level          */
   List<TABLE_LIST> *join_list;    /* list for the currently parsed join  */
   TABLE_LIST *embedding;          /* table embedding to the above list   */
+  List<TABLE_LIST> sj_nests;      /* Semi-join nests within this join */
   /*
     Beginning of the list of leaves in a FROM clause, where the leaves
     inlcude all base tables including view tables. The tables are connected
     by TABLE_LIST::next_leaf, so leaf_tables points to the left-most leaf.
+
+    List of all base tables local to a subquery including all view
+    tables. Unlike in 'next_local', views in this list are *not*
+    leaves. Created in setup_tables() -> make_leaves_list().
+  */
+  /* 
+    Subqueries that will need to be converted to semi-join nests, including
+    those converted to jtbm nests. The list is emptied when conversion is done.
   */
-  TABLE_LIST *leaf_tables;
+  List<Item_in_subselect> sj_subselects;
+
+  List<TABLE_LIST> leaf_tables;
+  List<TABLE_LIST> leaf_tables_exec;
+  List<TABLE_LIST> leaf_tables_prep;
+  bool is_prep_leaf_list_saved;
+  uint insert_tables;
+  st_select_lex *merged_into; /* select which this select is merged into */
+                              /* (not 0 only for views/derived tables)   */
+
   const char *type;               /* type of select for EXPLAIN          */
 
   SQL_I_List<ORDER> order_list;   /* ORDER clause */
@@ -746,8 +808,6 @@ public:
   bool  braces;   	/* SELECT ... UNION (SELECT ... ) <- this braces */
   /* TRUE when having fix field called in processing of this SELECT */
   bool having_fix_field;
-  /* TRUE when GROUP BY fix field called in processing of this SELECT */
-  bool group_fix_field;
   /* List of references to fields referenced from inner selects */
   List<Item_outer_ref> inner_refs_list;
   /* Number of Item_sum-derived objects in this SELECT */
@@ -758,6 +818,11 @@ public:
   /* explicit LIMIT clause was used */
   bool explicit_limit;
   /*
+    This array is used to note  whether we have any candidates for
+    expression caching in the corresponding clauses
+  */
+  bool expr_cache_may_be_used[PARSING_PLACE_SIZE];
+  /*
     there are subquery in HAVING clause => we can't close tables before
     query processing end even if we use temporary table
   */
@@ -826,8 +891,9 @@ public:
   {
     return master_unit()->return_after_parsing();
   }
+  inline bool is_subquery_function() { return master_unit()->item != 0; }
 
-  void mark_as_dependent(st_select_lex *last);
+  bool mark_as_dependent(THD *thd, st_select_lex *last, Item *dependency);
 
   bool set_braces(bool value);
   bool inc_in_sum_expr();
@@ -912,6 +978,37 @@ public:
   }
 
   void clear_index_hints(void) { index_hints= NULL; }
+  bool is_part_of_union() { return master_unit()->is_union(); }
+  /*
+    Optimize all subqueries that have not been flattened into semi-joins.
+    This functionality is a method of SELECT_LEX instead of JOIN because
+    some SQL statements as DELETE do not have a corresponding JOIN object.
+  */
+  bool optimize_unflattened_subqueries();
+  /* Set the EXPLAIN type for this subquery. */
+  void set_explain_type();
+  bool handle_derived(LEX *lex, uint phases);
+  void append_table_to_list(TABLE_LIST *TABLE_LIST::*link, TABLE_LIST *table);
+  bool get_free_table_map(table_map *map, uint *tablenr);
+  void remove_table_from_list(TABLE_LIST *table);
+  void remap_tables(TABLE_LIST *derived, table_map map,
+                    uint tablenr, st_select_lex *parent_lex);
+  bool merge_subquery(THD *thd, TABLE_LIST *derived, st_select_lex *subq_lex,
+                      uint tablenr, table_map map);
+  inline bool is_mergeable()      /* TRUE if no clause blocks merging */
+  {
+    if (next_select() != 0 || group_list.elements != 0 || having != 0)
+      return false;               /* next select, GROUP BY or HAVING */
+    return (with_sum_func == 0 && table_list.elements >= 1 &&
+            !(options & SELECT_DISTINCT) && select_limit == 0);
+  }
+  void mark_as_belong_to_derived(TABLE_LIST *derived);
+  void increase_derived_records(ha_rows records);
+  void update_used_tables();
+  void mark_const_derived(bool empty);
+
+  bool save_leaf_tables(THD *thd);
+  bool save_prep_leaf_tables(THD *thd);
 
   /*
     For MODE_ONLY_FULL_GROUP_BY we need to maintain two flags:
@@ -1071,6 +1168,7 @@ enum xa_option_words {XA_NONE, XA_JOIN, XA_RESUME, XA_ONE_PHASE,
                       XA_SUSPEND, XA_FOR_MIGRATE};
 
 extern const LEX_STRING null_lex_str;
+extern const LEX_STRING empty_lex_str;
 
 class Sroutine_hash_entry;
 
@@ -2214,6 +2312,7 @@ struct LEX: public Query_tables_list
   LEX_USER *grant_user;
   XID *xid;
   THD *thd;
+  Virtual_column_info *vcol_info;
 
   /* maintain a list of used plugins for this LEX */
   DYNAMIC_ARRAY plugins;
@@ -2221,8 +2320,6 @@ struct LEX: public Query_tables_list
 
   CHARSET_INFO *charset;
   bool text_string_is_7bit;
-  /* store original leaf_tables for INSERT SELECT and PS/SP */
-  TABLE_LIST *leaf_tables_insert;
 
   /** SELECT of CREATE VIEW statement */
   LEX_STRING create_view_select;
@@ -2300,6 +2397,14 @@ struct LEX: public Query_tables_list
     syntax error back.
   */
   bool expr_allows_subselect;
+  /*
+    A special command "PARSE_VCOL_EXPR" is defined for the parser 
+    to translate a defining expression of a virtual column into an 
+    Item object.
+    The following flag is used to prevent other applications from using
+    this command.
+  */
+  bool parse_vcol_expr;
 
   enum SSL_type ssl_type;			/* defined in violite.h */
   enum enum_duplicates duplicates;
@@ -2335,7 +2440,7 @@ struct LEX: public Query_tables_list
     DERIVED_SUBQUERY and DERIVED_VIEW).
   */
   uint8 derived_tables;
-  uint8 create_view_algorithm;
+  uint16 create_view_algorithm;
   uint8 create_view_check;
   uint8 context_analysis_only;
   bool drop_if_exists, drop_temporary, local_file, one_shot_set;
@@ -2344,7 +2449,7 @@ struct LEX: public Query_tables_list
 
   enum enum_yes_no_unknown tx_chain, tx_release;
   bool safe_to_cache_query;
-  bool subqueries, ignore;
+  bool subqueries, ignore, online;
   st_parsing_options parsing_options;
   Alter_info alter_info;
   /*
@@ -2422,6 +2527,11 @@ struct LEX: public Query_tables_list
   };
 
   /**
+    Collects create options for Field and KEY
+  */
+  engine_option_value *option_list, *option_list_last;
+
+  /**
     During name resolution search only in the table list given by 
     Name_resolution_context::first_name_resolution_table and
     Name_resolution_context::last_name_resolution_table
@@ -2461,9 +2571,15 @@ struct LEX: public Query_tables_list
   {
     return (context_analysis_only &
             (CONTEXT_ANALYSIS_ONLY_PREPARE |
+             CONTEXT_ANALYSIS_ONLY_VCOL_EXPR |
              CONTEXT_ANALYSIS_ONLY_VIEW));
   }
 
+  inline bool is_view_context_analysis()
+  {
+    return (context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW);
+  }
+
   inline void uncacheable(uint8 cause)
   {
     safe_to_cache_query= 0;
@@ -2510,6 +2626,8 @@ struct LEX: public Query_tables_list
     switch (sql_command) {
     case SQLCOM_UPDATE:
     case SQLCOM_UPDATE_MULTI:
+    case SQLCOM_DELETE:
+    case SQLCOM_DELETE_MULTI:
     case SQLCOM_INSERT:
     case SQLCOM_INSERT_SELECT:
     case SQLCOM_REPLACE:
@@ -2744,6 +2862,8 @@ extern void lex_init(void);
 extern void lex_free(void);
 extern void lex_start(THD *thd);
 extern void lex_end(LEX *lex);
+void end_lex_with_single_table(THD *thd, TABLE *table, LEX *old_lex);
+int init_lex_with_single_table(THD *thd, TABLE *table, LEX *lex);
 extern int MYSQLlex(void *arg, void *yythd);
 
 extern void trim_whitespace(CHARSET_INFO *cs, LEX_STRING *str);
diff --git a/sql/sql_lifo_buffer.h b/sql/sql_lifo_buffer.h
new file mode 100644
index 00000000000..34f9624436d
--- /dev/null
+++ b/sql/sql_lifo_buffer.h
@@ -0,0 +1,342 @@
+/**
+  @defgroup Bi-directional LIFO buffers used by DS-MRR implementation
+  @{
+*/
+
+class Forward_lifo_buffer;
+class Backward_lifo_buffer;
+
+
+/*
+  A base class for in-memory buffer used by DS-MRR implementation. Common
+  properties:
+  - The buffer is last-in-first-out, i.e. elements that are written last are
+    read first.
+  - The buffer contains fixed-size elements. The elements are either atomic
+    byte sequences or pairs of them.
+  - The buffer resides in the memory provided by the user. It is possible to
+     = dynamically (ie. between write operations) add adjacent memory space to
+       the buffer
+     = dynamically remove unused space from the buffer.
+    The intent of this is to allow to have two buffers on adjacent memory
+    space, one is being read from (and so its space shrinks), while the other 
+    is being written to (and so it needs more and more space).
+
+  There are two concrete classes, Forward_lifo_buffer and Backward_lifo_buffer.
+*/
+
+class Lifo_buffer 
+{
+protected:
+  size_t size1;
+  size_t size2;
+
+public:
+  /**
+    write() will put into buffer size1 bytes pointed by write_ptr1. If
+    size2!=0, then they will be accompanied by size2 bytes pointed by
+    write_ptr2.
+  */
+  uchar *write_ptr1;
+  uchar *write_ptr2;
+
+  /**
+    read() will do reading by storing pointers to read data into read_ptr1 or
+    into (read_ptr1, read_ptr2), depending on whether the buffer was set to
+    store single objects or pairs.
+  */
+  uchar *read_ptr1;
+  uchar *read_ptr2;
+
+protected:
+  uchar *start; /**< points to start of buffer space */
+  uchar *end;   /**< points to just beyond the end of buffer space */
+public:
+
+  enum enum_direction {
+    BACKWARD=-1, /**< buffer is filled/read from bigger to smaller memory addresses */
+    FORWARD=1  /**< buffer is filled/read from smaller to bigger memory addresses */
+  };
+
+  virtual enum_direction type() = 0;
+
+  /* Buffer space control functions */
+
+  /** Let the buffer store data in the given space. */
+  void set_buffer_space(uchar *start_arg, uchar *end_arg) 
+  {
+    start= start_arg;
+    end= end_arg;
+    TRASH(start, end - start);
+    reset();
+  }
+  
+  /** 
+    Specify the sizes of the data that write() will copy into the buffer. The
+    data itself is taken from write_ptr1 and write_ptr2.
+  */
+  void setup_writing(size_t len1, size_t len2)
+  {
+    size1= len1;
+    size2= len2;
+  }
+
+  /** 
+    Check the element sizes read() will use (pointers to the data are stored
+    in read_ptr1/read_ptr2). They must match those passed to setup_writing().
+  */
+  void setup_reading(size_t len1, size_t len2)
+  {
+    DBUG_ASSERT(len1 == size1);
+    DBUG_ASSERT(len2 == size2);
+  }
+  
+  bool can_write()
+  {
+    return have_space_for(size1 + size2);
+  }
+  virtual void write() = 0;
+
+  bool is_empty() { return used_size() == 0; }
+  virtual bool read() = 0;
+  
+  void sort(qsort2_cmp cmp_func, void *cmp_func_arg)
+  {
+    size_t elem_size= size1 + size2;
+    size_t n_elements= used_size() / elem_size;
+    my_qsort2(used_area(), n_elements, elem_size, cmp_func, cmp_func_arg);
+  }
+
+  virtual void reset() = 0;
+  virtual uchar *end_of_space() = 0;
+protected:
+  virtual size_t used_size() = 0;
+  
+  /* To be used only by iterator class: */
+  virtual uchar *get_pos()= 0;
+  virtual bool read(uchar **position, uchar **ptr1, uchar **ptr2)= 0;
+  friend class Lifo_buffer_iterator;
+public:
+  virtual bool have_space_for(size_t bytes) = 0;
+
+  virtual void remove_unused_space(uchar **unused_start, uchar **unused_end)=0;
+  virtual uchar *used_area() = 0; 
+  virtual ~Lifo_buffer() {};
+};
+
+
+/**
+  Forward LIFO buffer
+
+  The buffer that is being written to from start to end and read in the
+  reverse.  'pos' points to just beyond the end of used space.
+
+  It is possible to grow/shrink the buffer at the end bound
+
+     used space      unused space  
+   *==============*-----------------*
+   ^              ^                 ^
+   |              |                 +--- end
+   |              +---- pos              
+   +--- start           
+*/
+
+class Forward_lifo_buffer: public Lifo_buffer
+{
+  uchar *pos;
+public:
+  enum_direction type() { return FORWARD; }
+  size_t used_size()
+  {
+    return (size_t)(pos - start);
+  }
+  void reset()
+  {
+    pos= start;
+  }
+  uchar *end_of_space() { return pos; }
+  bool have_space_for(size_t bytes)
+  {
+    return (pos + bytes <= end); /* 'end' is one past the last usable byte */
+  }
+
+  void write()
+  {
+    write_bytes(write_ptr1, size1);
+    if (size2)
+      write_bytes(write_ptr2, size2);
+  }
+  void write_bytes(const uchar *data, size_t bytes)
+  {
+    DBUG_ASSERT(have_space_for(bytes));
+    memcpy(pos, data, bytes);
+    pos += bytes;
+  }
+  bool have_data(uchar *position, size_t bytes)
+  {
+    return ((position - start) >= (ptrdiff_t)bytes);
+  }
+  uchar *read_bytes(uchar **position, size_t bytes)
+  {
+    DBUG_ASSERT(have_data(*position, bytes));
+    *position= (*position) - bytes;
+    return *position;
+  }
+  bool read() { return read(&pos, &read_ptr1, &read_ptr2); }
+  bool read(uchar **position, uchar **ptr1, uchar **ptr2)
+  {
+    if (!have_data(*position, size1 + size2))
+      return TRUE;
+    if (size2)
+      *ptr2= read_bytes(position, size2);
+    *ptr1= read_bytes(position, size1);
+    return FALSE;
+  }
+  void remove_unused_space(uchar **unused_start, uchar **unused_end)
+  {
+    DBUG_ASSERT(0); /* Don't need this yet */
+  }
+  /**
+    Add more space to the buffer. The caller is responsible that the space
+    being added is adjacent to the end of the buffer.
+
+    @param unused_start Start of space
+    @param unused_end   End of space
+  */
+  void grow(uchar *unused_start, uchar *unused_end)
+  {
+    DBUG_ASSERT(unused_end >= unused_start);
+    DBUG_ASSERT(end == unused_start);
+    TRASH(unused_start, unused_end - unused_start);
+    end= unused_end;
+  }
+  /* Return pointer to start of the memory area that is occupied by the data */
+  uchar *used_area() { return start; }
+  friend class Lifo_buffer_iterator;
+  uchar *get_pos() { return pos; }
+};
+
+
+
+/**
+  Backward LIFO buffer
+
+  The buffer that is being written to from end to start and read in the
+  reverse.  'pos' points to the start of used space.
+
+  It is possible to grow/shrink the buffer at the start.
+
+     unused space      used space  
+   *--------------*=================*
+   ^              ^                 ^
+   |              |                 +--- end
+   |              +---- pos              
+   +--- start           
+*/
+class Backward_lifo_buffer: public Lifo_buffer
+{
+  uchar *pos;
+public:
+  enum_direction type() { return BACKWARD; }
+ 
+  size_t used_size()
+  {
+    return (size_t)(end - pos);
+  }
+  void reset()
+  {
+    pos= end;
+  }
+  uchar *end_of_space() { return end; }
+  bool have_space_for(size_t bytes)
+  {
+    return (pos - bytes >= start);
+  }
+  void write()
+  {
+    if (size2) /* gate on the element size, exactly as read() does */
+      write_bytes(write_ptr2, size2);
+    write_bytes(write_ptr1, size1);
+  }
+  void write_bytes(const uchar *data, size_t bytes)
+  {
+    DBUG_ASSERT(have_space_for(bytes));
+    pos -= bytes;
+    memcpy(pos, data, bytes);
+  }
+  bool read()
+  {
+    return read(&pos, &read_ptr1, &read_ptr2);
+  }
+  bool read(uchar **position, uchar **ptr1, uchar **ptr2)
+  {
+    if (!have_data(*position, size1 + size2))
+      return TRUE;
+    *ptr1= read_bytes(position, size1);
+    if (size2)
+      *ptr2= read_bytes(position, size2);
+    return FALSE;
+  }
+  bool have_data(uchar *position, size_t bytes)
+  {
+    return ((end - position) >= (ptrdiff_t)bytes);
+  }
+  uchar *read_bytes(uchar **position, size_t bytes)
+  {
+    DBUG_ASSERT(have_data(*position, bytes));
+    uchar *ret= *position;
+    *position= *position + bytes;
+    return ret;
+  }
+  /**
+    Stop using/return the unused part of the space
+    @param unused_start  OUT Start of the unused space
+    @param unused_end    OUT End of the unused space
+  */
+  void remove_unused_space(uchar **unused_start, uchar **unused_end)
+  {
+    *unused_start= start;
+    *unused_end= pos;
+    start= pos;
+  }
+  void grow(uchar *unused_start, uchar *unused_end)
+  {
+    DBUG_ASSERT(0); /* Not used for backward buffers */
+  }
+  /* Return pointer to start of the memory area that is occupied by the data */
+  uchar *used_area() { return pos; }
+  friend class Lifo_buffer_iterator;
+  uchar *get_pos() { return pos; }
+};
+
+
+/** Iterator to walk over contents of the buffer without reading from it */
+class Lifo_buffer_iterator
+{
+  uchar *pos;
+  Lifo_buffer *buf;
+  
+public:
+  /* The data is read to here */
+  uchar *read_ptr1;
+  uchar *read_ptr2;
+
+  void init(Lifo_buffer *buf_arg)
+  {
+    buf= buf_arg;
+    pos= buf->get_pos();
+  }
+  /*
+    Read the next value. The calling convention is the same as buf->read()
+    has.
+
+    @retval FALSE - ok
+    @retval TRUE  - EOF, reached the end of the buffer
+  */
+  bool read() 
+  {
+    return buf->read(&pos, &read_ptr1, &read_ptr2);
+  }
+};
+
+
diff --git a/sql/sql_list.cc b/sql/sql_list.cc
index 99aeccd71dc..2c1b3c47d55 100644
--- a/sql/sql_list.cc
+++ b/sql/sql_list.cc
@@ -18,7 +18,6 @@
 #pragma implementation				// gcc: Class implementation
 #endif
 
-#include "unireg.h"
 #include "sql_list.h"
 
 list_node end_of_list;
diff --git a/sql/sql_list.h b/sql/sql_list.h
index 769f44274de..5441fec5f63 100644
--- a/sql/sql_list.h
+++ b/sql/sql_list.h
@@ -15,17 +15,10 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
 
-#include "my_global.h"
-#include "my_sys.h"
-#include "m_string.h" /* for TRASH */
-
-
 #ifdef USE_PRAGMA_INTERFACE
 #pragma interface			/* gcc class implementation */
 #endif
 
-void *sql_alloc(size_t);
-
 #include "my_sys.h"                    /* alloc_root, TRASH, MY_WME,
                                           MY_FAE, MY_ALLOW_ZERO_PTR */
 #include "m_string.h"                           /* bfill */
@@ -54,7 +47,7 @@ public:
   static void operator delete[](void *ptr, MEM_ROOT *mem_root)
   { /* never called */ }
   static void operator delete[](void *ptr, size_t size) { TRASH(ptr, size); }
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
   bool dummy;
   inline Sql_alloc() :dummy(0) {}
   inline ~Sql_alloc() {}
@@ -163,6 +156,7 @@ struct list_node :public Sql_alloc
   }
 };
 
+typedef bool List_eq(void *a, void *b);
 
 extern MYSQL_PLUGIN_IMPORT list_node end_of_list;
 
@@ -246,6 +240,11 @@ public:
   {
     if (!list->is_empty())
     {
+      if (is_empty())
+      {
+        *this= *list;
+        return;
+      }
       *last= list->first;
       last= list->last;
       elements+= list->elements;
@@ -266,11 +265,13 @@ public:
     list_node *node= first;
     list_node *list_first= list->first;
     elements=0;
-    while (node && node != list_first)
+    while (node != &end_of_list && node != list_first)
     {
       prev= &node->next;
       node= node->next;
       elements++;
+      if (node == &end_of_list)
+        return;
     }
     *prev= *last;
     last= prev;
@@ -299,6 +300,16 @@ public:
   inline void **head_ref() { return first != &end_of_list ? &first->info : 0; }
   inline bool is_empty() { return first == &end_of_list ; }
   inline list_node *last_ref() { return &end_of_list; }
+  inline bool add_unique(void *info, List_eq *eq)
+  {
+    list_node *node= first;
+    for (;
+         node != &end_of_list && (!(*eq)(node->info, info));
+         node= node->next) ;
+    if (node == &end_of_list)
+      return push_back(info);
+    return 1;
+  }
   friend class base_list_iterator;
   friend class error_list;
   friend class error_list_iterator;
@@ -469,6 +480,8 @@ public:
   inline void concat(List<T> *list) { base_list::concat(list); }
   inline void disjoin(List<T> *list) { base_list::disjoin(list); }
   inline void prepand(List<T> *list) { base_list::prepand(list); }
+  inline bool add_unique(T *a, bool (*eq)(T *a, T *b))
+  { return base_list::add_unique(a, (List_eq *)eq); }
   void delete_elements(void)
   {
     list_node *element,*next;
@@ -521,6 +534,47 @@ public:
 
 
 /*
+  Bubble sort algorithm for List<T>.
+  This sort function is supposed to be used only for very short lists.
+  Currently it is used for the lists of Item_equal objects and
+  for some lists in the table elimination algorithms. In both
+  cases the sorted lists are very short.
+*/
+
+template <class T> 
+inline void bubble_sort(List<T> *list_to_sort,
+                        int (*sort_func)(T *a, T *b, void *arg), void *arg)
+{
+  bool swap;
+  T **ref1= 0;
+  T **ref2= 0;
+  List_iterator<T> it(*list_to_sort);
+  do
+  {
+    T **last_ref= ref1;
+    T *item1= it++;
+    ref1= it.ref();
+    T *item2;
+
+    swap= FALSE;
+    while ((item2= it++) && (ref2= it.ref()) != last_ref)
+    {
+      if (sort_func(item1, item2, arg) < 0)
+      {
+        *ref1= item2;
+        *ref2= item1;
+        swap= TRUE;
+      }
+      else
+        item1= item2;
+      ref1= ref2;
+    }
+    it.rewind();
+  } while (swap);
+}
+
+
+/*
   A simple intrusive list which automaticly removes element from list
   on delete (for THD element)
 */
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index 1819102aa34..6f84f68eeaa 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -38,6 +38,7 @@
 #include "sql_repl.h"
 #include "sp_head.h"
 #include "sql_trigger.h"
+#include "sql_derived.h"
 
 class XML_TAG {
 public:
@@ -103,6 +104,8 @@ public:
     ::end_io_cache(&cache);
     need_end_io_cache = 0;
   }
+  my_off_t file_length() { return cache.end_of_file; }
+  my_off_t position()    { return my_b_tell(&cache); }
 
   /*
     Either this method, or we need to make cache public
@@ -221,12 +224,15 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
 
   if (open_and_lock_tables(thd, table_list, TRUE, 0))
     DBUG_RETURN(TRUE);
+  if (mysql_handle_single_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT) ||
+      mysql_handle_single_derived(thd->lex, table_list, DT_PREPARE))
+    DBUG_RETURN(TRUE);
   if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
                                     &thd->lex->select_lex.top_join_list,
                                     table_list,
-                                    &thd->lex->select_lex.leaf_tables, FALSE,
+                                    thd->lex->select_lex.leaf_tables, FALSE,
                                     INSERT_ACL | UPDATE_ACL,
-                                    INSERT_ACL | UPDATE_ACL))
+                                    INSERT_ACL | UPDATE_ACL, FALSE))
      DBUG_RETURN(-1);
   if (!table_list->table ||               // do not suport join view
       !table_list->updatable ||           // and derived tables
@@ -465,9 +471,9 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
     }
   }
 
+  thd_proc_info(thd, "reading file");
   if (!(error=test(read_info.error)))
   {
-
     table->next_number_field=table->found_next_number_field;
     if (ignore ||
 	handle_duplicates == DUP_REPLACE)
@@ -485,6 +491,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
                              (MODE_STRICT_TRANS_TABLES |
                               MODE_STRICT_ALL_TABLES)));
 
+    thd_progress_init(thd, 2);
     if (ex->filetype == FILETYPE_XML) /* load xml */
       error= read_xml_field(thd, info, table_list, fields_vars,
                             set_fields, set_values, read_info,
@@ -497,6 +504,9 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
       error= read_sep_field(thd, info, table_list, fields_vars,
                             set_fields, set_values, read_info,
 			    *enclosed, skip_lines, ignore);
+
+    thd_proc_info(thd, "End bulk insert");
+    thd_progress_next_stage(thd);
     if (thd->locked_tables_mode <= LTM_LOCK_TABLES &&
         table->file->ha_end_bulk_insert() && !error)
     {
@@ -673,11 +683,15 @@ static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex,
   size_t               pl= 0;
   List<Item>           fv;
   Item                *item, *val;
-  String               pfield, pfields;
   int                  n;
   const char          *tbl= table_name_arg;
   const char          *tdb= (thd->db != NULL ? thd->db : db_arg);
-  String              string_buf;
+  char 		      name_buffer[SAFE_NAME_LEN*2];
+  char                command_buffer[1024];
+  String              string_buf(name_buffer, sizeof(name_buffer),
+                                 system_charset_info);
+  String              pfields(command_buffer, sizeof(command_buffer),
+                              system_charset_info);
 
   if (!thd->db || strcmp(db_arg, thd->db)) 
   {
@@ -686,7 +700,7 @@ static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex,
       prefix table name with database name so that it 
       becomes a FQ name.
      */
-    string_buf.set_charset(system_charset_info);
+    string_buf.length(0);
     string_buf.append(db_arg);
     string_buf.append("`");
     string_buf.append(".");
@@ -707,6 +721,7 @@ static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex,
   /*
     prepare fields-list and SET if needed; print_query won't do that for us.
   */
+  pfields.length(0);
   if (!thd->lex->field_list.is_empty())
   {
     List_iterator<Item>  li(thd->lex->field_list);
@@ -750,8 +765,8 @@ static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex,
     }
   }
 
-  p= pfields.c_ptr_safe();
-  pl= strlen(p);
+  p=  pfields.c_ptr_safe();
+  pl= pfields.length();
 
   if (!(load_data_query= (char *)thd->alloc(lle.get_query_buffer_length() + 1 + pl)))
     return TRUE;
@@ -788,9 +803,16 @@ read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
   List_iterator_fast<Item> it(fields_vars);
   Item_field *sql_field;
   TABLE *table= table_list->table;
-  bool err;
+  bool err, progress_reports;
+  ulonglong counter, time_to_report_progress;
   DBUG_ENTER("read_fixed_length");
 
+  counter= 0;
+  time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10;
+  progress_reports= 1;
+  if ((thd->progress.max_counter= read_info.file_length()) == ~(my_off_t) 0)
+    progress_reports= 0;
+
   while (!read_info.read_fixed_length())
   {
     if (thd->killed)
@@ -798,6 +820,16 @@ read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
       thd->send_kill_message();
       DBUG_RETURN(1);
     }
+    if (progress_reports)
+    {
+      thd->progress.counter= read_info.position();
+      if (++counter >= time_to_report_progress)
+      {
+        time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10;
+        thd_progress_report(thd, thd->progress.counter,
+                            thd->progress.max_counter);
+      }
+    }
     if (skip_lines)
     {
       /*
@@ -811,7 +843,7 @@ read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
     }
     it.rewind();
     uchar *pos=read_info.row_start;
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
     read_info.row_end[0]=0;
 #endif
 
@@ -919,11 +951,18 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
   Item *item;
   TABLE *table= table_list->table;
   uint enclosed_length;
-  bool err;
+  bool err, progress_reports;
+  ulonglong counter, time_to_report_progress;
   DBUG_ENTER("read_sep_field");
 
   enclosed_length=enclosed.length();
 
+  counter= 0;
+  time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10;
+  progress_reports= 1;
+  if ((thd->progress.max_counter= read_info.file_length()) == ~(my_off_t) 0)
+    progress_reports= 0;
+
   for (;;it.rewind())
   {
     if (thd->killed)
@@ -932,6 +971,16 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
       DBUG_RETURN(1);
     }
 
+    if (progress_reports)
+    {
+      thd->progress.counter= read_info.position();
+      if (++counter >= time_to_report_progress)
+      {
+        time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10;
+        thd_progress_report(thd, thd->progress.counter,
+                            thd->progress.max_counter);
+      }
+    }
     restore_record(table, s->default_values);
 
     while ((item= it++))
@@ -953,11 +1002,10 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
       real_item= item->real_item();
 
       if ((!read_info.enclosed &&
-	  (enclosed_length && length == 4 &&
-           !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
+           (enclosed_length && length == 4 &&
+            !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
 	  (length == 1 && read_info.found_null))
       {
-
         if (real_item->type() == Item::FIELD_ITEM)
         {
           Field *field= ((Item_field *)real_item)->field;
@@ -1042,7 +1090,7 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
           if (!field->maybe_null() && field->type() == FIELD_TYPE_TIMESTAMP)
               ((Field_timestamp*) field)->set_time();
           /*
-            QQ: We probably should not throw warning for each field.
+            TODO: We probably should not throw warning for each field.
             But how about intention to always have the same number
             of warnings in THD::cuted_fields (and get rid of cuted_fields
             in the end ?)
@@ -1304,7 +1352,7 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
 		     String &field_term, String &line_start, String &line_term,
 		     String &enclosed_par, int escape, bool get_it_from_net,
 		     bool is_fifo)
-  :file(file_par), buff_length(tot_length), escape_char(escape),
+  :file(file_par), buffer(NULL), buff_length(tot_length), escape_char(escape),
    found_end_of_line(false), eof(false), need_end_io_cache(false),
    error(false), line_cuted(false), found_null(false), read_charset(cs)
 {
@@ -1336,7 +1384,6 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
   field_term_char= field_term_length ? (uchar) field_term_ptr[0] : INT_MAX;
   line_term_char= line_term_length ? (uchar) line_term_ptr[0] : INT_MAX;
 
-
   /* Set of a stack for unget if long terminators */
   uint length= max(cs->mbmaxlen, max(field_term_length, line_term_length)) + 1;
   set_if_bigger(length,line_start.length());
@@ -1382,9 +1429,7 @@ READ_INFO::~READ_INFO()
 {
   if (need_end_io_cache)
     ::end_io_cache(&cache);
-
-  if (buffer != NULL)
-    my_free(buffer);
+  my_free(buffer);
   List_iterator<XML_TAG> xmlit(taglist);
   XML_TAG *t;
   while ((t= xmlit++))
@@ -1504,8 +1549,9 @@ int READ_INFO::read_field()
 	// End of enclosed field if followed by field_term or line_term
 	if (chr == my_b_EOF ||
 	    (chr == line_term_char && terminator(line_term_ptr,
-						line_term_length)))
-	{					// Maybe unexpected linefeed
+                                                 line_term_length)))
+        {
+          /* Maybe unexpected linefeed */
 	  enclosed=1;
 	  found_end_of_line=1;
 	  row_start=buffer+1;
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 4fac64749e9..54b1e854ac1 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -48,9 +49,8 @@
                               // mysql_restore_table
 #include "sql_reload.h"       // reload_acl_and_cache
 #include "sql_admin.h"        // mysql_assign_to_keycache
-#include "sql_connect.h"      // check_user,
-                              // decrease_user_connections,
-                              // thd_init_client_charset, check_mqh,
+#include "sql_connect.h"      // decrease_user_connections,
+                              // check_mqh,
                               // reset_mqh
 #include "sql_rename.h"       // mysql_rename_table
 #include "sql_tablespace.h"   // mysql_alter_tablespace
@@ -94,9 +94,14 @@
 #include "debug_sync.h"
 #include "probes_mysql.h"
 #include "set_var.h"
+#include "log_slow.h"
 
 #define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
 
+#ifdef WITH_ARIA_STORAGE_ENGINE
+#include "../storage/maria/ha_maria.h"
+#endif
+
 /**
   @defgroup Runtime_Environment Runtime Environment
   @{
@@ -114,6 +119,8 @@
 
 static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables);
 static void sql_kill(THD *thd, ulong id, bool only_kill_query);
+static bool execute_show_status(THD *, TABLE_LIST *);
+static bool execute_rename_table(THD *, TABLE_LIST *, TABLE_LIST *);
 
 const char *any_db="*any*";	// Special symbol for check_access
 
@@ -155,7 +162,6 @@ const char *xa_state_names[]={
   "NON-EXISTING", "ACTIVE", "IDLE", "PREPARED", "ROLLBACK ONLY"
 };
 
-
 #ifdef HAVE_REPLICATION
 /**
   Returns true if all tables should be ignored.
@@ -257,22 +263,22 @@ void init_update_queries(void)
     the code, in particular in the Query_log_event's constructor.
   */
   sql_command_flags[SQLCOM_CREATE_TABLE]=   CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE |
-                                            CF_AUTO_COMMIT_TRANS |
+                                            CF_AUTO_COMMIT_TRANS | CF_REPORT_PROGRESS |
                                             CF_CAN_GENERATE_ROW_EVENTS;
   sql_command_flags[SQLCOM_CREATE_INDEX]=   CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS;
   sql_command_flags[SQLCOM_ALTER_TABLE]=    CF_CHANGES_DATA | CF_WRITE_LOGS_COMMAND |
-                                            CF_AUTO_COMMIT_TRANS;
+                                            CF_AUTO_COMMIT_TRANS | CF_REPORT_PROGRESS ;
   sql_command_flags[SQLCOM_TRUNCATE]=       CF_CHANGES_DATA | CF_WRITE_LOGS_COMMAND |
                                             CF_AUTO_COMMIT_TRANS;
   sql_command_flags[SQLCOM_DROP_TABLE]=     CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS;
   sql_command_flags[SQLCOM_LOAD]=           CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE |
-                                            CF_CAN_GENERATE_ROW_EVENTS;
+                                            CF_CAN_GENERATE_ROW_EVENTS | CF_REPORT_PROGRESS;
   sql_command_flags[SQLCOM_CREATE_DB]=      CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS;
   sql_command_flags[SQLCOM_DROP_DB]=        CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS;
   sql_command_flags[SQLCOM_ALTER_DB_UPGRADE]= CF_AUTO_COMMIT_TRANS;
   sql_command_flags[SQLCOM_ALTER_DB]=       CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS;
   sql_command_flags[SQLCOM_RENAME_TABLE]=   CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS;
-  sql_command_flags[SQLCOM_DROP_INDEX]=     CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS;
+  sql_command_flags[SQLCOM_DROP_INDEX]=     CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS | CF_REPORT_PROGRESS;
   sql_command_flags[SQLCOM_CREATE_VIEW]=    CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE |
                                             CF_AUTO_COMMIT_TRANS;
   sql_command_flags[SQLCOM_DROP_VIEW]=      CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS;
@@ -344,13 +350,12 @@ void init_update_queries(void)
   sql_command_flags[SQLCOM_SHOW_PROFILES]=    CF_STATUS_COMMAND;
   sql_command_flags[SQLCOM_SHOW_PROFILE]=     CF_STATUS_COMMAND;
   sql_command_flags[SQLCOM_BINLOG_BASE64_EVENT]= CF_STATUS_COMMAND;
-
-   sql_command_flags[SQLCOM_SHOW_TABLES]=       (CF_STATUS_COMMAND |
-                                                 CF_SHOW_TABLE_COMMAND |
-                                                 CF_REEXECUTION_FRAGILE);
-  sql_command_flags[SQLCOM_SHOW_TABLE_STATUS]= (CF_STATUS_COMMAND |
-                                                CF_SHOW_TABLE_COMMAND |
-                                                CF_REEXECUTION_FRAGILE);
+  sql_command_flags[SQLCOM_SHOW_CLIENT_STATS]= CF_STATUS_COMMAND;
+  sql_command_flags[SQLCOM_SHOW_USER_STATS]=   CF_STATUS_COMMAND;
+  sql_command_flags[SQLCOM_SHOW_TABLE_STATS]=  CF_STATUS_COMMAND;
+  sql_command_flags[SQLCOM_SHOW_INDEX_STATS]=  CF_STATUS_COMMAND;
+  sql_command_flags[SQLCOM_SHOW_TABLES]=       (CF_STATUS_COMMAND | CF_SHOW_TABLE_COMMAND | CF_REEXECUTION_FRAGILE);
+  sql_command_flags[SQLCOM_SHOW_TABLE_STATUS]= (CF_STATUS_COMMAND | CF_SHOW_TABLE_COMMAND | CF_REEXECUTION_FRAGILE);
 
 
   sql_command_flags[SQLCOM_CREATE_USER]=       CF_CHANGES_DATA;
@@ -383,10 +388,11 @@ void init_update_queries(void)
     The following admin table operations are allowed
     on log tables.
   */
-  sql_command_flags[SQLCOM_REPAIR]=    CF_WRITE_LOGS_COMMAND | CF_AUTO_COMMIT_TRANS;
-  sql_command_flags[SQLCOM_OPTIMIZE]|= CF_WRITE_LOGS_COMMAND | CF_AUTO_COMMIT_TRANS;
-  sql_command_flags[SQLCOM_ANALYZE]=   CF_WRITE_LOGS_COMMAND | CF_AUTO_COMMIT_TRANS;
-  sql_command_flags[SQLCOM_CHECK]=     CF_WRITE_LOGS_COMMAND | CF_AUTO_COMMIT_TRANS;
+  sql_command_flags[SQLCOM_REPAIR]=    CF_WRITE_LOGS_COMMAND | CF_AUTO_COMMIT_TRANS | CF_REPORT_PROGRESS;
+  sql_command_flags[SQLCOM_OPTIMIZE]|= CF_WRITE_LOGS_COMMAND | CF_AUTO_COMMIT_TRANS | CF_REPORT_PROGRESS;
+  sql_command_flags[SQLCOM_ANALYZE]=   CF_WRITE_LOGS_COMMAND | CF_AUTO_COMMIT_TRANS | CF_REPORT_PROGRESS;
+  sql_command_flags[SQLCOM_CHECK]=     CF_WRITE_LOGS_COMMAND | CF_AUTO_COMMIT_TRANS | CF_REPORT_PROGRESS;
+  sql_command_flags[SQLCOM_CHECKSUM]=  CF_REPORT_PROGRESS;
 
   sql_command_flags[SQLCOM_CREATE_USER]|=       CF_AUTO_COMMIT_TRANS;
   sql_command_flags[SQLCOM_DROP_USER]|=         CF_AUTO_COMMIT_TRANS;
@@ -413,7 +419,7 @@ bool sqlcom_can_generate_row_events(const THD *thd)
  
 bool is_update_query(enum enum_sql_command command)
 {
-  DBUG_ASSERT(command >= 0 && command <= SQLCOM_END);
+  DBUG_ASSERT(command <= SQLCOM_END);
   return (sql_command_flags[command] & CF_CHANGES_DATA) != 0;
 }
 
@@ -424,7 +430,7 @@ bool is_update_query(enum enum_sql_command command)
 */
 bool is_log_table_write_query(enum enum_sql_command command)
 {
-  DBUG_ASSERT(command >= 0 && command <= SQLCOM_END);
+  DBUG_ASSERT(command <= SQLCOM_END);
   return (sql_command_flags[command] & CF_WRITE_LOGS_COMMAND) != 0;
 }
 
@@ -477,7 +483,7 @@ void execute_init_command(THD *thd, LEX_STRING *init_command,
 static void handle_bootstrap_impl(THD *thd)
 {
   MYSQL_FILE *file= bootstrap_file;
-  char *buff;
+  char *buff, *res;
 
   DBUG_ENTER("handle_bootstrap");
 
@@ -517,7 +523,13 @@ static void handle_bootstrap_impl(THD *thd)
         break;
       }
       buff= (char*) thd->net.buff;
-      mysql_file_fgets(buff + length, thd->net.max_packet - length, file);
+      res= mysql_file_fgets(buff + length, thd->net.max_packet - length, file);
+      if (!res && !mysql_file_feof(file))
+      {
+        thd->protocol->end_statement();
+        bootstrap_error= 1;
+        break;
+      }
       length+= (ulong) strlen(buff + length);
       /* purecov: end */
     }
@@ -698,6 +710,10 @@ bool do_command(THD *thd)
 
   net_new_transaction(net);
 
+
+  /* Save for user statistics */
+  thd->start_bytes_received= thd->status_var.bytes_received;
+
   /*
     Synchronization point for testing of KILL_CONNECTION.
     This sync point can wait here, to simulate slow code execution
@@ -870,7 +886,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
   NET *net= &thd->net;
   bool error= 0;
   DBUG_ENTER("dispatch_command");
-  DBUG_PRINT("info",("packet: '%*.s'; command: %d", packet_length, packet, command));
+  DBUG_PRINT("info", ("command: %d", command));
 
 #if defined(ENABLED_PROFILING)
   thd->profiling.start_new_query();
@@ -879,26 +895,19 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
                       &thd->security_ctx->priv_user[0],
                       (char *) thd->security_ctx->host_or_ip);
   
+  DBUG_EXECUTE_IF("crash_dispatch_command_before",
+                  { DBUG_PRINT("crash_dispatch_command_before", ("now"));
+                    DBUG_ABORT(); });
+
   thd->command=command;
   /*
     Commands which always take a long time are logged into
     the slow log only if opt_log_slow_admin_statements is set.
   */
   thd->enable_slow_log= TRUE;
+  thd->query_plan_flags= QPLAN_INIT;
   thd->lex->sql_command= SQLCOM_END; /* to avoid confusing VIEW detectors */
   thd->set_time();
-  if (!thd->is_valid_time())
-  {
-    /*
-     If the time has got past 2038 we need to shut this server down
-     We do this by making sure every command is a shutdown and we 
-     have enough privileges to shut the server down
-
-     TODO: remove this when we have full 64 bit my_time_t support
-    */
-    thd->security_ctx->master_access|= SHUTDOWN_ACL;
-    command= COM_SHUTDOWN;
-  }
   thd->set_query_id(get_query_id());
   if (!(server_command_flags[command] & CF_SKIP_QUERY_ID))
     next_query_id();
@@ -956,14 +965,21 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
     CHARSET_INFO *save_character_set_results=
       thd->variables.character_set_results;
 
+    /* Ensure we don't free security_ctx->user in case we have to revert */
+    thd->security_ctx->user= 0;
+    thd->user_connect= 0;
+
     rc= acl_authenticate(thd, 0, packet_length);
     MYSQL_AUDIT_NOTIFY_CONNECTION_CHANGE_USER(thd);
     if (rc)
     {
+      /* Free user if allocated by acl_authenticate */
       my_free(thd->security_ctx->user);
       *thd->security_ctx= save_security_ctx;
+      if (thd->user_connect)
+	decrease_user_connections(thd->user_connect);
       thd->user_connect= save_user_connect;
-      thd->reset_db (save_db, save_db_length);
+      thd->reset_db(save_db, save_db_length);
       thd->variables.character_set_client= save_character_set_client;
       thd->variables.collation_connection= save_collation_connection;
       thd->variables.character_set_results= save_character_set_results;
@@ -1020,8 +1036,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
                       &thd->security_ctx->priv_user[0],
                       (char *) thd->security_ctx->host_or_ip);
     char *packet_end= thd->query() + thd->query_length();
-    /* 'b' stands for 'buffer' parameter', special for 'my_snprintf' */
-
     general_log_write(thd, command, thd->query(), thd->query_length());
     DBUG_PRINT("query",("%-.4096s",thd->query()));
 #if defined(ENABLED_PROFILING)
@@ -1110,12 +1124,13 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
       break;
     /*
       We have name + wildcard in packet, separated by endzero
+      (The packet is guaranteed to end with an end zero)
     */
     arg_end= strend(packet);
     uint arg_length= arg_end - packet;
 
     /* Check given table name length. */
-    if (arg_length >= packet_length || arg_length > NAME_LEN)
+    if (packet_length - arg_length > NAME_LEN + 1 || arg_length > SAFE_NAME_LEN)
     {
       my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
       break;
@@ -1129,7 +1144,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
       break;
     }
     packet= arg_end + 1;
-    mysql_reset_thd_for_next_command(thd);
+    mysql_reset_thd_for_next_command(thd, opt_userstat_running);
     lex_start(thd);
     /* Must be before we init the table list. */
     if (lower_case_table_names)
@@ -1194,7 +1209,9 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
       uint32 slave_server_id;
 
       status_var_increment(thd->status_var.com_other);
+
       thd->enable_slow_log= opt_log_slow_admin_statements;
+      thd->query_plan_flags|= QPLAN_ADMIN;
       if (check_global_access(thd, REPL_SLAVE_ACL))
 	break;
 
@@ -1269,10 +1286,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
       packet[0].
     */
     enum mysql_enum_shutdown_level level;
-    if (!thd->is_valid_time())
-      level= SHUTDOWN_DEFAULT;
-    else
-      level= (enum mysql_enum_shutdown_level) (uchar) packet[0];
+    level= (enum mysql_enum_shutdown_level) (uchar) packet[0];
     if (level == SHUTDOWN_DEFAULT)
       level= SHUTDOWN_WAIT_ALL_BUFFERS; // soon default will be configurable
     else if (level != SHUTDOWN_WAIT_ALL_BUFFERS)
@@ -1290,16 +1304,19 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
 #endif
   case COM_STATISTICS:
   {
-    STATUS_VAR current_global_status_var;
+    STATUS_VAR *current_global_status_var;      // Big; Don't allocate on stack
     ulong uptime;
-    uint length __attribute__((unused));
+    uint length;
     ulonglong queries_per_second1000;
     char buff[250];
     uint buff_len= sizeof(buff);
 
+    if (!(current_global_status_var= (STATUS_VAR*)
+          thd->alloc(sizeof(STATUS_VAR))))
+      break;
     general_log_print(thd, command, NullS);
     status_var_increment(thd->status_var.com_stat[SQLCOM_SHOW_STATUS]);
-    calc_sum_of_all_status(&current_global_status_var);
+    calc_sum_of_all_status(current_global_status_var);
     if (!(uptime= (ulong) (thd->start_time - server_start_time)))
       queries_per_second1000= 0;
     else
@@ -1311,8 +1328,8 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
                         "Open tables: %u  Queries per second avg: %u.%03u",
                         uptime,
                         (int) thread_count, (ulong) thd->query_id,
-                        current_global_status_var.long_query_count,
-                        current_global_status_var.opened_tables,
+                        current_global_status_var->long_query_count,
+                        current_global_status_var->opened_tables,
                         refresh_version,
                         cached_open_tables(),
                         (uint) (queries_per_second1000 / 1000),
@@ -1401,6 +1418,8 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
                       thd->stmt_da->is_error() ? thd->stmt_da->sql_errno() : 0,
                       command_name[command].str);
 
+  thd->update_all_stats();
+
   log_slow_statement(thd);
 
   thd_proc_info(thd, "cleaning up");
@@ -1440,6 +1459,18 @@ void log_slow_statement(THD *thd)
   if (unlikely(thd->in_sub_stmt))
     DBUG_VOID_RETURN;                           // Don't set time for sub stmt
 
+  /* Follow the slow log filter configuration. */ 
+  if (!(thd->variables.log_slow_filter & thd->query_plan_flags))
+    DBUG_VOID_RETURN; 
+ 
+  /* 
+     If rate limiting of slow log writes is enabled, decide whether to log
+     this query to the log or not.
+  */ 
+  if (thd->variables.log_slow_rate_limit > 1 &&
+      (global_query_id % thd->variables.log_slow_rate_limit) != 0)
+    DBUG_VOID_RETURN;
+
   /*
     Do not log administrative statements unless the appropriate option is
     set.
@@ -1569,6 +1600,12 @@ int prepare_schema_table(THD *thd, LEX *lex, Table_ident *table_ident,
     thd->profiling.discard_current_query();
 #endif
     break;
+  case SCH_USER_STATS:
+  case SCH_CLIENT_STATS:
+    if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))
+      DBUG_RETURN(1);
+  case SCH_TABLE_STATS:
+  case SCH_INDEX_STATS:
   case SCH_OPEN_TABLES:
   case SCH_VARIABLES:
   case SCH_STATUS:
@@ -2016,6 +2053,8 @@ mysql_execute_command(THD *thd)
 #endif
 
   status_var_increment(thd->status_var.com_stat[lex->sql_command]);
+  thd->progress.report_to_client= test(sql_command_flags[lex->sql_command] &
+                                       CF_REPORT_PROGRESS);
 
   DBUG_ASSERT(thd->transaction.stmt.modified_non_trans_table == FALSE);
 
@@ -2062,23 +2101,7 @@ mysql_execute_command(THD *thd)
     break;
   case SQLCOM_SHOW_STATUS:
   {
-    system_status_var old_status_var= thd->status_var;
-    thd->initial_status_var= &old_status_var;
-    if (!(res= check_table_access(thd, SELECT_ACL, all_tables, FALSE,
-                                  UINT_MAX, FALSE)))
-      res= execute_sqlcom_select(thd, all_tables);
-    /* Don't log SHOW STATUS commands to slow query log */
-    thd->server_status&= ~(SERVER_QUERY_NO_INDEX_USED |
-                           SERVER_QUERY_NO_GOOD_INDEX_USED);
-    /*
-      restore status variables, as we don't want 'show status' to cause
-      changes
-    */
-    mysql_mutex_lock(&LOCK_status);
-    add_diff_to_status(&global_status_var, &thd->status_var,
-                       &old_status_var);
-    thd->status_var= old_status_var;
-    mysql_mutex_unlock(&LOCK_status);
+    res= execute_show_status(thd, all_tables);  /* keep the outcome in res */
     break;
   }
   case SQLCOM_SHOW_DATABASES:
@@ -2094,6 +2117,10 @@ mysql_execute_command(THD *thd)
   case SQLCOM_SHOW_COLLATIONS:
   case SQLCOM_SHOW_STORAGE_ENGINES:
   case SQLCOM_SHOW_PROFILE:
+  case SQLCOM_SHOW_CLIENT_STATS:
+  case SQLCOM_SHOW_USER_STATS:
+  case SQLCOM_SHOW_TABLE_STATS:
+  case SQLCOM_SHOW_INDEX_STATS:
   case SQLCOM_SELECT:
   {
     thd->status_var.last_query_cost= 0.0;
@@ -2173,11 +2200,7 @@ case SQLCOM_PREPARE:
       goto error;
     }
     it= new Item_func_unix_timestamp(it);
-    /*
-      it is OK only emulate fix_fieds, because we need only
-      value of constant
-    */
-    it->quick_fix_field();
+    it->fix_fields(thd, &it);
     res = purge_master_logs_before_date(thd, (ulong)it->val_int());
     break;
   }
@@ -2560,6 +2583,7 @@ end_with_restore_list:
       ALTER TABLE.
     */
     thd->enable_slow_log= opt_log_slow_admin_statements;
+    thd->query_plan_flags|= QPLAN_ADMIN;
 
     bzero((char*) &create_info, sizeof(create_info));
     create_info.db_type= 0;
@@ -2568,7 +2592,7 @@ end_with_restore_list:
 
     res= mysql_alter_table(thd, first_table->db, first_table->table_name,
                            &create_info, first_table, &alter_info,
-                           0, (ORDER*) 0, 0);
+                           0, (ORDER*) 0, 0, 0);
     break;
   }
 #ifdef HAVE_REPLICATION
@@ -2610,35 +2634,7 @@ end_with_restore_list:
 
   case SQLCOM_RENAME_TABLE:
   {
-    DBUG_ASSERT(first_table == all_tables && first_table != 0);
-    TABLE_LIST *table;
-    for (table= first_table; table; table= table->next_local->next_local)
-    {
-      if (check_access(thd, ALTER_ACL | DROP_ACL, table->db,
-                       &table->grant.privilege,
-                       &table->grant.m_internal,
-                       0, 0) ||
-          check_access(thd, INSERT_ACL | CREATE_ACL, table->next_local->db,
-                       &table->next_local->grant.privilege,
-                       &table->next_local->grant.m_internal,
-                       0, 0))
-	goto error;
-      TABLE_LIST old_list, new_list;
-      /*
-        we do not need initialize old_list and new_list because we will
-        come table[0] and table->next[0] there
-      */
-      old_list= table[0];
-      new_list= table->next_local[0];
-      if (check_grant(thd, ALTER_ACL | DROP_ACL, &old_list, FALSE, 1, FALSE) ||
-         (!test_all_bits(table->next_local->grant.privilege,
-                         INSERT_ACL | CREATE_ACL) &&
-          check_grant(thd, INSERT_ACL | CREATE_ACL, &new_list, FALSE, 1,
-                      FALSE)))
-        goto error;
-    }
-
-    if (mysql_rename_tables(thd, first_table, 0))
+    if (execute_rename_table(thd, first_table, all_tables))
       goto error;
     break;
   }
@@ -2893,12 +2889,17 @@ end_with_restore_list:
 
     DBUG_EXECUTE_IF("after_mysql_insert",
                     {
-                      const char act[]=
+                      const char act1[]=
                         "now "
                         "wait_for signal.continue";
+                      const char act2[]=
+                        "now "
+                        "signal signal.continued";
                       DBUG_ASSERT(opt_debug_sync_timeout > 0);
-                      DBUG_ASSERT(!debug_sync_set_action(current_thd,
-                                                         STRING_WITH_LEN(act)));
+                      DBUG_ASSERT(!debug_sync_set_action(thd,
+                                                         STRING_WITH_LEN(act1)));
+                      DBUG_ASSERT(!debug_sync_set_action(thd,
+                                                         STRING_WITH_LEN(act2)));
                     };);
     break;
   }
@@ -2939,6 +2940,10 @@ end_with_restore_list:
     if (!(res= open_and_lock_tables(thd, all_tables, TRUE, 0)))
     {
       MYSQL_INSERT_SELECT_START(thd->query());
+      /*
+        Only the INSERT table should be merged. The others will be handled
+        by select.
+      */
       /* Skip first table, which is the table we are inserting in */
       TABLE_LIST *second_table= first_table->next_local;
       select_lex->table_list.first= second_table;
@@ -3216,7 +3221,7 @@ end_with_restore_list:
     {
 #ifdef HAVE_QUERY_CACHE
       if (thd->variables.query_cache_wlock_invalidate)
-        query_cache.invalidate_locked_for_write(first_table);
+	query_cache.invalidate_locked_for_write(thd, first_table);
 #endif /*HAVE_QUERY_CACHE*/
       my_ok(thd);
     }
@@ -4390,6 +4395,7 @@ create_sp_error:
   case SQLCOM_REPAIR:
   case SQLCOM_TRUNCATE:
   case SQLCOM_ALTER_TABLE:
+      thd->query_plan_flags|= QPLAN_ADMIN;
       DBUG_ASSERT(first_table == all_tables && first_table != 0);
     /* fall through */
   case SQLCOM_SIGNAL:
@@ -4405,6 +4411,7 @@ create_sp_error:
     break;
   }
   thd_proc_info(thd, "query end");
+  thd->update_stats();
 
   /*
     Binlog-related cleanup:
@@ -4511,6 +4518,7 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables)
       param->select_limit=
         new Item_int((ulonglong) thd->variables.select_limit);
   }
+  thd->thd_marker.emb_on_expr_nest= NULL;
   if (!(res= open_and_lock_tables(thd, all_tables, TRUE, 0)))
   {
     if (lex->describe)
@@ -4539,9 +4547,8 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables)
           mysqld_show_warnings().
         */
         thd->lex->unit.print(&str, QT_TO_SYSTEM_CHARSET);
-        str.append('\0');
         push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
-                     ER_YES, str.ptr());
+                     ER_YES, str.c_ptr_safe());
       }
       if (res)
         result->abort_result_set();
@@ -4559,10 +4566,73 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables)
         delete result;
     }
   }
+  /* Count number of empty select queries */
+  if (!thd->sent_row_count)
+    status_var_increment(thd->status_var.empty_queries);
+  status_var_add(thd->status_var.rows_sent, thd->sent_row_count);
   return res;
 }
 
 
+static bool execute_show_status(THD *thd, TABLE_LIST *all_tables)
+{
+  bool res;
+  system_status_var old_status_var= thd->status_var;
+  thd->initial_status_var= &old_status_var;
+  if (!(res= check_table_access(thd, SELECT_ACL, all_tables, FALSE,
+                                UINT_MAX, FALSE)))
+    res= execute_sqlcom_select(thd, all_tables);
+  /* Clear the no-index flags: SHOW STATUS must not go to the slow query log */
+  thd->server_status&= ~(SERVER_QUERY_NO_INDEX_USED |
+                         SERVER_QUERY_NO_GOOD_INDEX_USED);
+  /*
+    Restore the thread's status variables to the snapshot taken on entry,
+    as we don't want SHOW STATUS itself to cause changes in them
+  */
+  mysql_mutex_lock(&LOCK_status);
+  add_diff_to_status(&global_status_var, &thd->status_var,
+                     &old_status_var);
+  thd->status_var= old_status_var;
+  mysql_mutex_unlock(&LOCK_status);
+  return res;
+}
+
+
+static bool execute_rename_table(THD *thd, TABLE_LIST *first_table,
+                                 TABLE_LIST *all_tables)
+{
+  DBUG_ASSERT(first_table == all_tables && first_table != 0);
+  TABLE_LIST *table;
+  for (table= first_table; table; table= table->next_local->next_local)
+  {
+    if (check_access(thd, ALTER_ACL | DROP_ACL, table->db,
+                     &table->grant.privilege,
+                     &table->grant.m_internal,
+                     0, 0) ||
+        check_access(thd, INSERT_ACL | CREATE_ACL, table->next_local->db,
+                     &table->next_local->grant.privilege,
+                     &table->next_local->grant.m_internal,
+                     0, 0))
+      return 1;
+    TABLE_LIST old_list, new_list;
+    /*
+      old_list and new_list need no separate initialization: they are
+      overwritten with copies of table[0] and table->next_local[0] below
+    */
+    old_list= table[0];
+    new_list= table->next_local[0];
+    if (check_grant(thd, ALTER_ACL | DROP_ACL, &old_list, FALSE, 1, FALSE) ||
+       (!test_all_bits(table->next_local->grant.privilege,
+                       INSERT_ACL | CREATE_ACL) &&
+        check_grant(thd, INSERT_ACL | CREATE_ACL, &new_list, FALSE, 1,
+                    FALSE)))
+      return 1;
+  }
+
+  return mysql_rename_tables(thd, first_table, 0);
+}
+
+
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
 /**
   Check grants for commands which work only with one table.
@@ -4742,6 +4812,7 @@ check_access(THD *thd, ulong want_access, const char *db, ulong *save_priv,
       case ACL_INTERNAL_ACCESS_DENIED:
         if (! no_errors)
         {
+          status_var_increment(thd->status_var.access_denied_errors);
           my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
                    sctx->priv_user, sctx->priv_host, db);
         }
@@ -4792,17 +4863,13 @@ check_access(THD *thd, ulong want_access, const char *db, ulong *save_priv,
     DBUG_PRINT("error",("No possible access"));
     if (!no_errors)
     {
-      if (thd->password == 2)
-        my_error(ER_ACCESS_DENIED_NO_PASSWORD_ERROR, MYF(0),
-                 sctx->priv_user,
-                 sctx->priv_host);
-      else
-        my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
-                 sctx->priv_user,
-                 sctx->priv_host,
-                 (thd->password ?
-                  ER(ER_YES) :
-                  ER(ER_NO)));                    /* purecov: tested */
+      status_var_increment(thd->status_var.access_denied_errors);
+      my_error(access_denied_error_code(thd->password), MYF(0),
+               sctx->priv_user,
+               sctx->priv_host,
+               (thd->password ?
+                ER(ER_YES) :
+                ER(ER_NO)));                    /* purecov: tested */
     }
     DBUG_RETURN(TRUE);				/* purecov: tested */
   }
@@ -4861,13 +4928,15 @@ check_access(THD *thd, ulong want_access, const char *db, ulong *save_priv,
   */
   DBUG_PRINT("error",("Access denied"));
   if (!no_errors)
+  {
+    status_var_increment(thd->status_var.access_denied_errors);
     my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
              sctx->priv_user, sctx->priv_host,
              (db ? db : (thd->db ?
                          thd->db :
                          "unknown")));
+  }
   DBUG_RETURN(TRUE);
-
 }
 
 
@@ -4906,6 +4975,7 @@ static bool check_show_access(THD *thd, TABLE_LIST *table)
 
     if (!thd->col_access && check_grant_db(thd, dst_db_name))
     {
+      status_var_increment(thd->status_var.access_denied_errors);
       my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
                thd->security_ctx->priv_user,
                thd->security_ctx->priv_host,
@@ -4991,8 +5061,8 @@ check_table_access(THD *thd, ulong requirements,TABLE_LIST *tables,
 {
   TABLE_LIST *org_tables= tables;
   TABLE_LIST *first_not_own_table= thd->lex->first_not_own_table();
-  uint i= 0;
   Security_context *sctx= thd->security_ctx, *backup_ctx= thd->security_ctx;
+  uint i= 0;
   /*
     The check that first_not_own_table is not reached is for the case when
     the given table list refers to the list for prelocking (contains tables
@@ -5177,6 +5247,7 @@ bool check_global_access(THD *thd, ulong want_access)
     return 0;
   get_privilege_desc(command, sizeof(command), want_access);
   my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), command);
+  status_var_increment(thd->status_var.access_denied_errors);
   return 1;
 #else
   return 0;
@@ -5284,17 +5355,18 @@ bool my_yyoverflow(short **yyss, YYSTYPE **yyvs, ulong *yystacksize)
 
   @todo Call it after we use THD for queries, not before.
 */
-void mysql_reset_thd_for_next_command(THD *thd)
+void mysql_reset_thd_for_next_command(THD *thd, my_bool calculate_userstat)
 {
-  thd->reset_for_next_command();
+  thd->reset_for_next_command(calculate_userstat);
 }
 
-void THD::reset_for_next_command()
+void THD::reset_for_next_command(bool calculate_userstat)
 {
   THD *thd= this;
   DBUG_ENTER("mysql_reset_thd_for_next_command");
   DBUG_ASSERT(!thd->spcont); /* not for substatements of routines */
   DBUG_ASSERT(! thd->in_sub_stmt);
+  DBUG_ASSERT(thd->transaction.on);
   thd->free_list= 0;
   thd->select_number= 1;
   /*
@@ -5305,6 +5377,7 @@ void THD::reset_for_next_command()
   thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0;
 
   thd->query_start_used= 0;
+  thd->query_start_sec_part_used= 0;
   thd->is_fatal_error= thd->time_zone_used= 0;
   /*
     Clear the status flag that are expected to be cleared at the
@@ -5335,6 +5408,18 @@ void THD::reset_for_next_command()
   thd->rand_used= 0;
   thd->sent_row_count= thd->examined_row_count= 0;
 
+  /* Copy data for user stats */
+  if ((thd->userstat_running= calculate_userstat))
+  {
+    thd->start_cpu_time= my_getcputime();
+    memcpy(&thd->org_status_var, &thd->status_var, sizeof(thd->status_var));
+    thd->select_commands= thd->update_commands= thd->other_commands= 0;
+  }
+
+  thd->query_plan_flags= QPLAN_INIT;
+  thd->query_plan_fsort_passes= 0;
+  thd->thd_marker.emb_on_expr_nest= NULL;
+
   thd->reset_current_stmt_binlog_format_row();
   thd->binlog_unsafe_warning_flags= 0;
 
@@ -5520,7 +5605,6 @@ void mysql_parse(THD *thd, char *rawbuf, uint length,
 {
   int error __attribute__((unused));
   DBUG_ENTER("mysql_parse");
-
   DBUG_EXECUTE_IF("parser_debug", turn_parser_debug_on(););
 
   /*
@@ -5540,7 +5624,7 @@ void mysql_parse(THD *thd, char *rawbuf, uint length,
     FIXME: cleanup the dependencies in the code to simplify this.
   */
   lex_start(thd);
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, opt_userstat_running);
 
   if (query_cache_send_result_to_client(thd, rawbuf, length) <= 0)
   {
@@ -5609,7 +5693,11 @@ void mysql_parse(THD *thd, char *rawbuf, uint length,
     thd->cleanup_after_query();
     DBUG_ASSERT(thd->change_list.is_empty());
   }
-
+  else
+  {
+    /* Update statistics for getting the query from the cache */
+    thd->lex->sql_command= SQLCOM_SELECT;
+  }
   DBUG_VOID_RETURN;
 }
 
@@ -5635,7 +5723,7 @@ bool mysql_test_parse_for_slave(THD *thd, char *rawbuf, uint length)
   if (!(error= parser_state.init(thd, rawbuf, length)))
   {
     lex_start(thd);
-    mysql_reset_thd_for_next_command(thd);
+    mysql_reset_thd_for_next_command(thd, opt_userstat_running);
 
     if (!parse_sql(thd, & parser_state, NULL) &&
         all_tables_not_ok(thd, lex->select_lex.table_list.first))
@@ -5663,7 +5751,9 @@ bool add_field_to_list(THD *thd, LEX_STRING *field_name, enum_field_types type,
                        LEX_STRING *comment,
 		       char *change,
                        List<String> *interval_list, CHARSET_INFO *cs,
-		       uint uint_geom_type)
+		       uint uint_geom_type,
+		       Virtual_column_info *vcol_info,
+                       engine_option_value *create_options)
 {
   register Create_field *new_field;
   LEX  *lex= thd->lex;
@@ -5681,7 +5771,7 @@ bool add_field_to_list(THD *thd, LEX_STRING *field_name, enum_field_types type,
     lex->col_list.push_back(new Key_part_spec(*field_name, 0));
     key= new Key(Key::PRIMARY, null_lex_str,
                       &default_key_create_info,
-                      0, lex->col_list);
+                      0, lex->col_list, NULL);
     lex->alter_info.key_list.push_back(key);
     lex->col_list.empty();
   }
@@ -5691,7 +5781,7 @@ bool add_field_to_list(THD *thd, LEX_STRING *field_name, enum_field_types type,
     lex->col_list.push_back(new Key_part_spec(*field_name, 0));
     key= new Key(Key::UNIQUE, null_lex_str,
                  &default_key_create_info, 0,
-                 lex->col_list);
+                 lex->col_list, NULL);
     lex->alter_info.key_list.push_back(key);
     lex->col_list.empty();
   }
@@ -5738,7 +5828,8 @@ bool add_field_to_list(THD *thd, LEX_STRING *field_name, enum_field_types type,
   if (!(new_field= new Create_field()) ||
       new_field->init(thd, field_name->str, type, length, decimals, type_modifier,
                       default_value, on_update_value, comment, change,
-                      interval_list, cs, uint_geom_type))
+                      interval_list, cs, uint_geom_type, vcol_info,
+                      create_options))
     DBUG_RETURN(1);
 
   lex->alter_info.create_list.push_back(new_field);
@@ -6309,6 +6400,28 @@ push_new_name_resolution_context(THD *thd,
 
 
 /**
+  Normalize a condition that is a bare field or reference: f becomes f <> 0
+
+  @param cond            The condition to normalize (may be NULL)
+
+  @return the rewritten condition, or cond itself if no rewrite was needed
+*/
+
+Item *normalize_cond(Item *cond)
+{
+  if (cond)
+  {
+    Item::Type type= cond->type();
+    if (type == Item::FIELD_ITEM || type == Item::REF_ITEM)
+    {
+      cond= new Item_func_ne(cond, new Item_int(0));
+    }
+  }
+  return cond;
+}
+
+
+/**
   Add an ON condition to the second operand of a JOIN ... ON.
 
     Add an ON condition to the right operand of a JOIN ... ON clause.
@@ -6326,6 +6439,7 @@ void add_join_on(TABLE_LIST *b, Item *expr)
 {
   if (expr)
   {
+    expr= normalize_cond(expr);
     if (!b->on_expr)
       b->on_expr= expr;
     else
@@ -6415,7 +6529,6 @@ uint kill_one_thread(THD *thd, ulong id, bool only_kill_query)
   mysql_mutex_unlock(&LOCK_thread_count);
   if (tmp)
   {
-
     /*
       If we're SUPER, we can KILL anything, including system-threads.
       No further checks.
@@ -7197,15 +7310,14 @@ bool check_string_char_length(LEX_STRING *str, const char *err_msg,
 
 /*
   Check if path does not contain mysql data home directory
+
   SYNOPSIS
     test_if_data_home_dir()
     dir                     directory
-    conv_home_dir           converted data home directory
-    home_dir_len            converted data home directory length
 
   RETURN VALUES
     0	ok
-    1	error  
+    1	error; the given path contains the data directory
 */
 C_MODE_START
 
@@ -7233,11 +7345,17 @@ int test_if_data_home_dir(const char *dir)
                         mysql_unpacked_real_data_home_len,
                         (const uchar*) mysql_unpacked_real_data_home,
                         mysql_unpacked_real_data_home_len))
+      {
+        DBUG_PRINT("error", ("Path is part of mysql_real_data_home"));
         DBUG_RETURN(1);
+      }
     }
     else if (!memcmp(path, mysql_unpacked_real_data_home,
                      mysql_unpacked_real_data_home_len))
+    {
+      DBUG_PRINT("error", ("Path is part of mysql_real_data_home"));
       DBUG_RETURN(1);
+    }
   }
   DBUG_RETURN(0);
 }
@@ -7299,6 +7417,7 @@ bool parse_sql(THD *thd,
                Object_creation_ctx *creation_ctx)
 {
   bool ret_value;
+  DBUG_ENTER("parse_sql");
   DBUG_ASSERT(thd->m_parser_state == NULL);
   DBUG_ASSERT(thd->lex->m_stmt == NULL);
 
@@ -7346,7 +7465,7 @@ bool parse_sql(THD *thd,
 
   ret_value= mysql_parse_status || thd->is_fatal_error;
   MYSQL_QUERY_PARSE_DONE(ret_value);
-  return ret_value;
+  DBUG_RETURN(ret_value);
 }
 
 /**
diff --git a/sql/sql_parse.h b/sql/sql_parse.h
index 9a55174b0fb..2c31f71af53 100644
--- a/sql/sql_parse.h
+++ b/sql/sql_parse.h
@@ -85,7 +85,7 @@ bool alloc_query(THD *thd, const char *packet, uint packet_length);
 void mysql_init_select(LEX *lex);
 void mysql_parse(THD *thd, char *rawbuf, uint length,
                  Parser_state *parser_state);
-void mysql_reset_thd_for_next_command(THD *thd);
+void mysql_reset_thd_for_next_command(THD *thd, my_bool calculate_userstat);
 bool mysql_new_select(LEX *lex, bool move_down);
 void create_select_for_variable(const char *var_name);
 void create_table_set_open_action_and_adjust_tables(LEX *lex);
@@ -112,7 +112,9 @@ bool add_field_to_list(THD *thd, LEX_STRING *field_name, enum enum_field_types t
 		       LEX_STRING *comment,
 		       char *change, List<String> *interval_list,
 		       CHARSET_INFO *cs,
-		       uint uint_geom_type);
+		       uint uint_geom_type,
+                       Virtual_column_info *vcol_info,
+                       engine_option_value *create_options);
 bool add_to_list(THD *thd, SQL_I_List<ORDER> &list, Item *group, bool asc);
 void add_join_on(TABLE_LIST *b,Item *expr);
 void add_join_natural(TABLE_LIST *a,TABLE_LIST *b,List<String> *using_fields,
@@ -124,6 +126,7 @@ bool push_new_name_resolution_context(THD *thd,
 void store_position_for_column(const char *name);
 void init_update_queries(void);
 bool check_simple_select();
+Item *normalize_cond(Item *cond);
 Item *negate_expression(THD *thd, Item *expr);
 bool check_stack_overrun(THD *thd, long margin, uchar *dummy);
 
diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc
index 354548cbda4..646d829a51e 100644
--- a/sql/sql_partition.cc
+++ b/sql/sql_partition.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2005, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -952,86 +953,11 @@ int check_signed_flag(partition_info *part_info)
   return error;
 }
 
-/**
-  Initialize lex object for use in fix_fields and parsing.
-
-  SYNOPSIS
-    init_lex_with_single_table()
-    @param thd                 The thread object
-    @param table               The table object
-  @return Operation status
-    @retval TRUE                An error occurred, memory allocation error
-    @retval FALSE               Ok
-
-  DESCRIPTION
-    This function is used to initialize a lex object on the
-    stack for use by fix_fields and for parsing. In order to
-    work properly it also needs to initialize the
-    Name_resolution_context object of the lexer.
-    Finally it needs to set a couple of variables to ensure
-    proper functioning of fix_fields.
-*/
-
-static int
-init_lex_with_single_table(THD *thd, TABLE *table, LEX *lex)
-{
-  TABLE_LIST *table_list;
-  Table_ident *table_ident;
-  SELECT_LEX *select_lex= &lex->select_lex;
-  Name_resolution_context *context= &select_lex->context;
-  /*
-    We will call the parser to create a part_info struct based on the
-    partition string stored in the frm file.
-    We will use a local lex object for this purpose. However we also
-    need to set the Name_resolution_object for this lex object. We
-    do this by using add_table_to_list where we add the table that
-    we're working with to the Name_resolution_context.
-  */
-  thd->lex= lex;
-  lex_start(thd);
-  context->init();
-  if ((!(table_ident= new Table_ident(thd,
-                                      table->s->table_name,
-                                      table->s->db, TRUE))) ||
-      (!(table_list= select_lex->add_table_to_list(thd,
-                                                   table_ident,
-                                                   NULL,
-                                                   0))))
-    return TRUE;
-  context->resolve_in_table_list_only(table_list);
-  lex->use_only_table_context= TRUE;
-  select_lex->cur_pos_in_select_list= UNDEF_POS;
-  table->map= 1; //To ensure correct calculation of const item
-  table->get_fields_in_item_tree= TRUE;
-  table_list->table= table;
-  return FALSE;
-}
-
-/**
-  End use of local lex with single table
-
-  SYNOPSIS
-    end_lex_with_single_table()
-    @param thd               The thread object
-    @param table             The table object
-    @param old_lex           The real lex object connected to THD
-
-  DESCRIPTION
-    This function restores the real lex object after calling
-    init_lex_with_single_table and also restores some table
-    variables temporarily set.
+/*
+  init_lex_with_single_table and end_lex_with_single_table
+  are now in sql_lex.cc
 */
 
-static void
-end_lex_with_single_table(THD *thd, TABLE *table, LEX *old_lex)
-{
-  LEX *lex= thd->lex;
-  table->map= 0;
-  table->get_fields_in_item_tree= FALSE;
-  lex_end(lex);
-  thd->lex= old_lex;
-}
-
 /*
   The function uses a new feature in fix_fields where the flag 
   GET_FIXED_FIELDS_FLAG is set for all fields in the item tree.
@@ -1107,7 +1033,8 @@ static bool fix_fields_part_func(THD *thd, Item* func_expr, TABLE *table,
     const nesting_map saved_allow_sum_func= thd->lex->allow_sum_func;
     thd->lex->allow_sum_func= 0;
 
-    error= func_expr->fix_fields(thd, (Item**)&func_expr);
+    if (!(error= func_expr->fix_fields(thd, (Item**)&func_expr)))
+      func_expr->walk(&Item::vcol_in_partition_func_processor, 0, NULL);
 
     /*
       Restore agg_field/agg_func  and allow_sum_func,
@@ -2114,6 +2041,9 @@ static int add_partition_options(File fptr, partition_element *p_elem)
   }
   if (p_elem->part_comment)
     err+= add_keyword_string(fptr, "COMMENT", TRUE, p_elem->part_comment);
+  if (p_elem->connect_string.length)
+    err+= add_keyword_string(fptr, "CONNECTION", TRUE,
+                             p_elem->connect_string.str);
   return err + add_engine(fptr,p_elem->engine_type);
 }
 
@@ -6354,7 +6284,7 @@ static int alter_close_tables(ALTER_PARTITION_PARAM_TYPE *lpt, bool close_old)
   DBUG_ENTER("alter_close_tables");
   if (lpt->table->db_stat)
   {
-    lpt->table->file->close();
+    lpt->table->file->ha_close();
     lpt->table->db_stat= 0;                        // Mark file closed
   }
   if (close_old && lpt->old_table)
@@ -7884,8 +7814,8 @@ static uint32 get_next_partition_via_walking(PARTITION_ITERATOR *part_iter)
     field->store(part_iter->field_vals.cur++,
                  ((Field_num*)field)->unsigned_flag);
     if ((part_iter->part_info->is_sub_partitioned() &&
-        !part_iter->part_info->get_part_partition_id(part_iter->part_info,
-                                                     &part_id, &dummy)) ||
+         !part_iter->part_info->get_part_partition_id(part_iter->part_info,
+                                                      &part_id, &dummy)) ||
         !part_iter->part_info->get_partition_id(part_iter->part_info,
                                                 &part_id, &dummy))
       return part_id;
@@ -8006,4 +7936,3 @@ uint get_partition_field_store_length(Field *field)
   return store_length;
 }
 #endif
-
diff --git a/sql/sql_partition.h b/sql/sql_partition.h
index 7a7b73e9f01..d2c15ae46d8 100644
--- a/sql/sql_partition.h
+++ b/sql/sql_partition.h
@@ -96,7 +96,6 @@ bool check_partition_info(partition_info *part_info,handlerton **eng_type,
                           TABLE *table, handler *file, HA_CREATE_INFO *info);
 void set_linear_hash_mask(partition_info *part_info, uint num_parts);
 bool fix_partition_func(THD *thd, TABLE *table, bool create_table_ind);
-bool partition_key_modified(TABLE *table, const MY_BITMAP *fields);
 void get_partition_set(const TABLE *table, uchar *buf, const uint index,
                        const key_range *key_spec,
                        part_id_range *part_spec);
@@ -269,6 +268,9 @@ char *generate_partition_syntax(partition_info *part_info,
                                 bool show_partition_options,
                                 HA_CREATE_INFO *create_info,
                                 Alter_info *alter_info);
+bool partition_key_modified(TABLE *table, const MY_BITMAP *fields);
+#else
+#define partition_key_modified(X,Y) 0
 #endif
 
 void create_partition_name(char *out, const char *in1,
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index 5f5e73091ff..06ead26414c 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -1,5 +1,6 @@
 /*
-   Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2005, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2010, 2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -33,11 +34,12 @@
 #include "sql_audit.h"
 #include <mysql/plugin_auth.h>
 #include "lock.h"                               // MYSQL_LOCK_IGNORE_TIMEOUT
+#include <mysql/plugin_auth.h>
 #define REPORT_TO_LOG  1
 #define REPORT_TO_USER 2
 
-extern struct st_mysql_plugin *mysql_optional_plugins[];
-extern struct st_mysql_plugin *mysql_mandatory_plugins[];
+extern struct st_maria_plugin *mysql_optional_plugins[];
+extern struct st_maria_plugin *mysql_mandatory_plugins[];
 
 /**
   @note The order of the enumeration is critical.
@@ -53,6 +55,18 @@ static TYPELIB global_plugin_typelib=
 char *opt_plugin_load= NULL;
 char *opt_plugin_dir_ptr;
 char opt_plugin_dir[FN_REFLEN];
+ulong plugin_maturity;
+
+/*
+  not really needed now, this map will become essential when we add more
+  maturity levels. We cannot change existing maturity constants,
+  so the next value - even if it will be MariaDB_PLUGIN_MATURITY_VERY_BUGGY -
+  will inevitably be larger than MariaDB_PLUGIN_MATURITY_STABLE.
+  To be able to compare them we use this mapping array
+*/
+uint plugin_maturity_map[]=
+{ 0, 1, 2, 3, 4, 5, 6 };
+
 /*
   When you ad a new plugin type, add both a string and make sure that the
   init and deinit array are correctly updated.
@@ -83,13 +97,13 @@ extern int finalize_audit_plugin(st_plugin_int *plugin);
 plugin_type_init plugin_type_initialize[MYSQL_MAX_PLUGIN_TYPE_NUM]=
 {
   0,ha_initialize_handlerton,0,0,initialize_schema_table,
-  initialize_audit_plugin,0,0
+  initialize_audit_plugin, 0, 0
 };
 
 plugin_type_init plugin_type_deinitialize[MYSQL_MAX_PLUGIN_TYPE_NUM]=
 {
   0,ha_finalize_handlerton,0,0,finalize_schema_table,
-  finalize_audit_plugin,0,0
+  finalize_audit_plugin, 0, 0
 };
 
 #ifdef HAVE_DLOPEN
@@ -99,6 +113,14 @@ static const char *sizeof_st_plugin_sym=
                    "_mysql_sizeof_struct_st_plugin_";
 static const char *plugin_declarations_sym= "_mysql_plugin_declarations_";
 static int min_plugin_interface_version= MYSQL_PLUGIN_INTERFACE_VERSION & ~0xFF;
+static const char *maria_plugin_interface_version_sym=
+                   "_maria_plugin_interface_version_";
+static const char *maria_sizeof_st_plugin_sym=
+                   "_maria_sizeof_struct_st_plugin_";
+static const char *maria_plugin_declarations_sym=
+                   "_maria_plugin_declarations_";
+static int min_maria_plugin_interface_version=
+                   MARIA_PLUGIN_INTERFACE_VERSION & ~0xFF;
 #endif
 
 /* Note that 'int version' must be the first field of every plugin
@@ -223,7 +245,7 @@ public:
              (plugin_var_arg->flags & PLUGIN_VAR_THDLOCAL ? SESSION : GLOBAL) |
              (plugin_var_arg->flags & PLUGIN_VAR_READONLY ? READONLY : 0),
              0, -1, NO_ARG, pluginvar_show_type(plugin_var_arg), 0, 0,
-             VARIABLE_NOT_IN_BINLOG, 0, 0, 0, 0, PARSE_NORMAL),
+             VARIABLE_NOT_IN_BINLOG, 0, 0, 0, 0),
     plugin_var(plugin_var_arg), orig_pluginvar_name(plugin_var_arg->name)
   { plugin_var->name= name_arg; }
   sys_var_pluginvar *cast_pluginvar() { return this; }
@@ -250,7 +272,7 @@ static bool plugin_load_list(MEM_ROOT *tmp_root, int *argc, char **argv,
                              const char *list);
 static int test_plugin_options(MEM_ROOT *, struct st_plugin_int *,
                                int *, char **);
-static bool register_builtin(struct st_mysql_plugin *, struct st_plugin_int *,
+static bool register_builtin(struct st_maria_plugin *, struct st_plugin_int *,
                              struct st_plugin_int **);
 static void unlock_variables(THD *thd, struct system_variables *vars);
 static void cleanup_variables(THD *thd, struct system_variables *vars);
@@ -331,7 +353,7 @@ static const char *item_val_str(struct st_mysql_value *value,
     Lets be nice and create a temporary string since the
     buffer was too small
   */
-  return current_thd->strmake(res->c_ptr_quick(), res->length());
+  return current_thd->strmake(res->ptr(), res->length());
 }
 
 
@@ -416,11 +438,232 @@ static inline void free_plugin_mem(struct st_plugin_dl *p)
     dlclose(p->handle);
 #endif
   my_free(p->dl.str);
-  if (p->version != MYSQL_PLUGIN_INTERFACE_VERSION)
+  if (p->allocated)
     my_free(p->plugins);
 }
 
 
+/**
+  Reads MySQL-interface plugin declarations into an st_maria_plugin array
+
+  @param plugin_dl       Structure where the data should be put
+  @param sym             Pointer to the interface version value (or NULL)
+  @param dlpath          Path to the module
+  @param report          What errors should be reported
+
+  @retval FALSE OK
+  @retval TRUE  ERROR
+*/
+
+#ifdef HAVE_DLOPEN
+static my_bool read_mysql_plugin_info(struct st_plugin_dl *plugin_dl,
+                                      void *sym, char *dlpath,
+                                      int report)
+{
+  DBUG_ENTER("read_mysql_plugin_info");
+  /* Determine interface version */
+  if (!sym)
+  {
+    free_plugin_mem(plugin_dl);
+    report_error(report, ER_CANT_FIND_DL_ENTRY, plugin_interface_version_sym);
+    DBUG_RETURN(TRUE);
+  }
+  plugin_dl->mariaversion= 0;
+  plugin_dl->mysqlversion= *(int *)sym;
+  /* Versioning */
+  if (plugin_dl->mysqlversion < min_plugin_interface_version ||
+      (plugin_dl->mysqlversion >> 8) > (MYSQL_PLUGIN_INTERFACE_VERSION >> 8))
+  {
+    free_plugin_mem(plugin_dl);
+    report_error(report, ER_CANT_OPEN_LIBRARY, dlpath, 0,
+                 "plugin interface version mismatch");
+    DBUG_RETURN(TRUE);
+  }
+  /* Find plugin declarations */
+  if (!(sym= dlsym(plugin_dl->handle, plugin_declarations_sym)))
+  {
+    free_plugin_mem(plugin_dl);
+    report_error(report, ER_CANT_FIND_DL_ENTRY, plugin_declarations_sym);
+    DBUG_RETURN(TRUE);
+  }
+
+  /* convert mysql declaration to maria one */
+  {
+    int i;
+    uint sizeof_st_plugin;
+    struct st_mysql_plugin *old;
+    struct st_maria_plugin *cur;
+    char *ptr= (char *)sym;
+
+    if ((sym= dlsym(plugin_dl->handle, sizeof_st_plugin_sym)))
+      sizeof_st_plugin= *(int *)sym;
+    else
+    {
+      DBUG_ASSERT(min_plugin_interface_version == 0);
+      sizeof_st_plugin= (int)offsetof(struct st_mysql_plugin, version);
+    }
+
+    for (i= 0;
+         ((struct st_mysql_plugin *)(ptr + i * sizeof_st_plugin))->info;
+         i++)
+      /* no op */;
+
+    cur= (struct st_maria_plugin*)
+          my_malloc((i + 1) * sizeof(struct st_maria_plugin),
+                    MYF(MY_ZEROFILL|MY_WME));
+    if (!cur)
+    {
+      free_plugin_mem(plugin_dl);
+      report_error(report, ER_OUTOFMEMORY,
+                   static_cast<int>(plugin_dl->dl.length));
+      DBUG_RETURN(TRUE);
+    }
+    /*
+      All st_plugin fields not initialized in the plugin explicitly, are
+      set to 0. It matches C standard behaviour for struct initializers that
+      have less values than the struct definition.
+    */
+    for (i=0;
+         (old= (struct st_mysql_plugin *)(ptr + i * sizeof_st_plugin))->info;
+         i++)
+    {
+      /* Fields that exist in both the mysql and the maria declaration */
+      cur[i].type= old->type;
+      cur[i].info= old->info;
+      cur[i].name= old->name;
+      cur[i].author= old->author;
+      cur[i].descr= old->descr;
+      cur[i].license= old->license;
+      cur[i].init= old->init;
+      cur[i].deinit= old->deinit;
+      cur[i].version= old->version;
+      cur[i].status_vars= old->status_vars;
+      cur[i].system_vars= old->system_vars;
+      /*
+        Something like this should be added to process
+        new mysql plugin versions:
+        if (plugin_dl->mysqlversion > 0x0101)
+        {
+           cur[i].newfield= CONSTANT_MEANS_UNKNOWN;
+        }
+        else
+        {
+           cur[i].newfield= old->newfield;
+        }
+      */
+      /* Maria only fields */
+      cur[i].version_info= "Unknown";
+      cur[i].maturity= MariaDB_PLUGIN_MATURITY_UNKNOWN;
+    }
+    plugin_dl->allocated= true;
+    plugin_dl->plugins= (struct st_maria_plugin *)cur;
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Reads plugin declarations exported through the maria plugin interface
+
+  @param plugin_dl       Structure where the data should be put
+  @param sym             Pointer to the interface version value (or NULL)
+  @param dlpath          Path to the module
+  @param report          what errors should be reported
+
+  @retval FALSE OK
+  @retval TRUE  ERROR
+*/
+
+static my_bool read_maria_plugin_info(struct st_plugin_dl *plugin_dl,
+                                      void *sym, char *dlpath,
+                                      int report)
+{
+  DBUG_ENTER("read_maria_plugin_info");
+
+  /* Determine interface version */
+  if (!(sym))
+  {
+    /*
+      This branch should be unreachable: when the maria version symbol
+      is absent, the mysql plugin interface is tried instead.
+    */
+    free_plugin_mem(plugin_dl);
+    report_error(report, ER_CANT_FIND_DL_ENTRY,
+                 maria_plugin_interface_version_sym);
+    DBUG_RETURN(TRUE);
+  }
+  plugin_dl->mariaversion= *(int *)sym;
+  plugin_dl->mysqlversion= 0;
+  /* Versioning */
+  if (plugin_dl->mariaversion < min_maria_plugin_interface_version ||
+      (plugin_dl->mariaversion >> 8) > (MARIA_PLUGIN_INTERFACE_VERSION >> 8))
+  {
+    free_plugin_mem(plugin_dl);
+    report_error(report, ER_CANT_OPEN_LIBRARY, dlpath, 0,
+                 "plugin interface version mismatch");
+    DBUG_RETURN(TRUE);
+  }
+  /* Find plugin declarations */
+  if (!(sym= dlsym(plugin_dl->handle, maria_plugin_declarations_sym)))
+  {
+    free_plugin_mem(plugin_dl);
+    report_error(report, ER_CANT_FIND_DL_ENTRY, maria_plugin_declarations_sym);
+    DBUG_RETURN(TRUE);
+  }
+  if (plugin_dl->mariaversion != MARIA_PLUGIN_INTERFACE_VERSION)
+  {
+    uint sizeof_st_plugin;
+    struct st_maria_plugin *old, *cur;
+    char *ptr= (char *)sym;
+
+    if ((sym= dlsym(plugin_dl->handle, maria_sizeof_st_plugin_sym)))
+      sizeof_st_plugin= *(int *)sym;
+    else
+    {
+      free_plugin_mem(plugin_dl);
+      report_error(report, ER_CANT_FIND_DL_ENTRY, maria_sizeof_st_plugin_sym);
+      DBUG_RETURN(TRUE);
+    }
+    sym= ptr; /* the size lookup overwrote sym; point at declarations again */
+    if (sizeof_st_plugin != sizeof(struct st_maria_plugin))
+    {
+      int i;
+      for (i= 0;
+           ((struct st_maria_plugin *)(ptr + i * sizeof_st_plugin))->info;
+           i++)
+        /* no op */;
+
+      cur= (struct st_maria_plugin*)
+        my_malloc((i + 1) * sizeof(struct st_maria_plugin),
+                  MYF(MY_ZEROFILL|MY_WME));
+      if (!cur)
+      {
+        free_plugin_mem(plugin_dl);
+        report_error(report, ER_OUTOFMEMORY,
+                     static_cast<int>(plugin_dl->dl.length));
+        DBUG_RETURN(TRUE);
+      }
+      /*
+        All st_plugin fields not initialized in the plugin explicitly, are
+        set to 0. It matches C standard behaviour for struct initializers that
+        have less values than the struct definition.
+      */
+      for (i=0;
+           (old= (struct st_maria_plugin *)(ptr + i * sizeof_st_plugin))->info;
+           i++)
+        memcpy(cur + i, old, min(sizeof(cur[i]), sizeof_st_plugin));
+
+      sym= cur;
+      plugin_dl->allocated= true;
+    }
+  }
+  plugin_dl->plugins= (struct st_maria_plugin *)sym;
+
+  DBUG_RETURN(FALSE);
+}
+#endif /* HAVE_DLOPEN */
+
 static st_plugin_dl *plugin_dl_add(const LEX_STRING *dl, int report)
 {
 #ifdef HAVE_DLOPEN
@@ -470,22 +713,21 @@ static st_plugin_dl *plugin_dl_add(const LEX_STRING *dl, int report)
     report_error(report, ER_CANT_OPEN_LIBRARY, dlpath, errno, errmsg);
     DBUG_RETURN(0);
   }
-  /* Determine interface version */
-  if (!(sym= dlsym(plugin_dl.handle, plugin_interface_version_sym)))
+
+  /* Check which plugin interface is present and read the plugin info */
+  if (!(sym= dlsym(plugin_dl.handle, maria_plugin_interface_version_sym)))
   {
-    free_plugin_mem(&plugin_dl);
-    report_error(report, ER_CANT_FIND_DL_ENTRY, plugin_interface_version_sym);
-    DBUG_RETURN(0);
+    if (read_mysql_plugin_info(&plugin_dl,
+                               dlsym(plugin_dl.handle,
+                                     plugin_interface_version_sym),
+                               dlpath,
+                               report))
+      DBUG_RETURN(0);
   }
-  plugin_dl.version= *(int *)sym;
-  /* Versioning */
-  if (plugin_dl.version < min_plugin_interface_version ||
-      (plugin_dl.version >> 8) > (MYSQL_PLUGIN_INTERFACE_VERSION >> 8))
+  else
   {
-    free_plugin_mem(&plugin_dl);
-    report_error(report, ER_CANT_OPEN_LIBRARY, dlpath, 0,
-                 "plugin interface version mismatch");
-    DBUG_RETURN(0);
+    if (read_maria_plugin_info(&plugin_dl, sym, dlpath, report))
+      DBUG_RETURN(0);
   }
 
   /* link the services in */
@@ -508,87 +750,6 @@ static st_plugin_dl *plugin_dl_add(const LEX_STRING *dl, int report)
     }
   }
 
-  /* Find plugin declarations */
-  if (!(sym= dlsym(plugin_dl.handle, plugin_declarations_sym)))
-  {
-    free_plugin_mem(&plugin_dl);
-    report_error(report, ER_CANT_FIND_DL_ENTRY, plugin_declarations_sym);
-    DBUG_RETURN(0);
-  }
-
-  if (plugin_dl.version != MYSQL_PLUGIN_INTERFACE_VERSION)
-  {
-    uint sizeof_st_plugin;
-    struct st_mysql_plugin *old, *cur;
-    char *ptr= (char *)sym;
-
-    if ((sym= dlsym(plugin_dl.handle, sizeof_st_plugin_sym)))
-      sizeof_st_plugin= *(int *)sym;
-    else
-    {
-#ifdef ERROR_ON_NO_SIZEOF_PLUGIN_SYMBOL
-      report_error(report, ER_CANT_FIND_DL_ENTRY, sizeof_st_plugin_sym);
-      DBUG_RETURN(0);
-#else
-      /*
-        When the following assert starts failing, we'll have to switch
-        to the upper branch of the #ifdef
-      */
-      DBUG_ASSERT(min_plugin_interface_version == 0);
-      sizeof_st_plugin= (int)offsetof(struct st_mysql_plugin, version);
-#endif
-    }
-
-    /*
-      What's the purpose of this loop? If the goal is to catch a
-      missing 0 record at the end of a list, it will fail miserably
-      since the compiler is likely to optimize this away. /Matz
-     */
-    for (i= 0;
-         ((struct st_mysql_plugin *)(ptr+i*sizeof_st_plugin))->info;
-         i++)
-      /* no op */;
-
-    cur= (struct st_mysql_plugin*)
-      my_malloc((i+1)*sizeof(struct st_mysql_plugin), MYF(MY_ZEROFILL|MY_WME));
-    if (!cur)
-    {
-      free_plugin_mem(&plugin_dl);
-      report_error(report, ER_OUTOFMEMORY,
-                   static_cast<int>(plugin_dl.dl.length));
-      DBUG_RETURN(0);
-    }
-    /*
-      All st_plugin fields not initialized in the plugin explicitly, are
-      set to 0. It matches C standard behaviour for struct initializers that
-      have less values than the struct definition.
-    */
-    for (i=0;
-         (old=(struct st_mysql_plugin *)(ptr+i*sizeof_st_plugin))->info;
-         i++)
-      memcpy(cur+i, old, min(sizeof(cur[i]), sizeof_st_plugin));
-
-    sym= cur;
-  }
-  plugin_dl.plugins= (struct st_mysql_plugin *)sym;
-
-  /*
-    If report is REPORT_TO_USER, we were called from
-    mysql_install_plugin. Otherwise, we are called directly or
-    indirectly from plugin_init.
-   */
-  if (report == REPORT_TO_USER)
-  {
-    st_mysql_plugin *plugin= plugin_dl.plugins;
-    for ( ; plugin->info ; ++plugin)
-      if (plugin->flags & PLUGIN_OPT_NO_INSTALL)
-      {
-        report_error(report, ER_PLUGIN_NO_INSTALL, plugin->name);
-        free_plugin_mem(&plugin_dl);
-        DBUG_RETURN(0);
-   }
-  }
-
   /* Duplicate and convert dll name */
   plugin_dl.dl.length= dl->length * files_charset_info->mbmaxlen + 1;
   if (! (plugin_dl.dl.str= (char*) my_malloc(plugin_dl.dl.length, MYF(0))))
@@ -721,7 +882,10 @@ static plugin_ref intern_plugin_lock(LEX *lex, plugin_ref rc)
   {
     plugin_ref plugin;
 #ifdef DBUG_OFF
-    /* built-in plugins don't need ref counting */
+    /*
+      In optimized builds we don't do reference counting for built-in
+      (plugin->plugin_dl == 0) plugins.
+    */
     if (!pi->plugin_dl)
       DBUG_RETURN(pi);
 
@@ -738,7 +902,7 @@ static plugin_ref intern_plugin_lock(LEX *lex, plugin_ref rc)
     *plugin= pi;
 #endif
     pi->ref_count++;
-    DBUG_PRINT("info",("thd: 0x%lx, plugin: \"%s\", ref_count: %d",
+    DBUG_PRINT("lock",("thd: 0x%lx  plugin: \"%s\" LOCK ref_count: %d",
                        (long) current_thd, pi->name.str, pi->ref_count));
 
     if (lex)
@@ -749,13 +913,33 @@ static plugin_ref intern_plugin_lock(LEX *lex, plugin_ref rc)
 }
 
 
-plugin_ref plugin_lock(THD *thd, plugin_ref *ptr)
+plugin_ref plugin_lock(THD *thd, plugin_ref ptr)
 {
   LEX *lex= thd ? thd->lex : 0;
   plugin_ref rc;
   DBUG_ENTER("plugin_lock");
+
+#ifdef DBUG_OFF
+  /*
+    In optimized builds we don't do reference counting for built-in
+    (plugin->plugin_dl == 0) plugins.
+
+    Note that we access plugin->plugin_dl outside of LOCK_plugin, and for
+    dynamic plugins a 'plugin' could correspond to plugin that was unloaded
+    meanwhile!  But because st_plugin_int is always allocated on
+    plugin_mem_root, the pointer can never be invalid - the memory is never
+    freed.
+    Of course, the memory that 'plugin' points to can be overwritten by
+    another plugin being loaded, but plugin->plugin_dl can never change
+    from zero to non-zero or vice versa.
+    That is, it's always safe to check for plugin->plugin_dl==0 even
+    without a mutex.
+  */
+  if (! plugin_dlib(ptr))
+    DBUG_RETURN(ptr);
+#endif
   mysql_mutex_lock(&LOCK_plugin);
-  rc= my_intern_plugin_lock_ci(lex, *ptr);
+  rc= my_intern_plugin_lock_ci(lex, ptr);
   mysql_mutex_unlock(&LOCK_plugin);
   DBUG_RETURN(rc);
 }
@@ -808,7 +992,7 @@ static bool plugin_add(MEM_ROOT *tmp_root,
                        int *argc, char **argv, int report)
 {
   struct st_plugin_int tmp;
-  struct st_mysql_plugin *plugin;
+  struct st_maria_plugin *plugin;
   DBUG_ENTER("plugin_add");
   if (plugin_find_internal(name, MYSQL_ANY_PLUGIN))
   {
@@ -842,6 +1026,17 @@ static bool plugin_add(MEM_ROOT *tmp_root,
         report_error(report, ER_CANT_OPEN_LIBRARY, dl->str, 0, buf);
         goto err;
       }
+      if (plugin_maturity_map[plugin->maturity] < plugin_maturity)
+      {
+        char buf[256];
+        strxnmov(buf, sizeof(buf) - 1, "Loading of ",
+                 plugin_maturity_names[plugin->maturity],
+                 " plugins is prohibited by --plugin-maturity=",
+                 plugin_maturity_names[plugin_maturity],
+                 NullS);
+        report_error(report, ER_CANT_OPEN_LIBRARY, dl->str, 0, buf);
+        goto err;
+      }
       tmp.plugin= plugin;
       tmp.name.str= (char *)plugin->name;
       tmp.name.length= name_len;
@@ -864,10 +1059,6 @@ static bool plugin_add(MEM_ROOT *tmp_root,
       mysql_del_sys_var_chain(tmp.system_vars);
       restore_pluginvar_names(tmp.system_vars);
       goto err;
-
-      /* plugin was disabled */
-      plugin_dl_del(dl);
-      DBUG_RETURN(FALSE);
     }
   }
   report_error(report, ER_CANT_FIND_DL_ENTRY, name->str);
@@ -936,7 +1127,7 @@ static void plugin_deinitialize(struct st_plugin_int *plugin, bool ref_check)
 
 static void plugin_del(struct st_plugin_int *plugin)
 {
-  DBUG_ENTER("plugin_del(plugin)");
+  DBUG_ENTER("plugin_del");
   mysql_mutex_assert_owner(&LOCK_plugin);
   /* Free allocated strings before deleting the plugin. */
   mysql_rwlock_wrlock(&LOCK_system_variables_hash);
@@ -1013,8 +1204,6 @@ static void intern_plugin_unlock(LEX *lex, plugin_ref plugin)
   my_free(plugin);
 #endif
 
-  DBUG_PRINT("info",("unlocking plugin, name= %s, ref_count= %d",
-                     pi->name.str, pi->ref_count));
   if (lex)
   {
     /*
@@ -1034,6 +1223,9 @@ static void intern_plugin_unlock(LEX *lex, plugin_ref plugin)
   DBUG_ASSERT(pi->ref_count);
   pi->ref_count--;
 
+  DBUG_PRINT("lock",("thd: 0x%lx  plugin: \"%s\" UNLOCK ref_count: %d",
+                     (long) current_thd, pi->name.str, pi->ref_count));
+
   if (pi->state == PLUGIN_IS_DELETED && !pi->ref_count)
     reap_needed= true;
 
@@ -1064,6 +1256,9 @@ void plugin_unlock_list(THD *thd, plugin_ref *list, uint count)
 {
   LEX *lex= thd ? thd->lex : 0;
   DBUG_ENTER("plugin_unlock_list");
+  if (count == 0)
+    DBUG_VOID_RETURN;
+
   DBUG_ASSERT(list);
   mysql_mutex_lock(&LOCK_plugin);
   while (count--)
@@ -1224,8 +1419,8 @@ int plugin_init(int *argc, char **argv, int flags)
 {
   uint i;
   bool is_myisam;
-  struct st_mysql_plugin **builtins;
-  struct st_mysql_plugin *plugin;
+  struct st_maria_plugin **builtins;
+  struct st_maria_plugin *plugin;
   struct st_plugin_int tmp, *plugin_ptr, **reap;
   MEM_ROOT tmp_root;
   bool reaped_mandatory_plugin= false;
@@ -1284,6 +1479,7 @@ int plugin_init(int *argc, char **argv, int flags)
           !my_strnncoll(&my_charset_latin1, (const uchar*) plugin->name,
                         6, (const uchar*) "InnoDB", 6))
         continue;
+
       bzero(&tmp, sizeof(tmp));
       tmp.plugin= plugin;
       tmp.name.str= (char *)plugin->name;
@@ -1318,15 +1514,23 @@ int plugin_init(int *argc, char **argv, int flags)
       if (register_builtin(plugin, &tmp, &plugin_ptr))
         goto err_unlock;
 
-      /* only initialize MyISAM and CSV at this stage */
-      if (!(is_myisam=
-            !my_strcasecmp(&my_charset_latin1, plugin->name, "MyISAM")) &&
-          my_strcasecmp(&my_charset_latin1, plugin->name, "CSV"))
-        continue;
+      is_myisam= !my_strcasecmp(&my_charset_latin1, plugin->name, "MyISAM");
 
-      if (plugin_ptr->state != PLUGIN_IS_UNINITIALIZED ||
-          plugin_initialize(plugin_ptr))
-        goto err_unlock;
+      /*
+        strictly speaking, we should initialize all plugins,
+        even for mysqld --help, because important subsystems
+        may be disabled otherwise, and the help will be incomplete.
+        For example, if the mysql.plugin table is not MyISAM.
+        But for now it's an unlikely corner case, and to optimize
+        mysqld --help for all other users, we will only initialize
+        MyISAM here.
+      */
+      if (!(flags & PLUGIN_INIT_SKIP_INITIALIZATION) || is_myisam)
+      {
+        if (plugin_ptr->state == PLUGIN_IS_UNINITIALIZED &&
+            plugin_initialize(plugin_ptr))
+          goto err_unlock;
+      }
 
       /*
         initialize the global default storage engine so that it may
@@ -1412,7 +1616,7 @@ err:
 }
 
 
-static bool register_builtin(struct st_mysql_plugin *plugin,
+static bool register_builtin(struct st_maria_plugin *plugin,
                              struct st_plugin_int *tmp,
                              struct st_plugin_int **ptr)
 {
@@ -1478,20 +1682,21 @@ static void plugin_load(MEM_ROOT *tmp_root, int *argc, char **argv)
   if (result)
   {
     DBUG_PRINT("error",("Can't open plugin table"));
-    sql_print_error("Can't open the mysql.plugin table. Please "
-                    "run mysql_upgrade to create it.");
+    if (!opt_help)
+      sql_print_error("Can't open the mysql.plugin table. Please "
+                      "run mysql_upgrade to create it.");
+    else
+      sql_print_warning("Could not open mysql.plugin table. Some options may be missing from the help text");
     goto end;
   }
   table= tables.table;
-  init_read_record(&read_record_info, new_thd, table, NULL, 1, 0, FALSE);
+  if (init_read_record(&read_record_info, new_thd, table, NULL, 1, 0, FALSE))
+  {
+    sql_print_error("Could not initialize init_read_record; Plugins not "
+                    "loaded");
+    goto end;
+  }
   table->use_all_columns();
-  /*
-    there're no other threads running yet, so we don't need a mutex.
-    but plugin_add() before is designed to work in multi-threaded
-    environment, and it uses mysql_mutex_assert_owner(), so we lock
-    the mutex here to satisfy the assert
-  */
-  mysql_mutex_lock(&LOCK_plugin);
   while (!(error= read_record_info.read_record(&read_record_info)))
   {
     DBUG_PRINT("info", ("init plugin record"));
@@ -1502,12 +1707,19 @@ static void plugin_load(MEM_ROOT *tmp_root, int *argc, char **argv)
     LEX_STRING name= {(char *)str_name.ptr(), str_name.length()};
     LEX_STRING dl= {(char *)str_dl.ptr(), str_dl.length()};
 
+    /*
+      there're no other threads running yet, so we don't need a mutex.
+      but plugin_add() before is designed to work in multi-threaded
+      environment, and it uses mysql_mutex_assert_owner(), so we lock
+      the mutex here to satisfy the assert
+    */
+    mysql_mutex_lock(&LOCK_plugin);
     if (plugin_add(tmp_root, &name, &dl, argc, argv, REPORT_TO_LOG))
       sql_print_warning("Couldn't load plugin named '%s' with soname '%s'.",
                         str_name.c_ptr(), str_dl.c_ptr());
     free_root(tmp_root, MYF(MY_MARK_BLOCKS_FREE));
+    mysql_mutex_unlock(&LOCK_plugin);
   }
-  mysql_mutex_unlock(&LOCK_plugin);
   if (error > 0)
     sql_print_error(ER(ER_GET_ERRNO), my_errno);
   end_read_record(&read_record_info);
@@ -1529,7 +1741,7 @@ static bool plugin_load_list(MEM_ROOT *tmp_root, int *argc, char **argv,
   char buffer[FN_REFLEN];
   LEX_STRING name= {buffer, 0}, dl= {NULL, 0}, *str= &name;
   struct st_plugin_dl *plugin_dl;
-  struct st_mysql_plugin *plugin;
+  struct st_maria_plugin *plugin;
   char *p= buffer;
   DBUG_ENTER("plugin_load_list");
   while (list)
@@ -1696,7 +1908,8 @@ void plugin_shutdown(void)
       if (plugins[i]->ref_count)
         sql_print_error("Plugin '%s' has ref_count=%d after shutdown.",
                         plugins[i]->name.str, plugins[i]->ref_count);
-      if (plugins[i]->state & PLUGIN_IS_UNINITIALIZED)
+      if (plugins[i]->state & PLUGIN_IS_UNINITIALIZED ||
+          plugins[i]->state & PLUGIN_IS_DISABLED)
         plugin_del(plugins[i]);
     }
 
@@ -1801,9 +2014,10 @@ bool mysql_install_plugin(THD *thd, const LEX_STRING *name, const LEX_STRING *dl
 
   if (tmp->state == PLUGIN_IS_DISABLED)
   {
-    push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                        ER_CANT_INITIALIZE_UDF, ER(ER_CANT_INITIALIZE_UDF),
-                        name->str, "Plugin is disabled");
+    if (global_system_variables.log_warnings)
+      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_CANT_INITIALIZE_UDF, ER(ER_CANT_INITIALIZE_UDF),
+                          name->str, "Plugin is disabled");
   }
   else
   {
@@ -1906,16 +2120,6 @@ bool mysql_uninstall_plugin(THD *thd, const LEX_STRING *name)
     my_error(ER_PLUGIN_IS_PERMANENT, MYF(0), name->str);
     goto err;
   }
-  /*
-    Error message for ER_PLUGIN_IS_PERMANENT is not suitable for
-    plugins marked as not dynamically uninstallable, so we have a
-    separate one instead of changing the old one.
-   */
-  if (plugin->plugin->flags & PLUGIN_OPT_NO_UNINSTALL)
-  {
-    my_error(ER_PLUGIN_NO_UNINSTALL, MYF(0), plugin->plugin->name);
-    goto err;
-  }
 
   plugin->state= PLUGIN_IS_DELETED;
   if (plugin->ref_count)
@@ -1931,8 +2135,8 @@ bool mysql_uninstall_plugin(THD *thd, const LEX_STRING *name)
   table->field[0]->store(name->str, name->length, system_charset_info);
   key_copy(user_key, table->record[0], table->key_info,
            table->key_info->key_length);
-  if (! table->file->index_read_idx_map(table->record[0], 0, user_key,
-                                        HA_WHOLE_KEY, HA_READ_KEY_EXACT))
+  if (! table->file->ha_index_read_idx_map(table->record[0], 0, user_key,
+                                           HA_WHOLE_KEY, HA_READ_KEY_EXACT))
   {
     int error;
     /*
@@ -2027,6 +2231,7 @@ err:
 #undef MYSQL_SYSVAR_NAME
 #define MYSQL_SYSVAR_NAME(name) name
 #define PLUGIN_VAR_TYPEMASK 0x007f
+#define PLUGIN_VAR_BOOKMARK_KEY (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_MEMALLOC)
 
 #define EXTRA_OPTIONS 3 /* options for: 'foo', 'plugin-foo' and NULL */
 
@@ -2380,7 +2585,7 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name,
   else
     memcpy(varname + 1, name, namelen + 1);
 
-  varname[0]= flags & PLUGIN_VAR_TYPEMASK;
+  varname[0]= flags & PLUGIN_VAR_BOOKMARK_KEY;
 
   result= (st_bookmark*) my_hash_search(&bookmark_hash,
                                         (const uchar*) varname, length - 1);
@@ -2438,7 +2643,7 @@ static st_bookmark *register_var(const char *plugin, const char *name,
   {
     result= (st_bookmark*) alloc_root(&plugin_mem_root,
                                       sizeof(struct st_bookmark) + length-1);
-    varname[0]= flags & PLUGIN_VAR_TYPEMASK;
+    varname[0]= flags & PLUGIN_VAR_BOOKMARK_KEY;
     memcpy(result->key, varname, length);
     result->name_len= length - 2;
     result->offset= -1;
@@ -2511,11 +2716,12 @@ static void restore_pluginvar_names(sys_var *first)
 */
 static uchar *intern_sys_var_ptr(THD* thd, int offset, bool global_lock)
 {
+  DBUG_ENTER("intern_sys_var_ptr");
   DBUG_ASSERT(offset >= 0);
   DBUG_ASSERT((uint)offset <= global_system_variables.dynamic_variables_head);
 
   if (!thd)
-    return (uchar*) global_system_variables.dynamic_variables_ptr + offset;
+    DBUG_RETURN((uchar*) global_system_variables.dynamic_variables_ptr + offset);
 
   /*
     dynamic_variables_head points to the largest valid offset
@@ -2554,10 +2760,12 @@ static uchar *intern_sys_var_ptr(THD* thd, int offset, bool global_lock)
       sys_var *var;
       st_bookmark *v= (st_bookmark*) my_hash_element(&bookmark_hash,idx);
 
-      if (v->version <= thd->variables.dynamic_variables_version ||
-          !(var= intern_find_sys_var(v->key + 1, v->name_len)) ||
+      if (v->version <= thd->variables.dynamic_variables_version)
+        continue; /* already in thd->variables */
+
+      if (!(var= intern_find_sys_var(v->key + 1, v->name_len)) ||
           !(pi= var->cast_pluginvar()) ||
-          v->key[0] != (pi->plugin_var->flags & PLUGIN_VAR_TYPEMASK))
+          v->key[0] != (pi->plugin_var->flags & PLUGIN_VAR_BOOKMARK_KEY))
         continue;
 
       /* Here we do anything special that may be required of the data types */
@@ -2585,7 +2793,7 @@ static uchar *intern_sys_var_ptr(THD* thd, int offset, bool global_lock)
 
     mysql_rwlock_unlock(&LOCK_system_variables_hash);
   }
-  return (uchar*)thd->variables.dynamic_variables_ptr + offset;
+  DBUG_RETURN((uchar*)thd->variables.dynamic_variables_ptr + offset);
 }
 
 
@@ -2676,27 +2884,22 @@ static void unlock_variables(THD *thd, struct system_variables *vars)
 static void cleanup_variables(THD *thd, struct system_variables *vars)
 {
   st_bookmark *v;
-  sys_var_pluginvar *pivar;
-  sys_var *var;
-  int flags;
   uint idx;
 
   mysql_rwlock_rdlock(&LOCK_system_variables_hash);
   for (idx= 0; idx < bookmark_hash.records; idx++)
   {
     v= (st_bookmark*) my_hash_element(&bookmark_hash, idx);
-    if (v->version > vars->dynamic_variables_version ||
-        !(var= intern_find_sys_var(v->key + 1, v->name_len)) ||
-        !(pivar= var->cast_pluginvar()) ||
-        v->key[0] != (pivar->plugin_var->flags & PLUGIN_VAR_TYPEMASK))
-      continue;
 
-    flags= pivar->plugin_var->flags;
+    if (v->version > vars->dynamic_variables_version)
+      continue; /* not in vars */
+
+    DBUG_ASSERT((uint)v->offset <= vars->dynamic_variables_head);
 
-    if ((flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR &&
-        flags & PLUGIN_VAR_THDLOCAL && flags & PLUGIN_VAR_MEMALLOC)
+    if ((v->key[0] & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR &&
+         v->key[0] & PLUGIN_VAR_MEMALLOC)
     {
-      char **ptr= (char**) pivar->real_value_ptr(thd, OPT_SESSION);
+      char **ptr= (char**)(vars->dynamic_variables_ptr + v->offset);
       my_free(*ptr);
       *ptr= NULL;
     }
@@ -3403,11 +3606,9 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp,
   DBUG_ASSERT(tmp->plugin && tmp->name.str);
 
   /*
-    The 'federated' and 'ndbcluster' storage engines are always disabled by
-    default.
+    The 'ndbcluster' storage engine is always disabled by default.
   */
-  if (!(my_strcasecmp(&my_charset_latin1, tmp->name.str, "federated") &&
-      my_strcasecmp(&my_charset_latin1, tmp->name.str, "ndbcluster")))
+  if (!my_strcasecmp(&my_charset_latin1, tmp->name.str, "ndbcluster"))
     plugin_load_option= PLUGIN_OFF;
 
   for (opt= tmp->plugin->system_vars; opt && *opt; opt++)
diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h
index c3e6cc6fb28..f0b706bc322 100644
--- a/sql/sql_plugin.h
+++ b/sql/sql_plugin.h
@@ -23,7 +23,7 @@
   that is defined in plugin.h
 */
 #define SHOW_always_last SHOW_KEY_CACHE_LONG, \
-            SHOW_KEY_CACHE_LONGLONG, SHOW_LONG_STATUS, SHOW_DOUBLE_STATUS, \
+            SHOW_LONG_STATUS, SHOW_DOUBLE_STATUS, \
             SHOW_HAVE, SHOW_MY_BOOL, SHOW_HA_ROWS, SHOW_SYS, \
             SHOW_LONG_NOFLUSH, SHOW_LONGLONG_STATUS, SHOW_LEX_STRING
 #include <mysql/plugin.h>
@@ -59,7 +59,6 @@ extern const char *global_plugin_typelib_names[];
 
 typedef enum enum_mysql_show_type SHOW_TYPE;
 typedef struct st_mysql_show_var SHOW_VAR;
-typedef struct st_mysql_lex_string LEX_STRING;
 
 #define MYSQL_ANY_PLUGIN         -1
 
@@ -83,8 +82,10 @@ struct st_plugin_dl
 {
   LEX_STRING dl;
   void *handle;
-  struct st_mysql_plugin *plugins;
-  int version;
+  struct st_maria_plugin *plugins;
+  int mysqlversion;
+  int mariaversion;
+  bool   allocated;
   uint ref_count;            /* number of plugins loaded from the library */
 };
 
@@ -93,7 +94,7 @@ struct st_plugin_dl
 struct st_plugin_int
 {
   LEX_STRING name;
-  struct st_mysql_plugin *plugin;
+  struct st_maria_plugin *plugin;
   struct st_plugin_dl *plugin_dl;
   uint state;
   uint ref_count;               /* number of threads using the plugin */
@@ -110,6 +111,8 @@ struct st_plugin_int
 */
 #ifdef DBUG_OFF
 typedef struct st_plugin_int *plugin_ref;
+#define plugin_ref_to_int(A) (A)   /* plugin_ref is already st_plugin_int* */
+#define plugin_int_to_ref(A) (A)   /* identity; parenthesised for safety */
 #define plugin_decl(pi) ((pi)->plugin)
 #define plugin_dlib(pi) ((pi)->plugin_dl)
 #define plugin_data(pi,cast) ((cast)((pi)->data))
@@ -119,6 +122,8 @@ typedef struct st_plugin_int *plugin_ref;
 #define plugin_equals(p1,p2) ((p1) == (p2))
 #else
 typedef struct st_plugin_int **plugin_ref;
+#define plugin_ref_to_int(A) ((A) ? (A)[0] : NULL)  /* NULL-safe unwrap */
+#define plugin_int_to_ref(A) (&(A))                 /* A must be an lvalue */
 #define plugin_decl(pi) ((pi)[0]->plugin)
 #define plugin_dlib(pi) ((pi)[0]->plugin_dl)
 #define plugin_data(pi,cast) ((cast)((pi)[0]->data))
@@ -134,6 +139,9 @@ extern char *opt_plugin_load;
 extern char *opt_plugin_dir_ptr;
 extern char opt_plugin_dir[FN_REFLEN];
 extern const LEX_STRING plugin_type_names[];
+extern ulong plugin_maturity;
+extern TYPELIB plugin_maturity_values;
+extern const char *plugin_maturity_names[];
 
 extern int plugin_init(int *argc, char **argv, int init_flags);
 extern void plugin_shutdown(void);
@@ -143,7 +151,7 @@ extern bool plugin_is_ready(const LEX_STRING *name, int type);
 #define my_plugin_lock_by_name_ci(A,B,C) plugin_lock_by_name(A,B,C)
 #define my_plugin_lock(A,B) plugin_lock(A,B)
 #define my_plugin_lock_ci(A,B) plugin_lock(A,B)
-extern plugin_ref plugin_lock(THD *thd, plugin_ref *ptr);
+extern plugin_ref plugin_lock(THD *thd, plugin_ref ptr);
 extern plugin_ref plugin_lock_by_name(THD *thd, const LEX_STRING *name,
                                       int type);
 extern void plugin_unlock(THD *thd, plugin_ref plugin);
diff --git a/sql/sql_plugin_services.h b/sql/sql_plugin_services.h
index 01a8c1eab47..7f2a4e5f955 100644
--- a/sql/sql_plugin_services.h
+++ b/sql/sql_plugin_services.h
@@ -46,13 +46,20 @@ static struct my_thread_scheduler_service my_thread_scheduler_handler= {
   my_thread_scheduler_reset,
 };
 
+static struct progress_report_service_st progress_report_handler= {
+  thd_progress_init,
+  thd_progress_report,
+  thd_progress_next_stage,
+  thd_progress_end,
+  set_thd_proc_info
+};
 
 static struct st_service_ref list_of_services[]=
 {
   { "my_snprintf_service", VERSION_my_snprintf, &my_snprintf_handler },
   { "thd_alloc_service",   VERSION_thd_alloc,   &thd_alloc_handler },
   { "thd_wait_service",    VERSION_thd_wait,    &thd_wait_handler },
-  { "my_thread_scheduler_service",
-    VERSION_my_thread_scheduler, &my_thread_scheduler_handler },
+  { "my_thread_scheduler_service", VERSION_my_thread_scheduler, &my_thread_scheduler_handler },
+  { "progress_report_service", VERSION_progress_report, &progress_report_handler }
 };
 
diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc
index 48b6886f6f2..ef6dd519d5d 100644
--- a/sql/sql_prepare.cc
+++ b/sql/sql_prepare.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2002, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -113,6 +114,7 @@ When one supplies long data for a placeholder:
 #include <mysql_com.h>
 #endif
 #include "lock.h"                               // MYSQL_OPEN_FORCE_SHARED_MDL
+#include "sql_handler.h"
 
 /**
   A result class used to send cursor rows using the binary protocol.
@@ -124,7 +126,7 @@ class Select_fetch_protocol_binary: public select_send
 public:
   Select_fetch_protocol_binary(THD *thd);
   virtual bool send_result_set_metadata(List<Item> &list, uint flags);
-  virtual bool send_data(List<Item> &items);
+  virtual int send_data(List<Item> &items);
   virtual bool send_eof();
 #ifdef EMBEDDED_LIBRARY
   void begin_dataset()
@@ -238,9 +240,9 @@ protected:
   virtual bool store(const char *from, size_t length, CHARSET_INFO *cs);
   virtual bool store(const char *from, size_t length,
                      CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
-  virtual bool store(MYSQL_TIME *time);
+  virtual bool store(MYSQL_TIME *time, int decimals);
   virtual bool store_date(MYSQL_TIME *time);
-  virtual bool store_time(MYSQL_TIME *time);
+  virtual bool store_time(MYSQL_TIME *time, int decimals);
   virtual bool store(float value, uint32 decimals, String *buffer);
   virtual bool store(double value, uint32 decimals, String *buffer);
   virtual bool store(Field *field);
@@ -333,6 +335,8 @@ static bool send_prep_stmt(Prepared_statement *stmt, uint columns)
   int error;
   THD *thd= stmt->thd;
   DBUG_ENTER("send_prep_stmt");
+  DBUG_PRINT("enter",("stmt->id: %lu  columns: %d  param_count: %d",
+                      stmt->id, columns, stmt->param_count));
 
   buff[0]= 0;                                   /* OK packet indicator */
   int4store(buff+1, stmt->id);
@@ -577,8 +581,7 @@ static void set_param_time(Item_param *param, uchar **pos, ulong len)
   }
   else
     set_zero_time(&tm, MYSQL_TIMESTAMP_TIME);
-  param->set_time(&tm, MYSQL_TIMESTAMP_TIME,
-                  MAX_TIME_WIDTH * MY_CHARSET_BIN_MB_MAXLEN);
+  param->set_time(&tm, MYSQL_TIMESTAMP_TIME, MAX_TIME_FULL_WIDTH);
   *pos+= length;
 }
 
@@ -1245,6 +1248,8 @@ static bool mysql_test_insert(Prepared_statement *stmt,
   if (insert_precheck(thd, table_list))
     goto error;
 
+  //upgrade_lock_type_for_insert(thd, &table_list->lock_type, duplic,
+  //                             values_list.elements > 1);
   /*
     open temporary memory pool for temporary data allocated by derived
     tables & preparation procedure
@@ -1254,7 +1259,7 @@ static bool mysql_test_insert(Prepared_statement *stmt,
     TL_WRITE_DELAYED as having two such locks can cause table corruption.
   */
   if (open_normal_and_derived_tables(thd, table_list,
-                                     MYSQL_OPEN_FORCE_SHARED_MDL))
+                                     MYSQL_OPEN_FORCE_SHARED_MDL, DT_INIT))
     goto error;
 
   if ((values= its++))
@@ -1338,7 +1343,10 @@ static int mysql_test_update(Prepared_statement *stmt,
       open_tables(thd, &table_list, &table_count, MYSQL_OPEN_FORCE_SHARED_MDL))
     goto error;
 
-  if (table_list->multitable_view)
+  if (mysql_handle_derived(thd->lex, DT_INIT))
+    goto error;
+
+  if (table_list->is_multitable())
   {
     DBUG_ASSERT(table_list->view != 0);
     DBUG_PRINT("info", ("Switch to multi-update"));
@@ -1352,9 +1360,17 @@ static int mysql_test_update(Prepared_statement *stmt,
     thd->fill_derived_tables() is false here for sure (because it is
     preparation of PS, so we even do not check it).
   */
-  if (mysql_handle_derived(thd->lex, &mysql_derived_prepare))
+  if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT))
+    goto error;
+  if (table_list->handle_derived(thd->lex, DT_PREPARE))
     goto error;
 
+  if (!table_list->updatable)
+  {
+    my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "UPDATE");
+    goto error;
+  }
+
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
   /* Force privilege re-checking for views after they have been opened. */
   want_privilege= (table_list->view ? UPDATE_ACL :
@@ -1407,16 +1423,28 @@ error:
 static bool mysql_test_delete(Prepared_statement *stmt,
                               TABLE_LIST *table_list)
 {
+  uint table_count= 0;
   THD *thd= stmt->thd;
   LEX *lex= stmt->lex;
   DBUG_ENTER("mysql_test_delete");
 
   if (delete_precheck(thd, table_list) ||
-      open_normal_and_derived_tables(thd, table_list,
-                                     MYSQL_OPEN_FORCE_SHARED_MDL))
+      open_tables(thd, &table_list, &table_count, MYSQL_OPEN_FORCE_SHARED_MDL))
+    goto error;
+
+  if (mysql_handle_derived(thd->lex, DT_INIT))
+    goto error;
+  if (mysql_handle_derived(thd->lex, DT_MERGE_FOR_INSERT))
+    goto error;
+  if (mysql_handle_derived(thd->lex, DT_PREPARE))
     goto error;
 
-  if (!table_list->table)
+  if (!table_list->updatable)
+  {
+    my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "DELETE");
+    goto error;
+  }
+  if (!table_list->table || !table_list->table->created)
   {
     my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
              table_list->view_db.str, table_list->view_name.str);
@@ -1471,10 +1499,12 @@ static int mysql_test_select(Prepared_statement *stmt,
     goto error;
   }
 
-  if (open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL))
+  if (open_normal_and_derived_tables(thd, tables,  MYSQL_OPEN_FORCE_SHARED_MDL,
+                                     DT_PREPARE | DT_CREATE))
     goto error;
 
   thd->lex->used_tables= 0;                        // Updated by setup_fields
+  thd->thd_marker.emb_on_expr_nest= 0;
 
   /*
     JOIN::prepare calls
@@ -1532,7 +1562,8 @@ static bool mysql_test_do_fields(Prepared_statement *stmt,
                                    UINT_MAX, FALSE))
     DBUG_RETURN(TRUE);
 
-  if (open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL))
+  if (open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL,
+                                     DT_PREPARE | DT_CREATE))
     DBUG_RETURN(TRUE);
   DBUG_RETURN(setup_fields(thd, 0, *values, MARK_COLUMNS_NONE, 0, 0));
 }
@@ -1560,9 +1591,10 @@ static bool mysql_test_set_fields(Prepared_statement *stmt,
   THD *thd= stmt->thd;
   set_var_base *var;
 
-  if ((tables && check_table_access(thd, SELECT_ACL, tables, FALSE,
-                                    UINT_MAX, FALSE)) ||
-      open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL))
+  if ((tables &&
+       check_table_access(thd, SELECT_ACL, tables, FALSE, UINT_MAX, FALSE)) ||
+      open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL,
+                                     DT_PREPARE | DT_CREATE))
     goto error;
 
   while ((var= it++))
@@ -1597,9 +1629,9 @@ static bool mysql_test_call_fields(Prepared_statement *stmt,
   THD *thd= stmt->thd;
   Item *item;
 
-  if ((tables && check_table_access(thd, SELECT_ACL, tables, FALSE,
-                                    UINT_MAX, FALSE)) ||
-      open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL))
+  if ((tables &&
+       check_table_access(thd, SELECT_ACL, tables, FALSE, UINT_MAX, FALSE)) ||
+      open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL, DT_PREPARE))
     goto err;
 
   while ((item= it++))
@@ -1674,6 +1706,7 @@ select_like_stmt_test_with_open(Prepared_statement *stmt,
                                 int (*specific_prepare)(THD *thd),
                                 ulong setup_tables_done_option)
 {
+  uint table_count= 0;
   DBUG_ENTER("select_like_stmt_test_with_open");
 
   /*
@@ -1682,8 +1715,8 @@ select_like_stmt_test_with_open(Prepared_statement *stmt,
     prepared EXPLAIN yet so derived tables will clean up after
     themself.
   */
-  if (open_normal_and_derived_tables(stmt->thd, tables,
-                                     MYSQL_OPEN_FORCE_SHARED_MDL))
+  THD *thd= stmt->thd;
+  if (open_tables(thd, &tables, &table_count, MYSQL_OPEN_FORCE_SHARED_MDL))
     DBUG_RETURN(TRUE);
 
   DBUG_RETURN(select_like_stmt_test(stmt, specific_prepare,
@@ -1724,7 +1757,8 @@ static bool mysql_test_create_table(Prepared_statement *stmt)
       create_table->open_type= OT_BASE_ONLY;
 
     if (open_normal_and_derived_tables(stmt->thd, lex->query_tables,
-                                       MYSQL_OPEN_FORCE_SHARED_MDL))
+                                       MYSQL_OPEN_FORCE_SHARED_MDL,
+                                       DT_PREPARE | DT_CREATE))
       DBUG_RETURN(TRUE);
 
     select_lex->context.resolve_in_select_list= TRUE;
@@ -1744,7 +1778,8 @@ static bool mysql_test_create_table(Prepared_statement *stmt)
       which keeps metadata validation code simple.
     */
     if (open_normal_and_derived_tables(stmt->thd, lex->query_tables,
-                                       MYSQL_OPEN_FORCE_SHARED_MDL))
+                                       MYSQL_OPEN_FORCE_SHARED_MDL,
+                                       DT_PREPARE))
       DBUG_RETURN(TRUE);
   }
 
@@ -1777,7 +1812,8 @@ static bool mysql_test_create_view(Prepared_statement *stmt)
   if (create_view_precheck(thd, tables, view, lex->create_view_mode))
     goto err;
 
-  if (open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL))
+  if (open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL,
+                                     DT_PREPARE))
     goto err;
 
   lex->context_analysis_only|=  CONTEXT_ANALYSIS_ONLY_VIEW;
@@ -1922,6 +1958,56 @@ static bool mysql_test_insert_select(Prepared_statement *stmt,
   return res;
 }
 
+/**
+  Validate a HANDLER ... READ statement for preparation.
+
+    On success, unless stmt->is_sql_prepare() is true, send the statement
+    header and the handler's column list back to the client.
+
+  @param stmt               prepared statement
+  @param tables             table list handed to mysql_ha_read_prepare()
+
+  @retval 0 success
+  @retval 1 error, error message is set in THD
+  @retval 2 success, and statement metadata has been sent
+*/
+
+static int mysql_test_handler_read(Prepared_statement *stmt,
+                                   TABLE_LIST *tables)
+{
+  THD *thd= stmt->thd;
+  LEX *lex= stmt->lex;
+  SQL_HANDLER *ha_table;
+  DBUG_ENTER("mysql_test_handler_read");
+
+  lex->select_lex.context.resolve_in_select_list= TRUE;
+
+  /*
+    We don't have to test for permissions as this is already done during
+    HANDLER OPEN
+  */
+  if (!(ha_table= mysql_ha_read_prepare(thd, tables, lex->ha_read_mode,
+                                        lex->ident.str, lex->insert_list,
+                                        lex->select_lex.where)))
+    DBUG_RETURN(1);
+
+  if (!stmt->is_sql_prepare())
+  {
+    if (!lex->result && !(lex->result= new (stmt->mem_root) select_send))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), (int) sizeof(select_send));
+      DBUG_RETURN(1);
+    }
+    if (send_prep_stmt(stmt, ha_table->fields.elements) ||
+        lex->result->send_result_set_metadata(ha_table->fields,
+                                              Protocol::SEND_EOF) ||
+        thd->protocol->flush())
+      DBUG_RETURN(1);
+    DBUG_RETURN(2);
+  }
+  DBUG_RETURN(0);
+}
+
 
 /**
   Perform semantic analysis of the parsed tree and send a response packet
@@ -2040,6 +2126,11 @@ static bool check_prepared_statement(Prepared_statement *stmt)
     res= mysql_test_insert_select(stmt, tables);
     break;
 
+  case SQLCOM_HA_READ:
+    res= mysql_test_handler_read(stmt, tables);
+    /* Statement and field info has already been sent */
+    DBUG_RETURN(res == 1 ? TRUE : FALSE);
+
     /*
       Note that we don't need to have cases in this list if they are
       marked with CF_STATUS_COMMAND in sql_command_flags
@@ -2167,14 +2258,13 @@ void mysqld_stmt_prepare(THD *thd, const char *packet, uint packet_length)
   Protocol *save_protocol= thd->protocol;
   Prepared_statement *stmt;
   DBUG_ENTER("mysqld_stmt_prepare");
-
   DBUG_PRINT("prep_query", ("%s", packet));
 
   /* First of all clear possible warnings from the previous command */
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, opt_userstat_running);
 
   if (! (stmt= new Prepared_statement(thd)))
-    DBUG_VOID_RETURN; /* out of memory: error is set in Sql_alloc */
+    goto end;           /* out of memory: error is set in Sql_alloc */
 
   if (thd->stmt_map.insert(thd, stmt))
   {
@@ -2182,7 +2272,7 @@ void mysqld_stmt_prepare(THD *thd, const char *packet, uint packet_length)
       The error is set in the insert. The statement itself
       will be also deleted there (this is how the hash works).
     */
-    DBUG_VOID_RETURN;
+    goto end;
   }
 
   thd->protocol= &thd->protocol_binary;
@@ -2196,6 +2286,7 @@ void mysqld_stmt_prepare(THD *thd, const char *packet, uint packet_length)
   thd->protocol= save_protocol;
 
   /* check_prepared_statemnt sends the metadata packet in case of success */
+end:
   DBUG_VOID_RETURN;
 }
 
@@ -2242,7 +2333,7 @@ static const char *get_dynamic_sql_string(LEX *lex, uint *query_len)
                                          lex->prepared_stmt_code.length))
         && entry->value)
     {
-      my_bool is_var_null;
+      bool is_var_null;
       var_value= entry->val_str(&is_var_null, &str, NOT_FIXED_DEC);
       /*
         NULL value of variable checked early as entry->value so here
@@ -2424,6 +2515,7 @@ void reinit_stmt_before_use(THD *thd, LEX *lex)
       /* Fix ORDER list */
       for (order= sl->order_list.first; order; order= order->next)
         order->item= &order->item_ptr;
+      sl->handle_derived(lex, DT_REINIT);
 
       /* clear the no_error flag for INSERT/UPDATE IGNORE */
       sl->no_error= FALSE;
@@ -2474,9 +2566,6 @@ void reinit_stmt_before_use(THD *thd, LEX *lex)
   }
   lex->current_select= &lex->select_lex;
 
-  /* restore original list used in INSERT ... SELECT */
-  if (lex->leaf_tables_insert)
-    lex->select_lex.leaf_tables= lex->leaf_tables_insert;
 
   if (lex->result)
   {
@@ -2538,7 +2627,7 @@ void mysqld_stmt_execute(THD *thd, char *packet_arg, uint packet_length)
   packet+= 9;                               /* stmt_id + 5 bytes of flags */
 
   /* First of all clear possible warnings from the previous command */
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, opt_userstat_running);
 
   if (!(stmt= find_prepared_statement(thd, stmt_id)))
   {
@@ -2634,7 +2723,8 @@ void mysqld_stmt_fetch(THD *thd, char *packet, uint packet_length)
   DBUG_ENTER("mysqld_stmt_fetch");
 
   /* First of all clear possible warnings from the previous command */
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, opt_userstat_running);
+
   status_var_increment(thd->status_var.com_stmt_fetch);
   if (!(stmt= find_prepared_statement(thd, stmt_id)))
   {
@@ -2693,7 +2783,7 @@ void mysqld_stmt_reset(THD *thd, char *packet)
   DBUG_ENTER("mysqld_stmt_reset");
 
   /* First of all clear possible warnings from the previous command */
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, opt_userstat_running);
 
   status_var_increment(thd->status_var.com_stmt_reset);
   if (!(stmt= find_prepared_statement(thd, stmt_id)))
@@ -2908,11 +2998,11 @@ bool Select_fetch_protocol_binary::send_eof()
 }
 
 
-bool
+int
 Select_fetch_protocol_binary::send_data(List<Item> &fields)
 {
   Protocol *save_protocol= thd->protocol;
-  bool rc;
+  int rc;
 
   thd->protocol= &protocol;
   rc= select_send::send_data(fields);
@@ -3399,6 +3489,7 @@ Prepared_statement::execute_loop(String *expanded_query,
   Reprepare_observer reprepare_observer;
   bool error;
   int reprepare_attempt= 0;
+  bool need_set_parameters= true;
 
   /* Check if we got an error when sending long data */
   if (state == Query_arena::STMT_ERROR)
@@ -3407,10 +3498,18 @@ Prepared_statement::execute_loop(String *expanded_query,
     return TRUE;
   }
 
-  if (set_parameters(expanded_query, packet, packet_end))
+reexecute:
+  if (need_set_parameters &&
+      set_parameters(expanded_query, packet, packet_end))
     return TRUE;
 
-reexecute:
+  /*
+    if set_parameters() has generated warnings,
+    we need to repeat it when reexecuting, to recreate these
+    warnings.
+  */
+  need_set_parameters= thd->warning_info->statement_warn_count();
+
   reprepare_observer.reset_reprepare_observer();
 
   /*
@@ -3505,7 +3604,7 @@ Prepared_statement::execute_server_runnable(Server_runnable *server_runnable)
 bool
 Prepared_statement::reprepare()
 {
-  char saved_cur_db_name_buf[NAME_LEN+1];
+  char saved_cur_db_name_buf[SAFE_NAME_LEN+1];
   LEX_STRING saved_cur_db_name=
     { saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) };
   LEX_STRING stmt_db_name= { db, db_length };
@@ -3665,7 +3764,7 @@ bool Prepared_statement::execute(String *expanded_query, bool open_cursor)
   Query_arena *old_stmt_arena;
   bool error= TRUE;
 
-  char saved_cur_db_name_buf[NAME_LEN+1];
+  char saved_cur_db_name_buf[SAFE_NAME_LEN+1];
   LEX_STRING saved_cur_db_name=
     { saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) };
   bool cur_db_changed;
@@ -4267,8 +4366,10 @@ bool Protocol_local::store(const char *str, size_t length,
 
 /* Store MYSQL_TIME (in binary format) */
 
-bool Protocol_local::store(MYSQL_TIME *time)
+bool Protocol_local::store(MYSQL_TIME *time, int decimals)
 {
+  if (decimals != AUTO_SEC_PART_DIGITS)
+    time->second_part= sec_part_truncate(time->second_part, decimals);
   return store_column(time, sizeof(MYSQL_TIME));
 }
 
@@ -4283,8 +4384,10 @@ bool Protocol_local::store_date(MYSQL_TIME *time)
 
 /** Store MYSQL_TIME (in binary format) */
 
-bool Protocol_local::store_time(MYSQL_TIME *time)
+bool Protocol_local::store_time(MYSQL_TIME *time, int decimals)
 {
+  if (decimals != AUTO_SEC_PART_DIGITS)
+    time->second_part= sec_part_truncate(time->second_part, decimals);
   return store_column(time, sizeof(MYSQL_TIME));
 }
 
diff --git a/sql/sql_priv.h b/sql/sql_priv.h
index caf82beb982..33c5881d4f4 100644
--- a/sql/sql_priv.h
+++ b/sql/sql_priv.h
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2010-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -159,18 +160,55 @@
 #define OPTIMIZER_SWITCH_INDEX_MERGE_UNION         (1ULL << 1)
 #define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION    (1ULL << 2)
 #define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT     (1ULL << 3)
-#define OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN (1ULL << 4)
-#define OPTIMIZER_SWITCH_LAST                      (1ULL << 5)
-
-/* The following must be kept in sync with optimizer_switch_str in mysqld.cc */
-#define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
-                                  OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
-                                  OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
-                                  OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT | \
-                                  OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN)
-
+#define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT (1ULL << 4)
+#define OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN (1ULL << 5)
+#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN       (1ULL << 6)
+#define OPTIMIZER_SWITCH_DERIVED_MERGE             (1ULL << 7)
+#define OPTIMIZER_SWITCH_DERIVED_WITH_KEYS         (1ULL << 8)
+#define OPTIMIZER_SWITCH_FIRSTMATCH                (1ULL << 9)
+#define OPTIMIZER_SWITCH_LOOSE_SCAN                (1ULL << 10)
+#define OPTIMIZER_SWITCH_MATERIALIZATION           (1ULL << 11)
+#define OPTIMIZER_SWITCH_IN_TO_EXISTS              (1ULL << 12)
+#define OPTIMIZER_SWITCH_SEMIJOIN                  (1ULL << 13)
+#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE (1ULL << 14)
+#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN  (1ULL << 15)
+#define OPTIMIZER_SWITCH_SUBQUERY_CACHE            (1ULL << 16)
+/** If this is off, MRR is never used. */
+#define OPTIMIZER_SWITCH_MRR                       (1ULL << 17)
+/**
+   If OPTIMIZER_SWITCH_MRR is on and this is on, MRR is used depending on a
+   cost-based choice ("automatic"). If OPTIMIZER_SWITCH_MRR is on and this is
+   off, MRR is "forced" (i.e. used as long as the storage engine is capable of
+   doing it).
+*/
+#define OPTIMIZER_SWITCH_MRR_COST_BASED            (1ULL << 18)
+#define OPTIMIZER_SWITCH_MRR_SORT_KEYS             (1ULL << 19)
+#define OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE     (1ULL << 20)
+#define OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE       (1ULL << 21)
+#define OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL    (1ULL << 22)
+#define OPTIMIZER_SWITCH_JOIN_CACHE_HASHED         (1ULL << 23)
+#define OPTIMIZER_SWITCH_JOIN_CACHE_BKA            (1ULL << 24)
+#define OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE (1ULL << 25)
+#define OPTIMIZER_SWITCH_TABLE_ELIMINATION         (1ULL << 26)
+#define OPTIMIZER_SWITCH_LAST                      (1ULL << 26)
 
 /*
+TODO: Materialization is off by default to mimic 5.1/5.2 behavior.
+Once the cost-based choice between materialization and in-to-exists is to be
+enabled by default, add OPTIMIZER_SWITCH_MATERIALIZATION to the list below.
+*/
+#define OPTIMIZER_SWITCH_DEFAULT   (OPTIMIZER_SWITCH_INDEX_MERGE | \
+                                    OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
+                                    OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
+                                    OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT | \
+                                    OPTIMIZER_SWITCH_TABLE_ELIMINATION | \
+                                    OPTIMIZER_SWITCH_IN_TO_EXISTS | \
+                                    OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
+                                    OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
+                                    OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL | \
+                                    OPTIMIZER_SWITCH_JOIN_CACHE_HASHED | \
+                                    OPTIMIZER_SWITCH_JOIN_CACHE_BKA)
+/*
   Replication uses 8 bytes to store SQL_MODE in the binary log. The day you
   use strictly more than 64 bits by adding one more define above, you should
   contact the replication team because the replication code should then be
@@ -208,16 +246,35 @@
   it's a constant one.
 */
 #define CONTEXT_ANALYSIS_ONLY_DERIVED 4
+/*
+  Don't evaluate constant sub-expressions of virtual column
+  expressions when opening tables
+*/ 
+#define CONTEXT_ANALYSIS_ONLY_VCOL_EXPR 8
 
-// uncachable cause
-#define UNCACHEABLE_DEPENDENT   1
+
+/*
+  Uncacheable causes:
+*/
+/* This subquery has fields from outer query (put by user) */
+#define UNCACHEABLE_DEPENDENT_GENERATED   1
+/* This subquery contains functions with random result */
 #define UNCACHEABLE_RAND        2
+/* This subquery contains functions with side effect */
 #define UNCACHEABLE_SIDEEFFECT	4
-/// forcing to save JOIN for explain
+/* Forcing to save JOIN tables for explain */
 #define UNCACHEABLE_EXPLAIN     8
 /* For uncorrelated SELECT in an UNION with some correlated SELECTs */
 #define UNCACHEABLE_UNITED     16
 #define UNCACHEABLE_CHECKOPTION 32
+/*
+  This subquery has fields from outer query injected during
+  transformation process
+*/
+#define UNCACHEABLE_DEPENDENT_INJECTED  64
+/* This subquery has fields from outer query (any nature) */
+#define UNCACHEABLE_DEPENDENT (UNCACHEABLE_DEPENDENT_GENERATED | \
+                               UNCACHEABLE_DEPENDENT_INJECTED)
 
 /* Used to check GROUP BY list in the MODE_ONLY_FULL_GROUP_BY mode */
 #define UNDEF_POS (-1)
@@ -225,6 +282,11 @@
 /* BINLOG_DUMP options */
 
 #define BINLOG_DUMP_NON_BLOCK   1
+#endif /* !MYSQL_CLIENT */
+
+#define BINLOG_SEND_ANNOTATE_ROWS_EVENT   2
+
+#ifndef MYSQL_CLIENT
 
 /*
   Some defines for exit codes for ::is_equal class functions.
@@ -239,7 +301,9 @@ enum enum_parsing_place
   IN_HAVING,
   SELECT_LIST,
   IN_WHERE,
-  IN_ON
+  IN_ON,
+  IN_GROUP_BY,
+  PARSING_PLACE_SIZE /* always should be the last */
 };
 
 
@@ -256,10 +320,6 @@ enum enum_yes_no_unknown
 };
 
 #ifdef MYSQL_SERVER
-
-#endif /* MYSQL_SERVER */
-
-#ifdef MYSQL_SERVER
 /*
   External variables
 */
diff --git a/sql/sql_profile.cc b/sql/sql_profile.cc
index 5910833612a..aee42af8b71 100644
--- a/sql/sql_profile.cc
+++ b/sql/sql_profile.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2007, 2010, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -37,7 +37,7 @@
 #include "sql_class.h"                    // THD
 
 #define TIME_FLOAT_DIGITS 9
-/** two vals encoded: (dec*100)+len */
+/** two vals encoded: (len*100)+dec */
 #define TIME_I_S_DECIMAL_SIZE (TIME_FLOAT_DIGITS*100)+(TIME_FLOAT_DIGITS-3)
 
 #define MAX_QUERY_LENGTH 300
@@ -241,7 +241,7 @@ void PROF_MEASUREMENT::set_label(const char *status_arg,
 */
 void PROF_MEASUREMENT::collect()
 {
-  time_usecs= (double) my_getsystime() / 10.0;  /* 1 sec was 1e7, now is 1e6 */
+  time_usecs= my_interval_timer() / 1e3;  /* ns to us */
 #ifdef HAVE_GETRUSAGE
   getrusage(RUSAGE_SELF, &rusage);
 #elif defined(_WIN32)
@@ -250,6 +250,8 @@ void PROF_MEASUREMENT::collect()
   // which is typically ~15ms. So intervals shorter than that will not be
   // measurable by this function.
   GetProcessTimes(GetCurrentProcess(), &ftDummy, &ftDummy, &ftKernel, &ftUser);
+  GetProcessIoCounters(GetCurrentProcess(), &io_count);
+  GetProcessMemoryInfo(GetCurrentProcess(), &mem_count, sizeof(mem_count));
 #endif
 }
 
@@ -657,6 +659,17 @@ int PROFILING::fill_statistics_info(THD *thd_arg, TABLE_LIST *tables, Item *cond
       table->field[9]->store((uint32)(entry->rusage.ru_oublock -
                              previous->rusage.ru_oublock));
       table->field[9]->set_notnull();
+#elif defined(__WIN__)
+      ULONGLONG reads_delta = entry->io_count.ReadOperationCount - 
+                              previous->io_count.ReadOperationCount;
+      ULONGLONG writes_delta = entry->io_count.WriteOperationCount - 
+                              previous->io_count.WriteOperationCount;
+
+      table->field[8]->store((uint32)reads_delta);
+      table->field[8]->set_notnull();
+
+      table->field[9]->store((uint32)writes_delta);
+      table->field[9]->set_notnull();
 #else
       /* TODO: Add block IO info for non-BSD systems */
 #endif
@@ -679,6 +692,13 @@ int PROFILING::fill_statistics_info(THD *thd_arg, TABLE_LIST *tables, Item *cond
       table->field[13]->store((uint32)(entry->rusage.ru_minflt -
                              previous->rusage.ru_minflt), true);
       table->field[13]->set_notnull();
+#elif defined(__WIN__)
+      /* Windows APIs don't easily distinguish between hard and soft page
+         faults, so we just fill the 'major' column and leave the second NULL.
+      */
+      table->field[12]->store((uint32)(entry->mem_count.PageFaultCount -
+                             previous->mem_count.PageFaultCount), true);
+      table->field[12]->set_notnull();
 #else
       /* TODO: Add page fault info for non-BSD systems */
 #endif
diff --git a/sql/sql_profile.h b/sql/sql_profile.h
index 6a97be5657c..2e93912fb25 100644
--- a/sql/sql_profile.h
+++ b/sql/sql_profile.h
@@ -43,6 +43,10 @@ int make_profile_table_for_show(THD *thd, ST_SCHEMA_TABLE *schema_table);
 #include "sql_priv.h"
 #include "unireg.h"
 
+#ifdef __WIN__
+#include <psapi.h>
+#endif
+
 #ifdef HAVE_SYS_RESOURCE_H
 #include <sys/resource.h>
 #endif
@@ -174,6 +178,8 @@ private:
   struct rusage rusage;
 #elif defined(_WIN32)
   FILETIME ftKernel, ftUser;
+  IO_COUNTERS io_count;
+  PROCESS_MEMORY_COUNTERS mem_count;
 #endif
 
   char *function;
diff --git a/sql/sql_reload.cc b/sql/sql_reload.cc
index b567e3a1f85..7a30973699b 100644
--- a/sql/sql_reload.cc
+++ b/sql/sql_reload.cc
@@ -26,6 +26,7 @@
 #include "sql_repl.h"    // reset_master, reset_slave
 #include "debug_sync.h"
 
+static void disable_checkpoints(THD *thd);
 
 /**
   Reload/resets privileges and the different caches.
@@ -157,7 +158,7 @@ bool reload_acl_and_cache(THD *thd, unsigned long options,
 #ifdef HAVE_QUERY_CACHE
   if (options & REFRESH_QUERY_CACHE_FREE)
   {
-    query_cache.pack();				// FLUSH QUERY CACHE
+    query_cache.pack(thd);              // FLUSH QUERY CACHE
     options &= ~REFRESH_QUERY_CACHE;    // Don't flush cache, just free memory
   }
   if (options & (REFRESH_TABLES | REFRESH_QUERY_CACHE))
@@ -208,6 +209,8 @@ bool reload_acl_and_cache(THD *thd, unsigned long options,
         thd->global_read_lock.unlock_global_read_lock(thd);
         return 1;
       }
+      if (options & REFRESH_CHECKPOINT)
+        disable_checkpoints(thd);
     }
     else
     {
@@ -296,6 +299,35 @@ bool reload_acl_and_cache(THD *thd, unsigned long options,
 #endif
  if (options & REFRESH_USER_RESOURCES)
    reset_mqh((LEX_USER *) NULL, 0);             /* purecov: inspected */
+  if (options & REFRESH_TABLE_STATS)
+  {
+    mysql_mutex_lock(&LOCK_global_table_stats);
+    free_global_table_stats();
+    init_global_table_stats();
+    mysql_mutex_unlock(&LOCK_global_table_stats);
+  }
+  if (options & REFRESH_INDEX_STATS)
+  {
+    mysql_mutex_lock(&LOCK_global_index_stats);
+    free_global_index_stats();
+    init_global_index_stats();
+    mysql_mutex_unlock(&LOCK_global_index_stats);
+  }
+  if (options & (REFRESH_USER_STATS | REFRESH_CLIENT_STATS))
+  {
+    mysql_mutex_lock(&LOCK_global_user_client_stats);
+    if (options & REFRESH_USER_STATS)
+    {
+      free_global_user_stats();
+      init_global_user_stats();
+    }
+    if (options & REFRESH_CLIENT_STATS)
+    {
+      free_global_client_stats();
+      init_global_client_stats();
+    }
+    mysql_mutex_unlock(&LOCK_global_user_client_stats);
+  }
  if (*write_to_binlog != -1)
    *write_to_binlog= tmp_write_to_binlog;
  /*
@@ -451,4 +483,18 @@ error:
 }
 
 
+/**
+   Disable checkpoints for all handlers
+   This is released in unlock_global_read_lock()
+*/
+
+static void disable_checkpoints(THD *thd)
+{
+  if (!thd->global_disable_checkpoint)
+  {
+    thd->global_disable_checkpoint= 1;
+    if (!global_disable_checkpoint++)
+      ha_checkpoint_state(1);                   // Disable checkpoints
+  }
+}
 
diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc
index f32dc75148a..6cc08e64d86 100644
--- a/sql/sql_rename.cc
+++ b/sql/sql_rename.cc
@@ -239,7 +239,7 @@ do_rename(THD *thd, TABLE_LIST *ren_table, char *new_db, char *new_table_name,
           char *new_table_alias, bool skip_error)
 {
   int rc= 1;
-  char name[FN_REFLEN + 1];
+  char new_name[FN_REFLEN + 1], old_name[FN_REFLEN + 1];
   const char *new_alias, *old_alias;
   frm_type_enum frm_type;
   enum legacy_db_type table_type;
@@ -258,17 +258,17 @@ do_rename(THD *thd, TABLE_LIST *ren_table, char *new_db, char *new_table_name,
   }
   DBUG_ASSERT(new_alias);
 
-  build_table_filename(name, sizeof(name) - 1,
+  build_table_filename(new_name, sizeof(new_name) - 1,
                        new_db, new_alias, reg_ext, 0);
-  if (!access(name,F_OK))
+  build_table_filename(old_name, sizeof(old_name) - 1,
+                       ren_table->db, old_alias, reg_ext, 0);
+  if (check_table_file_presence(old_name,
+                                new_name, new_db, new_alias, new_alias, TRUE))
   {
-    my_error(ER_TABLE_EXISTS_ERROR, MYF(0), new_alias);
     DBUG_RETURN(1);			// This can't be skipped
   }
-  build_table_filename(name, sizeof(name) - 1,
-                       ren_table->db, old_alias, reg_ext, 0);
 
-  frm_type= dd_frm_type(thd, name, &table_type);
+  frm_type= dd_frm_type(thd, old_name, &table_type);
   switch (frm_type)
   {
     case FRMTYPE_TABLE:
@@ -314,7 +314,7 @@ do_rename(THD *thd, TABLE_LIST *ren_table, char *new_db, char *new_table_name,
     default:
       DBUG_ASSERT(0); // should never happen
     case FRMTYPE_ERROR:
-      my_error(ER_FILE_NOT_FOUND, MYF(0), name, my_errno);
+      my_error(ER_FILE_NOT_FOUND, MYF(0), old_name, my_errno);
       break;
   }
   if (rc && !skip_error)
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index a37e8700e4f..91c78944205 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -41,6 +42,8 @@ static int binlog_dump_count = 0;
 */
 uint sql_slave_skip_counter;
 
+extern TYPELIB binlog_checksum_typelib;
+
 /*
     fake_rotate_event() builds a fake (=which does not exist physically in any
     binlog) Rotate event, which contains the name of the binlog we are going to
@@ -60,10 +63,21 @@ uint sql_slave_skip_counter;
 */
 
 static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
-                             ulonglong position, const char** errmsg)
+                             ulonglong position, const char** errmsg,
+                             uint8 checksum_alg_arg)
 {
   DBUG_ENTER("fake_rotate_event");
   char header[LOG_EVENT_HEADER_LEN], buf[ROTATE_HEADER_LEN+100];
+
+  /*
+    This Rotate is sent with a checksum if and only if the value of the
+    master's @@global.binlog_checksum recorded at the slave's
+    get_master_version_and_clock() handshake was neither OFF nor undefined.
+  */
+
+  my_bool do_checksum= checksum_alg_arg != BINLOG_CHECKSUM_ALG_OFF &&
+    checksum_alg_arg != BINLOG_CHECKSUM_ALG_UNDEF;
+
   /*
     'when' (the timestamp) is set to 0 so that slave could distinguish between
     real and fake Rotate events (if necessary)
@@ -73,7 +87,8 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
 
   char* p = log_file_name+dirname_length(log_file_name);
   uint ident_len = (uint) strlen(p);
-  ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN;
+  ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN +
+    (do_checksum ? BINLOG_CHECKSUM_LEN : 0);
   int4store(header + SERVER_ID_OFFSET, server_id);
   int4store(header + EVENT_LEN_OFFSET, event_len);
   int2store(header + FLAGS_OFFSET, LOG_EVENT_ARTIFICIAL_F);
@@ -84,7 +99,19 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
   packet->append(header, sizeof(header));
   int8store(buf+R_POS_OFFSET,position);
   packet->append(buf, ROTATE_HEADER_LEN);
-  packet->append(p,ident_len);
+  packet->append(p, ident_len);
+
+  if (do_checksum)
+  {
+    char b[BINLOG_CHECKSUM_LEN];
+    ha_checksum crc= my_checksum(0L, NULL, 0);
+    crc= my_checksum(crc, (uchar*)header, sizeof(header));
+    crc= my_checksum(crc, (uchar*)buf, ROTATE_HEADER_LEN);
+    crc= my_checksum(crc, (uchar*)p, ident_len);
+    int4store(b, crc);
+    packet->append(b, sizeof(b));
+  }
+
   if (my_net_write(net, (uchar*) packet->ptr(), packet->length()))
   {
     *errmsg = "failed on my_net_write()";
@@ -193,6 +220,86 @@ static int send_file(THD *thd)
 }
 
 
+/**
+   Helper internal to mysql_binlog_send() that recalculates the checksum of
+   an FD event (asserted) that needs additional arrangement prior to sending
+   it to the slave.
+*/
+inline void fix_checksum(String *packet, ulong ev_offset)
+{
+  /* recalculate the crc for this event */
+  uint data_len = uint4korr(packet->ptr() + ev_offset + EVENT_LEN_OFFSET);
+  ha_checksum crc= my_checksum(0L, NULL, 0);
+  DBUG_ASSERT(data_len == 
+              LOG_EVENT_MINIMAL_HEADER_LEN + FORMAT_DESCRIPTION_HEADER_LEN +
+              BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN);
+  crc= my_checksum(crc, (uchar *)packet->ptr() + ev_offset, data_len -
+                   BINLOG_CHECKSUM_LEN);
+  int4store(packet->ptr() + ev_offset + data_len - BINLOG_CHECKSUM_LEN, crc);
+}
+
+
+static user_var_entry * get_binlog_checksum_uservar(THD * thd)
+{
+  LEX_STRING name=  { C_STRING_WITH_LEN("master_binlog_checksum")};
+  user_var_entry *entry= 
+    (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name.str,
+                                  name.length);
+  return entry;
+}
+
+/**
+  Function for calling in mysql_binlog_send
+  to check if slave initiated checksum-handshake.
+
+  @param[in]    thd  THD to access a user variable
+
+  @return        TRUE if handshake took place, FALSE otherwise
+*/
+
+static bool is_slave_checksum_aware(THD * thd)
+{
+  DBUG_ENTER("is_slave_checksum_aware");
+  user_var_entry *entry= get_binlog_checksum_uservar(thd);
+  DBUG_RETURN(entry? true  : false);
+}
+
+/**
+  Function for calling in mysql_binlog_send
+  to get the value of @@binlog_checksum of the master at
+  time of checksum-handshake.
+
+  The value tells the master whether to compute or not, and the slave
+  to verify or not the first artificial Rotate event's checksum.
+
+  @param[in]    thd  THD to access a user variable
+
+  @return       value of @@binlog_checksum alg according to
+                @c enum enum_binlog_checksum_alg
+*/
+
+static uint8 get_binlog_checksum_value_at_connect(THD * thd)
+{
+  uint8 ret;
+
+  DBUG_ENTER("get_binlog_checksum_value_at_connect");
+  user_var_entry *entry= get_binlog_checksum_uservar(thd);
+  if (!entry)
+  {
+    ret= BINLOG_CHECKSUM_ALG_UNDEF;
+  }
+  else
+  {
+    DBUG_ASSERT(entry->type == STRING_RESULT);
+    String str;
+    uint dummy_errors;
+    str.copy(entry->value, entry->length, &my_charset_bin, &my_charset_bin,
+             &dummy_errors);
+    ret= (uint8) find_type ((char*) str.ptr(), &binlog_checksum_typelib, 1) - 1;
+    DBUG_ASSERT(ret <= BINLOG_CHECKSUM_ALG_CRC32); // while it's just on CRC32 alg
+  }
+  DBUG_RETURN(ret);
+}
+
 /*
   Adjust the position pointer in the binary log file for all running slaves
 
@@ -354,6 +461,9 @@ Increase max_allowed_packet on master";
   case LOG_READ_TRUNC:
     *errmsg = "binlog truncated in the middle of event; consider out of disk space on master";
     break;
+  case LOG_READ_CHECKSUM_FAILURE:
+    *errmsg = "event read from binlog did not pass crc check";
+    break;
   default:
     *errmsg = "unknown error reading log event on the master";
     break;
@@ -373,7 +483,7 @@ Increase max_allowed_packet on master";
 */ 
 static ulonglong get_heartbeat_period(THD * thd)
 {
-  my_bool null_value;
+  bool null_value;
   LEX_STRING name=  { C_STRING_WITH_LEN("master_heartbeat_period")};
   user_var_entry *entry= 
     (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name.str,
@@ -455,10 +565,11 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
   mysql_cond_t *log_cond;
 
   bool binlog_can_be_corrupted= FALSE;
+  uint8 current_checksum_alg= BINLOG_CHECKSUM_ALG_UNDEF;
+  int old_max_allowed_packet= thd->variables.max_allowed_packet;
 #ifndef DBUG_OFF
   int left_events = max_binlog_dump_events;
 #endif
-  int old_max_allowed_packet= thd->variables.max_allowed_packet;
   DBUG_ENTER("mysql_binlog_send");
   DBUG_PRINT("enter",("log_ident: '%s'  pos: %ld", log_ident, (long) pos));
 
@@ -574,7 +685,8 @@ impossible position";
     given that we want minimum modification of 4.0, we send the normal
     and fake Rotates.
   */
-  if (fake_rotate_event(net, packet, log_file_name, pos, &errmsg))
+  if (fake_rotate_event(net, packet, log_file_name, pos, &errmsg,
+                        get_binlog_checksum_value_at_connect(thd)))
   {
     /*
        This error code is not perfect, as fake_rotate_event() does not
@@ -610,8 +722,8 @@ impossible position";
        Try to find a Format_description_log_event at the beginning of
        the binlog
      */
-     if (!(error = Log_event::read_log_event(&log, packet, log_lock)))
-     {
+    if (!(error = Log_event::read_log_event(&log, packet, log_lock, 0)))
+    { 
        /*
          The packet has offsets equal to the normal offsets in a
          binlog event + ev_offset (the first ev_offset characters are
@@ -622,6 +734,23 @@ impossible position";
                    (*packet)[EVENT_TYPE_OFFSET+ev_offset]));
        if ((*packet)[EVENT_TYPE_OFFSET+ev_offset] == FORMAT_DESCRIPTION_EVENT)
        {
+         current_checksum_alg= get_checksum_alg(packet->ptr() + ev_offset,
+                                                packet->length() - ev_offset);
+         DBUG_ASSERT(current_checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
+                     current_checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF ||
+                     current_checksum_alg == BINLOG_CHECKSUM_ALG_CRC32);
+         if (!is_slave_checksum_aware(thd) &&
+             current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+             current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
+         {
+           my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
+           errmsg= "Slave can not handle replication events with the checksum "
+             "that master is configured to log";
+           sql_print_warning("Master is configured to log replication events "
+                             "with checksum, but will not send such events to "
+                             "slaves that cannot process them");
+           goto err;
+         }
          binlog_can_be_corrupted= test((*packet)[FLAGS_OFFSET+ev_offset] &
                                        LOG_EVENT_BINLOG_IN_USE_F);
          (*packet)[FLAGS_OFFSET+ev_offset] &= ~LOG_EVENT_BINLOG_IN_USE_F;
@@ -637,6 +766,12 @@ impossible position";
           */
          int4store((char*) packet->ptr()+LOG_EVENT_MINIMAL_HEADER_LEN+
                    ST_CREATED_OFFSET+ev_offset, (ulong) 0);
+
+	 /* fix the checksum due to latest changes in header */
+	 if (current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+             current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
+           fix_checksum(packet, ev_offset);
+
          /* send it */
          if (my_net_write(net, (uchar*) packet->ptr(), packet->length()))
          {
@@ -680,7 +815,8 @@ impossible position";
     if (reset_transmit_packet(thd, flags, &ev_offset, &errmsg))
       goto err;
 
-    while (!(error= Log_event::read_log_event(&log, packet, log_lock)))
+    while (!(error = Log_event::read_log_event(&log, packet, log_lock,
+                                               current_checksum_alg)))
     {
 #ifndef DBUG_OFF
       if (max_binlog_dump_events && !left_events--)
@@ -697,7 +833,8 @@ impossible position";
       if (coord)
         coord->pos= uint4korr(packet->ptr() + ev_offset + LOG_POS_OFFSET);
 
-      event_type= (Log_event_type)((*packet)[LOG_EVENT_OFFSET+ev_offset]);
+      event_type=
+        (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET+ev_offset]);
       DBUG_EXECUTE_IF("dump_thread_wait_before_send_xid",
                       {
                         if (event_type == XID_EVENT)
@@ -707,12 +844,35 @@ impossible position";
                             "now "
                             "wait_for signal.continue";
                           DBUG_ASSERT(opt_debug_sync_timeout > 0);
-                          DBUG_ASSERT(!debug_sync_set_action(current_thd,
+                          DBUG_ASSERT(!debug_sync_set_action(thd,
                                                              STRING_WITH_LEN(act)));
+                          const char act2[]=
+                            "now "
+                            "signal signal.continued";
+                          DBUG_ASSERT(!debug_sync_set_action(current_thd,
+                                                             STRING_WITH_LEN(act2)));
                         }
                       });
       if (event_type == FORMAT_DESCRIPTION_EVENT)
       {
+        current_checksum_alg= get_checksum_alg(packet->ptr() + ev_offset,
+                                               packet->length() - ev_offset);
+        DBUG_ASSERT(current_checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
+                    current_checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF ||
+                    current_checksum_alg == BINLOG_CHECKSUM_ALG_CRC32);
+        if (!is_slave_checksum_aware(thd) &&
+            current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+            current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
+        {
+          my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
+          errmsg= "Slave can not handle replication events with the checksum "
+            "that master is configured to log";
+          sql_print_warning("Master is configured to log replication events "
+                            "with checksum, but will not send such events to "
+                            "slaves that cannot process them");
+          goto err;
+        }
+
         binlog_can_be_corrupted= test((*packet)[FLAGS_OFFSET+ev_offset] &
                                       LOG_EVENT_BINLOG_IN_USE_F);
         (*packet)[FLAGS_OFFSET+ev_offset] &= ~LOG_EVENT_BINLOG_IN_USE_F;
@@ -720,46 +880,50 @@ impossible position";
       else if (event_type == STOP_EVENT)
         binlog_can_be_corrupted= FALSE;
 
-      pos = my_b_tell(&log);
-      if (RUN_HOOK(binlog_transmit, before_send_event,
-                   (thd, flags, packet, log_file_name, pos)))
+      if (event_type != ANNOTATE_ROWS_EVENT ||
+          (flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT))
       {
-        my_errno= ER_UNKNOWN_ERROR;
-        errmsg= "run 'before_send_event' hook failed";
-        goto err;
-      }
+        pos = my_b_tell(&log);
+        if (RUN_HOOK(binlog_transmit, before_send_event,
+                     (thd, flags, packet, log_file_name, pos)))
+        {
+          my_errno= ER_UNKNOWN_ERROR;
+          errmsg= "run 'before_send_event' hook failed";
+          goto err;
+        }
 
-      if (my_net_write(net, (uchar*) packet->ptr(), packet->length()))
-      {
-	errmsg = "Failed on my_net_write()";
-	my_errno= ER_UNKNOWN_ERROR;
-	goto err;
-      }
+        if (my_net_write(net, (uchar*) packet->ptr(), packet->length()))
+        {
+          errmsg = "Failed on my_net_write()";
+          my_errno= ER_UNKNOWN_ERROR;
+          goto err;
+        }
 
-      DBUG_EXECUTE_IF("dump_thread_wait_before_send_xid",
-                      {
-                        if (event_type == XID_EVENT)
+        DBUG_EXECUTE_IF("dump_thread_wait_before_send_xid",
                         {
-                          net_flush(net);
-                        }
-                      });
-
-      DBUG_PRINT("info", ("log event code %d", event_type));
-      if (event_type == LOAD_EVENT)
-      {
-	if (send_file(thd))
-	{
-	  errmsg = "failed in send_file()";
-	  my_errno= ER_UNKNOWN_ERROR;
-	  goto err;
-	}
-      }
+                          if (event_type == XID_EVENT)
+                          {
+                            net_flush(net);
+                          }
+                        });
+
+        DBUG_PRINT("info", ("log event code %d", event_type));
+        if (event_type == LOAD_EVENT)
+        {
+          if (send_file(thd))
+          {
+            errmsg = "failed in send_file()";
+            my_errno= ER_UNKNOWN_ERROR;
+            goto err;
+          }
+        }
 
-      if (RUN_HOOK(binlog_transmit, after_send_event, (thd, flags, packet)))
-      {
-        errmsg= "Failed to run hook 'after_send_event'";
-        my_errno= ER_UNKNOWN_ERROR;
-        goto err;
+        if (RUN_HOOK(binlog_transmit, after_send_event, (thd, flags, packet)))
+        {
+          errmsg= "Failed to run hook 'after_send_event'";
+          my_errno= ER_UNKNOWN_ERROR;
+          goto err;
+        }
       }
 
       /* reset transmit packet for next loop */
@@ -825,14 +989,16 @@ impossible position";
 	*/
 
         mysql_mutex_lock(log_lock);
-        switch (error= Log_event::read_log_event(&log, packet, (mysql_mutex_t*) 0)) {
+        switch (error= Log_event::read_log_event(&log, packet, (mysql_mutex_t*) 0,
+                                                 current_checksum_alg)) {
 	case 0:
 	  /* we read successfully, so we'll need to send it to the slave */
           mysql_mutex_unlock(log_lock);
 	  read_packet = 1;
           if (coord)
             coord->pos= uint4korr(packet->ptr() + ev_offset + LOG_POS_OFFSET);
-          event_type= (Log_event_type)((*packet)[LOG_EVENT_OFFSET+ev_offset]);
+          event_type=
+            (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET+ev_offset]);
 	  break;
 
 	case LOG_READ_EOF:
@@ -903,7 +1069,9 @@ impossible position";
           goto err;
 	}
 
-	if (read_packet)
+	if (read_packet &&
+            (event_type != ANNOTATE_ROWS_EVENT ||
+             (flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT)))
         {
           thd_proc_info(thd, "Sending binlog event to slave");
           pos = my_b_tell(&log);
@@ -985,7 +1153,7 @@ impossible position";
       */
       if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0 ||
 	  fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE,
-                            &errmsg))
+                            &errmsg, current_checksum_alg))
       {
 	my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
 	goto err;
@@ -1671,7 +1839,6 @@ err:
   thd_proc_info(thd, 0);
   if (ret == FALSE)
     my_ok(thd);
-  delete_dynamic(&lex_mi->repl_ignore_server_ids); //freeing of parser-time alloc
   DBUG_RETURN(ret);
 }
 
@@ -1805,7 +1972,8 @@ bool mysql_show_binlog_events(THD* thd)
       This code will fail on a mixed relay log (one which has Format_desc then
       Rotate then Format_desc).
     */
-    ev= Log_event::read_log_event(&log, (mysql_mutex_t*)0, description_event);
+    ev= Log_event::read_log_event(&log, (mysql_mutex_t*)0, description_event,
+                                   opt_master_verify_checksum);
     if (ev)
     {
       if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
@@ -1827,8 +1995,12 @@ bool mysql_show_binlog_events(THD* thd)
 
     for (event_count = 0;
          (ev = Log_event::read_log_event(&log, (mysql_mutex_t*) 0,
-                                         description_event)); )
+                                         description_event,
+                                         opt_master_verify_checksum)); )
     {
+      if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
+        description_event->checksum_alg= ev->checksum_alg;
+
       if (event_count >= limit_start &&
 	  ev->net_send(protocol, linfo.log_file_name, pos))
       {
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index e67a6ebef62..5499806db81 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2010 Oracle and/or its affiliates.
+   2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -47,43 +48,46 @@
 #include "records.h"             // init_read_record, end_read_record
 #include "filesort.h"            // filesort_free_buffers
 #include "sql_union.h"           // mysql_union
+#include "opt_subselect.h"
+#include "log_slow.h"
+#include "sql_derived.h"
+
 #include "debug_sync.h"          // DEBUG_SYNC
 #include <m_ctype.h>
 #include <my_bit.h>
 #include <hash.h>
 #include <ft_global.h>
 
-#define PREV_BITS(type,A)	((type) (((type) 1 << (A)) -1))
-
 const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
 			      "MAYBE_REF","ALL","range","index","fulltext",
 			      "ref_or_null","unique_subquery","index_subquery",
-                              "index_merge"
-};
+                              "index_merge", "hash_ALL", "hash_range",
+                              "hash_index", "hash_index_merge" };
+
+const char *copy_to_tmp_table= "Copying to tmp table";
 
 struct st_sargable_param;
 
 static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array);
-static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, COND *conds,
-				 DYNAMIC_ARRAY *keyuse);
+static bool make_join_statistics(JOIN *join, List<TABLE_LIST> &leaves, 
+                                 COND *conds, DYNAMIC_ARRAY *keyuse);
 static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
                                 JOIN_TAB *join_tab,
                                 uint tables, COND *conds,
-                                COND_EQUAL *cond_equal,
                                 table_map table_map, SELECT_LEX *select_lex,
                                 st_sargable_param **sargables);
+static bool sort_and_filter_keyuse(THD *thd, DYNAMIC_ARRAY *keyuse,
+                                   bool skip_unprefixed_keyparts);
 static int sort_keyuse(KEYUSE *a,KEYUSE *b);
-static void set_position(JOIN *join,uint index,JOIN_TAB *table,KEYUSE *key);
 static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
 			       table_map used_tables);
-static bool choose_plan(JOIN *join,table_map join_tables);
-
-static void best_access_path(JOIN *join, JOIN_TAB *s, THD *thd,
-                             table_map remaining_tables, uint idx,
-                             double record_count, double read_time);
+void best_access_path(JOIN *join, JOIN_TAB *s, 
+                             table_map remaining_tables, uint idx, 
+                             bool disable_jbuf, double record_count,
+                             POSITION *pos, POSITION *loose_scan_pos);
 static void optimize_straight_join(JOIN *join, table_map join_tables);
 static bool greedy_search(JOIN *join, table_map remaining_tables,
-                             uint depth, uint prune_level);
+                          uint depth, uint prune_level);
 static bool best_extension_by_limited_search(JOIN *join,
                                              table_map remaining_tables,
                                              uint idx, double record_count,
@@ -91,8 +95,9 @@ static bool best_extension_by_limited_search(JOIN *join,
                                              uint prune_level);
 static uint determine_search_depth(JOIN* join);
 C_MODE_START
-static int join_tab_cmp(const void* ptr1, const void* ptr2);
-static int join_tab_cmp_straight(const void* ptr1, const void* ptr2);
+static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
+static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2);
+static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void *ptr2);
 C_MODE_END
 /*
   TODO: 'find_best' is here only temporarily until 'greedy_search' is
@@ -101,21 +106,24 @@ C_MODE_END
 static bool find_best(JOIN *join,table_map rest_tables,uint index,
 		      double record_count,double read_time);
 static uint cache_record_length(JOIN *join,uint index);
-static double prev_record_reads(JOIN *join, uint idx, table_map found_ref);
 static bool get_best_combination(JOIN *join);
 static store_key *get_store_key(THD *thd,
 				KEYUSE *keyuse, table_map used_tables,
 				KEY_PART_INFO *key_part, uchar *key_buff,
 				uint maybe_null);
-static void make_outerjoin_info(JOIN *join);
+static bool make_outerjoin_info(JOIN *join);
+static Item*
+make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables, table_map sjm_tables);
 static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item);
-static void make_join_readinfo(JOIN *join, ulonglong options);
+static void revise_cache_usage(JOIN_TAB *join_tab);
+static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after);
 static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables);
 static void update_depend_map(JOIN *join);
-static void update_depend_map(JOIN *join, ORDER *order);
+static void update_depend_map_for_order(JOIN *join, ORDER *order);
 static ORDER *remove_const(JOIN *join,ORDER *first_order,COND *cond,
 			   bool change_list, bool *simple_order);
-static int return_zero_rows(JOIN *join, select_result *res,TABLE_LIST *tables,
+static int return_zero_rows(JOIN *join, select_result *res, 
+                            List<TABLE_LIST> &tables,
                             List<Item> &fields, bool send_row,
                             ulonglong select_options, const char *info,
                             Item *having);
@@ -127,40 +135,35 @@ static COND* substitute_for_best_equal_field(COND *cond,
                                              COND_EQUAL *cond_equal,
                                              void *table_join_idx);
 static COND *simplify_joins(JOIN *join, List<TABLE_LIST> *join_list,
-                            COND *conds, bool top);
+                            COND *conds, bool top, bool in_sj);
 static bool check_interleaving_with_nj(JOIN_TAB *next);
 static void restore_prev_nj_state(JOIN_TAB *last);
-static void reset_nj_counters(List<TABLE_LIST> *join_list);
+static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list);
 static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
                                           uint first_unused);
 
 static COND *optimize_cond(JOIN *join, COND *conds,
                            List<TABLE_LIST> *join_list,
-			   Item::cond_result *cond_value);
-static bool open_tmp_table(TABLE *table);
-static bool create_myisam_tmp_table(TABLE *,TMP_TABLE_PARAM *, ulonglong, my_bool);
+                           Item::cond_result *cond_value, 
+                           COND_EQUAL **cond_equal);
+bool const_expression_in_where(COND *conds,Item *item, Item **comp_item);
+static bool create_internal_tmp_table_from_heap2(THD *, TABLE *,
+                                     ENGINE_COLUMNDEF *, ENGINE_COLUMNDEF **, 
+                                     int, bool, handlerton *, const char *);
 static int do_select(JOIN *join,List<Item> *fields,TABLE *tmp_table,
 		     Procedure *proc);
 
-static enum_nested_loop_state
-evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
-                     int error);
+static enum_nested_loop_state evaluate_join_record(JOIN *, JOIN_TAB *, int);
 static enum_nested_loop_state
 evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab);
 static enum_nested_loop_state
-flush_cached_records(JOIN *join, JOIN_TAB *join_tab, bool skip_last);
-static enum_nested_loop_state
 end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
 static enum_nested_loop_state
-end_send_group(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
-static enum_nested_loop_state
 end_write(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
 static enum_nested_loop_state
 end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
 static enum_nested_loop_state
 end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
-static enum_nested_loop_state
-end_write_group(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
 
 static int test_if_group_changed(List<Cached_item> &list);
 static int join_read_const_table(JOIN_TAB *tab, POSITION *pos);
@@ -174,7 +177,7 @@ static int join_no_more_records(READ_RECORD *info);
 static int join_read_next(READ_RECORD *info);
 static int join_init_quick_read_record(JOIN_TAB *tab);
 static int test_if_quick_select(JOIN_TAB *tab);
-static int join_init_read_record(JOIN_TAB *tab);
+static bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab);
 static int join_read_first(JOIN_TAB *tab);
 static int join_read_next(READ_RECORD *info);
 static int join_read_next_same(READ_RECORD *info);
@@ -185,8 +188,19 @@ static int join_ft_read_first(JOIN_TAB *tab);
 static int join_ft_read_next(READ_RECORD *info);
 int join_read_always_key_or_null(JOIN_TAB *tab);
 int join_read_next_same_or_null(READ_RECORD *info);
-static COND *make_cond_for_table(COND *cond,table_map table,
-				 table_map used_table);
+static COND *make_cond_for_table(THD *thd, Item *cond,table_map table,
+                                 table_map used_table,
+                                 uint join_tab_idx_arg,
+                                 bool exclude_expensive_cond,
+                                 bool retain_ref_cond);
+static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond,
+                                           Item *cond,
+                                           table_map tables,
+                                           table_map used_table,
+                                           uint join_tab_idx_arg,
+                                           bool exclude_expensive_cond,
+                                           bool retain_ref_cond);
+
 static Item* part_of_refkey(TABLE *form,Field *field);
 uint find_shortest_key(TABLE *table, const key_map *usable_keys);
 static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
@@ -199,7 +213,7 @@ static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
                                      uint *saved_best_key_parts= NULL);
 static bool test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,
 				    ha_rows select_limit, bool no_changes,
-                                    key_map *map);
+                                    const key_map *map);
 static bool list_contains_unique_index(TABLE *table,
                           bool (*find_func) (Field *, void *), void *data);
 static bool find_field_in_item_list (Field *field, void *data);
@@ -213,15 +227,8 @@ static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field,
 				   ulong offset,Item *having);
 static int remove_dup_with_hash_index(THD *thd,TABLE *table,
 				      uint field_count, Field **first_field,
-
 				      ulong key_length,Item *having);
-static int join_init_cache(THD *thd,JOIN_TAB *tables,uint table_count);
-static ulong used_blob_length(CACHE_FIELD **ptr);
-static bool store_record_in_cache(JOIN_CACHE *cache);
-static void reset_cache_read(JOIN_CACHE *cache);
-static void reset_cache_write(JOIN_CACHE *cache);
-static void read_cached_record(JOIN_TAB *tab);
-static bool cmp_buffer_with_ref(JOIN_TAB *tab);
+static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref);
 static bool setup_new_fields(THD *thd, List<Item> &fields,
 			     List<Item> &all_fields, ORDER *new_order);
 static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array,
@@ -229,7 +236,7 @@ static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array,
                                     List<Item> &all_fields,
 				    bool *all_order_by_fields_used);
 static bool test_if_subpart(ORDER *a,ORDER *b);
-static TABLE *get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables);
+static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List<TABLE_LIST> &tables);
 static void calc_group_buffer(JOIN *join,ORDER *group);
 static bool make_group_fields(JOIN *main_join, JOIN *curr_join);
 static bool alloc_group_fields(JOIN *join,ORDER *group);
@@ -253,10 +260,14 @@ static bool init_sum_functions(Item_sum **func, Item_sum **end);
 static bool update_sum_func(Item_sum **func);
 static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
 			    bool distinct, const char *message=NullS);
-static Item *remove_additional_cond(Item* conds);
 static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
-static bool test_if_ref(Item_field *left_item,Item *right_item);
+static uint make_join_orderinfo(JOIN *join);
+static bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array);
 
+Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
+                            bool *inherited_fl);
+JOIN_TAB *first_depth_first_tab(JOIN* join);
+JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab);
 
 /**
   This handles SELECT with and without UNION.
@@ -339,7 +350,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
         function is, in turn, aggregated in the query block where the outer
         field was resolved or some query nested therein, then the
         Item_direct_ref class should be used. Also it should be used if we are
-        grouping by a subquery containing the outer field.
+        grouping by a subquery that references this outer field.
 
     The resolution is done here and not at the fix_fields() stage as
     it can be done only after aggregate functions are fixed and pulled up to
@@ -358,11 +369,23 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
 
 bool
 fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
-                 Item **ref_pointer_array, ORDER *group_list)
+                 Item **ref_pointer_array)
 {
   Item_outer_ref *ref;
 
-  List_iterator<Item_outer_ref> ref_it(select->inner_refs_list);
+  /*
+    Mark the references from the inner_refs_list that occur in
+    the GROUP BY expressions. Those references will contain direct
+    references to the referred fields. The markers are set in
+    the found_in_group_by field of the references from the list.
+  */
+  List_iterator_fast <Item_outer_ref> ref_it(select->inner_refs_list);
+  for (ORDER *group= select->join->group_list; group;  group= group->next)
+  {
+    (*group->item)->walk(&Item::check_inner_refs_processor,
+                         TRUE, (uchar *) &ref_it);
+  } 
+    
   while ((ref= ref_it++))
   {
     bool direct_ref= false;
@@ -407,22 +430,9 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
         }
       }
     }
-    else
-    {
-      /*
-        Check if GROUP BY item trees contain the outer ref:
-        in this case we have to use Item_direct_ref instead of Item_ref.
-      */
-      for (ORDER *group= group_list; group; group= group->next)
-      {
-        if ((*group->item)->walk(&Item::find_item_processor, TRUE,
-                                 (uchar *) ref))
-        {
-          direct_ref= TRUE;
-          break;
-        }
-      }
-    }
+    else if (ref->found_in_group_by)
+      direct_ref= TRUE;
+
     new_ref= direct_ref ?
               new Item_direct_ref(ref->context, item_ref, ref->table_name,
                           ref->field_name, ref->alias_name_used) :
@@ -445,7 +455,7 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
 */
 inline int setup_without_group(THD *thd, Item **ref_pointer_array,
 			       TABLE_LIST *tables,
-			       TABLE_LIST *leaves,
+			       List<TABLE_LIST> &leaves,
 			       List<Item> &fields,
 			       List<Item> &all_fields,
 			       COND **conds,
@@ -483,6 +493,7 @@ inline int setup_without_group(THD *thd, Item **ref_pointer_array,
   mysql_select assumes that all tables are already opened
 *****************************************************************************/
 
+
 /**
   Prepare of whole select (including sub queries in future).
 
@@ -521,29 +532,70 @@ JOIN::prepare(Item ***rref_pointer_array,
   join_list= &select_lex->top_join_list;
   union_part= unit_arg->is_union();
 
+  if (select_lex->handle_derived(thd->lex, DT_PREPARE))
+    DBUG_RETURN(1);
+
   thd->lex->current_select->is_item_list_lookup= 1;
   /*
     If we have already executed SELECT, then it have not sense to prevent
     its table from update (see unique_table())
+    Affects only materialized derived tables.
   */
-  if (thd->derived_tables_processing)
-    select_lex->exclude_from_table_unique_test= TRUE;
-
   /* Check that all tables, fields, conds and order are ok */
-
   if (!(select_options & OPTION_SETUP_TABLES_DONE) &&
       setup_tables_and_check_access(thd, &select_lex->context, join_list,
-                                    tables_list, &select_lex->leaf_tables,
-                                    FALSE, SELECT_ACL, SELECT_ACL))
+                                    tables_list, select_lex->leaf_tables,
+                                    FALSE, SELECT_ACL, SELECT_ACL, FALSE))
       DBUG_RETURN(-1);
+  
+  /*
+    TRUE if the SELECT list mixes elements with and without grouping,
+    and there is no GROUP BY clause. Mixing non-aggregated fields with
+    aggregate functions in the SELECT list is a MySQL extension that
+    is allowed only if the ONLY_FULL_GROUP_BY sql mode is not set.
+  */
+  bool mixed_implicit_grouping= false;
+  if ((~thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY) &&
+      select_lex->with_sum_func && !group_list)
+  {
+    List_iterator_fast <Item> select_it(fields_list);
+    Item *select_el; /* Element of the SELECT clause, can be an expression. */
+    bool found_field_elem= false;
+    bool found_sum_func_elem= false;
+
+    while ((select_el= select_it++))
+    {
+      if (select_el->with_sum_func)
+        found_sum_func_elem= true;
+      if (select_el->with_field)
+        found_field_elem= true;
+      if (found_sum_func_elem && found_field_elem)
+      {
+        mixed_implicit_grouping= true;
+        break;
+      }
+    }
+  }
+
+  table_count= select_lex->leaf_tables.elements;
  
-  TABLE_LIST *table_ptr;
-  for (table_ptr= select_lex->leaf_tables;
-       table_ptr;
-       table_ptr= table_ptr->next_leaf)
-    tables++;
+  TABLE_LIST *tbl;
+  List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
+  while ((tbl= li++))
+  {
+    //table_count++; /* Count the number of tables in the join. */
+    /*
+      If the query uses implicit grouping where the select list contains both
+      aggregate functions and non-aggregate fields, any non-aggregated field
+      may produce a NULL value. Set all fields of each table as nullable before
+      semantic analysis to take into account this change of nullability.
+    */
+    if (mixed_implicit_grouping)
+      tbl->table->maybe_null= 1;
+  }
 
-  if (setup_wild(thd, tables_list, fields_list, &all_fields, wild_num) ||
+  if ((wild_num && setup_wild(thd, tables_list, fields_list, &all_fields,
+                              wild_num)) ||
       select_lex->setup_ref_array(thd, og_num) ||
       setup_fields(thd, (*rref_pointer_array), fields_list, MARK_COLUMNS_READ,
 		   &all_fields, 1) ||
@@ -561,6 +613,13 @@ JOIN::prepare(Item ***rref_pointer_array,
     thd->where="having clause";
     thd->lex->allow_sum_func|= 1 << select_lex_arg->nest_level;
     select_lex->having_fix_field= 1;
+    /*
+      Wrap a lone field in the HAVING clause: it may be an outer field of a subquery
+      that needs a persistent pointer to it, yet HAVING may be changed by the optimizer
+    */
+    if (having->type() == Item::REF_ITEM &&
+        ((Item_ref *)having)->ref_type() == Item_ref::REF)
+      wrap_ident(thd, &having);
     bool having_fix_rc= (!having->fixed &&
 			 (having->fix_fields(thd, &having) ||
 			  having->check_cols(1)));
@@ -569,25 +628,13 @@ JOIN::prepare(Item ***rref_pointer_array,
       DBUG_RETURN(-1);				/* purecov: inspected */
     thd->lex->allow_sum_func= save_allow_sum_func;
   }
-
-  if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) &&
-      !(select_options & SELECT_DESCRIBE))
-  {
-    Item_subselect *subselect;
-    /* Is it subselect? */
-    if ((subselect= select_lex->master_unit()->item))
-    {
-      Item_subselect::trans_res res;
-      if ((res= subselect->select_transformer(this)) !=
-	  Item_subselect::RES_OK)
-      {
-        select_lex->fix_prepare_information(thd, &conds, &having);
-	DBUG_RETURN((res == Item_subselect::RES_ERROR));
-      }
-    }
-  }
+  
+  int res= check_and_do_in_subquery_rewrites(this);
 
   select_lex->fix_prepare_information(thd, &conds, &having);
+  
+  if (res)
+    DBUG_RETURN(res);
 
   if (order)
   {
@@ -637,8 +684,7 @@ JOIN::prepare(Item ***rref_pointer_array,
   }
 
   if (select_lex->inner_refs_list.elements &&
-      fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array,
-                     group_list))
+      fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array))
     DBUG_RETURN(-1);
 
   if (group_list)
@@ -663,10 +709,6 @@ JOIN::prepare(Item ***rref_pointer_array,
     }
   }
 
-  if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
-    DBUG_RETURN(-1);
-  
-
   /*
     Check if there are references to un-aggregated columns when computing 
     aggregate functions with implicit grouping (there is no GROUP BY).
@@ -726,11 +768,36 @@ JOIN::prepare(Item ***rref_pointer_array,
   if (!procedure && result && result->prepare(fields_list, unit_arg))
     goto err;					/* purecov: inspected */
 
+  unit= unit_arg;
+  if (prepare_stage2())
+    goto err;
+
+  DBUG_RETURN(0); // All OK
+
+err:
+  delete procedure;                /* purecov: inspected */
+  procedure= 0;
+  DBUG_RETURN(-1);                /* purecov: inspected */
+}
+
+
+/**
+  Second phase of prepare, where we collect some statistics.
+
+  @details
+  This part is kept separate so that the statistics can be recalculated
+  after a subquery is transformed during the optimization phase.
+*/
+
+bool JOIN::prepare_stage2()
+{
+  bool res= TRUE;
+  DBUG_ENTER("JOIN::prepare_stage2");
+
   /* Init join struct */
   count_field_types(select_lex, &tmp_table_param, all_fields, 0);
   ref_pointer_array_size= all_fields.elements*sizeof(Item*);
   this->group= group_list != 0;
-  unit= unit_arg;
 
   if (tmp_table_param.sum_func_count && !group_list)
     implicit_grouping= TRUE;
@@ -747,97 +814,64 @@ JOIN::prepare(Item ***rref_pointer_array,
   if (alloc_func_list())
     goto err;
 
-  DBUG_RETURN(0); // All OK
-
+  res= FALSE;
 err:
-  delete procedure;				/* purecov: inspected */
-  procedure= 0;
-  DBUG_RETURN(-1);				/* purecov: inspected */
+  DBUG_RETURN(res);				/* purecov: inspected */
 }
 
 
-/*
-  Remove the predicates pushed down into the subquery
+void
+inject_jtbm_conds(JOIN *join, List<TABLE_LIST> *join_list, Item **join_where)
+{
+  TABLE_LIST *table;
+  NESTED_JOIN *nested_join;
+  List_iterator<TABLE_LIST> li(*join_list);
+  DBUG_ENTER("inject_jtbm_conds");
 
-  SYNOPSIS
-    JOIN::remove_subq_pushed_predicates()
-      where   IN  Must be NULL
-              OUT The remaining WHERE condition, or NULL
+  /* AND the semi-join conds of each jtbm_subselect table into *join_where. */
+  while ((table= li++))
+  {
+    Item_in_subselect *item;
+    
+    if ((item= table->jtbm_subselect))
+    {
+      Item_in_subselect *subq_pred= item;
+      double rows;
+      double read_time;
 
-  DESCRIPTION
-    Given that this join will be executed using (unique|index)_subquery,
-    without "checking NULL", remove the predicates that were pushed down
-    into the subquery.
-
-    If the subquery compares scalar values, we can remove the condition that
-    was wrapped into trig_cond (it will be checked when needed by the subquery
-    engine)
-
-    If the subquery compares row values, we need to keep the wrapped
-    equalities in the WHERE clause: when the left (outer) tuple has both NULL
-    and non-NULL values, we'll do a full table scan and will rely on the
-    equalities corresponding to non-NULL parts of left tuple to filter out
-    non-matching records.
-
-    TODO: We can remove the equalities that will be guaranteed to be true by the
-    fact that subquery engine will be using index lookup. This must be done only
-    for cases where there are no conversion errors of significance, e.g. 257
-    that is searched in a byte. But this requires homogenization of the return 
-    codes of all Field*::store() methods.
-*/
+      subq_pred->in_strategy &= ~SUBS_IN_TO_EXISTS; // clear IN-to-EXISTS bit
+      subq_pred->optimize(&rows, &read_time);
 
-void JOIN::remove_subq_pushed_predicates(Item **where)
-{
-  if (conds->type() == Item::FUNC_ITEM &&
-      ((Item_func *)this->conds)->functype() == Item_func::EQ_FUNC &&
-      ((Item_func *)conds)->arguments()[0]->type() == Item::REF_ITEM &&
-      ((Item_func *)conds)->arguments()[1]->type() == Item::FIELD_ITEM &&
-      test_if_ref ((Item_field *)((Item_func *)conds)->arguments()[1],
-                   ((Item_func *)conds)->arguments()[0]))
-  {
-    *where= 0;
-    return;
-  }
-}
+      subq_pred->jtbm_read_time= read_time;
+      subq_pred->jtbm_record_count=rows;
+      subq_pred->is_jtbm_merged= TRUE;
 
+      subselect_hash_sj_engine *hash_sj_engine=
+        ((subselect_hash_sj_engine*)item->engine);
+      
+      
+      // Repeat of convert_subq_to_jtbm: point table at the engine's tmp_table
+      table->table= hash_sj_engine->tmp_table;
+      table->table->pos_in_table_list= table;
 
-/*
-  Index lookup-based subquery: save some flags for EXPLAIN output
+      setup_table_map(table->table, table, table->jtbm_table_no);
 
-  SYNOPSIS
-    save_index_subquery_explain_info()
-      join_tab  Subquery's join tab (there is only one as index lookup is
-                only used for subqueries that are single-table SELECTs)
-      where     Subquery's WHERE clause
+      Item *sj_conds= hash_sj_engine->semi_join_conds;
 
-  DESCRIPTION
-    For index lookup-based subquery (i.e. one executed with
-    subselect_uniquesubquery_engine or subselect_indexsubquery_engine),
-    check its EXPLAIN output row should contain 
-      "Using index" (TAB_INFO_FULL_SCAN_ON_NULL) 
-      "Using Where" (TAB_INFO_USING_WHERE)
-      "Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL)
-    and set appropriate flags in join_tab->packed_info.
-*/
+      (*join_where)= and_items(*join_where, sj_conds);
+      if (!(*join_where)->fixed)
+        (*join_where)->fix_fields(join->thd, join_where);
+      //parent_join->select_lex->where= parent_join->conds;
+    }
 
-static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
-{
-  join_tab->packed_info= TAB_INFO_HAVE_VALUE;
-  if (join_tab->table->covering_keys.is_set(join_tab->ref.key))
-    join_tab->packed_info |= TAB_INFO_USING_INDEX;
-  if (where)
-    join_tab->packed_info |= TAB_INFO_USING_WHERE;
-  for (uint i = 0; i < join_tab->ref.key_parts; i++)
-  {
-    if (join_tab->ref.cond_guards[i])
+    if ((nested_join= table->nested_join))
     {
-      join_tab->packed_info |= TAB_INFO_FULL_SCAN_ON_NULL;
-      break;
+      inject_jtbm_conds(join, &nested_join->join_list, join_where);
     }
   }
+  DBUG_VOID_RETURN;
 }
 
-
 /**
   global select optimisation.
 
@@ -853,7 +887,11 @@ static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
 int
 JOIN::optimize()
 {
+  ulonglong select_opts_for_readinfo;
+  uint no_jbuf_after;
   DBUG_ENTER("JOIN::optimize");
+
+  do_send_rows = (unit->select_limit_cnt) ? 1 : 0;
   // to prevent double initialization on EXPLAIN
   if (optimized)
     DBUG_RETURN(0);
@@ -861,13 +899,53 @@ JOIN::optimize()
   DEBUG_SYNC(thd, "before_join_optimize");
 
   thd_proc_info(thd, "optimizing");
+
+  set_allowed_join_cache_types();
+  need_distinct= TRUE;
+
+  /* Run optimize phase for all derived tables/views used in this SELECT. */
+  if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
+    DBUG_RETURN(1);
+
+  if (select_lex->first_cond_optimization)
+  {
+    //Do it only for the first execution
+    /* Merge all mergeable derived tables/views in this SELECT. */
+    if (select_lex->handle_derived(thd->lex, DT_MERGE))
+      DBUG_RETURN(TRUE);  
+    table_count= select_lex->leaf_tables.elements;
+    select_lex->update_used_tables();
+  }
+
+  if (transform_max_min_subquery())
+    DBUG_RETURN(1); /* purecov: inspected */
+
+  if (select_lex->first_cond_optimization)
+  {
+    /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
+    if (convert_join_subqueries_to_semijoins(this))
+      DBUG_RETURN(1); /* purecov: inspected */
+    /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
+    select_lex->update_used_tables();
+
+    /* Save this info for the next executions */
+    if (select_lex->save_leaf_tables(thd))
+      DBUG_RETURN(1);
+  }
+  
+  eval_select_list_used_tables();
+  
+  table_count= select_lex->leaf_tables.elements;
+
+  if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
+    DBUG_RETURN(-1);
+
   row_limit= ((select_distinct || order || group_list) ? HA_POS_ERROR :
 	      unit->select_limit_cnt);
   /* select_limit is used to decide if we are likely to scan the whole table */
   select_limit= unit->select_limit_cnt;
   if (having || (select_options & OPTION_FOUND_ROWS))
     select_limit= HA_POS_ERROR;
-  do_send_rows = (unit->select_limit_cnt) ? 1 : 0;
   // Ignore errors of execution if option IGNORE present
   if (thd->lex->ignore)
     thd->lex->current_select->no_error= 1;
@@ -893,7 +971,8 @@ JOIN::optimize()
     }
   }
 #endif
-  SELECT_LEX *sel= thd->lex->current_select;
+
+  SELECT_LEX *sel= select_lex;
   if (sel->first_cond_optimization)
   {
     /*
@@ -910,16 +989,21 @@ JOIN::optimize()
     sel->first_cond_optimization= 0;
 
     /* Convert all outer joins to inner joins if possible */
-    conds= simplify_joins(this, join_list, conds, TRUE);
+    conds= simplify_joins(this, join_list, conds, TRUE, FALSE);
     build_bitmap_for_nested_joins(join_list, 0);
 
     sel->prep_where= conds ? conds->copy_andor_structure(thd) : 0;
 
+    sel->where= conds;
+
     if (arena)
       thd->restore_active_arena(arena, &backup);
   }
+  
+  inject_jtbm_conds(this, join_list, &conds);
 
-  conds= optimize_cond(this, conds, join_list, &cond_value);   
+  conds= optimize_cond(this, conds, join_list, &cond_value, &cond_equal);
+     
   if (thd->is_error())
   {
     error= 1;
@@ -928,7 +1012,7 @@ JOIN::optimize()
   }
 
   {
-    having= optimize_cond(this, having, join_list, &having_value);
+    having= optimize_cond(this, having, join_list, &having_value, &having_equal);
     if (thd->is_error())
     {
       error= 1;
@@ -936,10 +1020,17 @@ JOIN::optimize()
       DBUG_RETURN(1);
     }
     if (select_lex->where)
+    {
       select_lex->cond_value= cond_value;
+      if (sel->where != conds && cond_value == Item::COND_OK)
+        thd->change_item_tree(&sel->where, conds);
+    }  
     if (select_lex->having)
+    {
       select_lex->having_value= having_value;
-
+      if (sel->having != having && having_value == Item::COND_OK)
+        thd->change_item_tree(&sel->having, having);    
+    }
     if (cond_value == Item::COND_FALSE || having_value == Item::COND_FALSE || 
         (!unit->select_limit_cnt && !(select_options & OPTION_FOUND_ROWS)))
     {						/* Impossible cond */
@@ -947,16 +1038,17 @@ JOIN::optimize()
                             "Impossible HAVING" : "Impossible WHERE"));
       zero_result_cause=  having_value == Item::COND_FALSE ?
                            "Impossible HAVING" : "Impossible WHERE";
-      tables= 0;
+      table_count= top_join_tab_count= 0;
       error= 0;
-      DBUG_RETURN(0);
+      goto setup_subq_exit;
     }
   }
 
 #ifdef WITH_PARTITION_STORAGE_ENGINE
   {
     TABLE_LIST *tbl;
-    for (tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
+    List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
+    while ((tbl= li++))
     {
       /* 
         If tbl->embedding!=NULL that means that this table is in the inner
@@ -993,13 +1085,14 @@ JOIN::optimize()
     */
     if ((res=opt_sum_query(thd, select_lex->leaf_tables, all_fields, conds)))
     {
+      DBUG_ASSERT(res >= 0);
       if (res == HA_ERR_KEY_NOT_FOUND)
       {
         DBUG_PRINT("info",("No matching min/max row"));
 	zero_result_cause= "No matching min/max row";
-        tables= 0;
+        table_count= top_join_tab_count= 0;
 	error=0;
-	DBUG_RETURN(0);
+        goto setup_subq_exit;
       }
       if (res > 1)
       {
@@ -1007,18 +1100,11 @@ JOIN::optimize()
         DBUG_PRINT("error",("Error from opt_sum_query"));
         DBUG_RETURN(1);
       }
-      if (res < 0)
-      {
-        DBUG_PRINT("info",("No matching min/max row"));
-        zero_result_cause= "No matching min/max row";
-        tables= 0;
-        error=0;
-        DBUG_RETURN(0);
-      }
+
       DBUG_PRINT("info",("Select tables optimized away"));
       zero_result_cause= "Select tables optimized away";
       tables_list= 0;				// All tables resolved
-      const_tables= tables;
+      const_tables= top_join_tab_count= table_count;
       /*
         Extract all table-independent conditions and replace the WHERE
         clause with them. All other conditions were computed by opt_sum_query
@@ -1032,7 +1118,8 @@ JOIN::optimize()
       if (conds && !(thd->lex->describe & DESCRIBE_EXTENDED))
       {
         COND *table_independent_conds=
-          make_cond_for_table(conds, PSEUDO_TABLE_BITS, 0);
+          make_cond_for_table(thd, conds, PSEUDO_TABLE_BITS, 0, MAX_TABLES,
+                              FALSE, FALSE);
         DBUG_EXECUTE("where",
                      print_where(table_independent_conds,
                                  "where after opt_sum_query()",
@@ -1045,7 +1132,7 @@ JOIN::optimize()
   {
     DBUG_PRINT("info",("No tables"));
     error= 0;
-    DBUG_RETURN(0);
+    goto setup_subq_exit;
   }
   error= -1;					// Error is sent to client
   sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables);
@@ -1059,6 +1146,9 @@ JOIN::optimize()
     DBUG_RETURN(1);
   }
 
+  if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS))
+    drop_unused_derived_keys();
+
   if (rollup.state != ROLLUP::STATE_NONE)
   {
     if (rollup_process_const_fields())
@@ -1070,7 +1160,7 @@ JOIN::optimize()
   else
   {
     /* Remove distinct if only const tables */
-    select_distinct= select_distinct && (const_tables != tables);
+    select_distinct= select_distinct && (const_tables != table_count);
   }
 
   thd_proc_info(thd, "preparing");
@@ -1088,7 +1178,7 @@ JOIN::optimize()
     zero_result_cause= "no matching row in const table";
     DBUG_PRINT("error",("Error: %s", zero_result_cause));
     error= 0;
-    DBUG_RETURN(0);
+    goto setup_subq_exit;
   }
   if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
       best_read > (double) thd->variables.max_join_size &&
@@ -1100,13 +1190,13 @@ JOIN::optimize()
   }
   if (const_tables && !thd->locked_tables_mode &&
       !(select_options & SELECT_NO_UNLOCK))
-    mysql_unlock_some_tables(thd, all_tables, const_tables);
+    mysql_unlock_some_tables(thd, table, const_tables);
   if (!conds && outer_join)
   {
     /* Handle the case where we have an OUTER JOIN without a WHERE */
     conds=new Item_int((longlong) 1,1);	// Always true
   }
-  select= make_select(*all_tables, const_table_map,
+  select= make_select(*table, const_table_map,
                       const_table_map, conds, 1, &error);
   if (error)
   {						/* purecov: inspected */
@@ -1115,8 +1205,11 @@ JOIN::optimize()
     DBUG_RETURN(1);
   }
   
-  reset_nj_counters(join_list);
-  make_outerjoin_info(this);
+  reset_nj_counters(this, join_list);
+  if (make_outerjoin_info(this))
+  {
+    DBUG_RETURN(1);
+  }
 
   /*
     Among the equal fields belonging to the same multiple equality
@@ -1135,10 +1228,11 @@ JOIN::optimize()
   }
 
   /*
-    Permorm the the optimization on fields evaluation mentioned above
+    Perform the optimization on fields evaluation mentioned above
     for all on expressions.
-  */ 
-  for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables ; tab++)
+  */
+  for (JOIN_TAB *tab= first_linear_tab(this, WITHOUT_CONST_TABLES); tab;
+       tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
   {
     if (*tab->on_expr_ref)
     {
@@ -1149,37 +1243,45 @@ JOIN::optimize()
     }
   }
 
-  if (conds && const_table_map != found_const_table_map &&
-      (select_options & SELECT_DESCRIBE))
-  {
-    conds=new Item_int((longlong) 0,1);	// Always false
-  }
-
   /*
-    It's necessary to check const part of HAVING cond as
-    there is a chance that some cond parts may become
-    const items after make_join_statisctics(for example
-    when Item is a reference to cost table field from
-    outer join).
-    This check is performed only for those conditions
-    which do not use aggregate functions. In such case
-    temporary table may not be used and const condition
-    elements may be lost during further having
-    condition transformation in JOIN::exec.
+    Perform the optimization on fields evaluation mentioned above
+    for all used ref items.
   */
-  if (having && const_table_map && !having->with_sum_func)
+  for (JOIN_TAB *tab= first_linear_tab(this, WITHOUT_CONST_TABLES); tab;
+       tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
   {
-    having->update_used_tables();
-    having= remove_eq_conds(thd, having, &having_value);
-    if (having_value == Item::COND_FALSE)
+    uint key_copy_index=0;
+    for (uint i=0; i < tab->ref.key_parts; i++)
     {
-      having= new Item_int((longlong) 0,1);
-      zero_result_cause= "Impossible HAVING noticed after reading const tables";
-      error= 0;
-      DBUG_RETURN(0);
+      Item **ref_item_ptr= tab->ref.items+i;
+      Item *ref_item= *ref_item_ptr;
+      if (!ref_item->used_tables() && !(select_options & SELECT_DESCRIBE))
+        continue;
+      COND_EQUAL *equals= tab->first_inner ? tab->first_inner->cond_equal : 
+                                             cond_equal;
+      ref_item= substitute_for_best_equal_field(ref_item, equals, map2table);
+      ref_item->update_used_tables();
+      if (*ref_item_ptr != ref_item)
+      {
+        *ref_item_ptr= ref_item;
+        Item *item= ref_item->real_item();
+        store_key *key_copy= tab->ref.key_copy[key_copy_index];
+        if (key_copy->type() == store_key::FIELD_STORE_KEY)
+        {
+          store_key_field *field_copy= ((store_key_field *)key_copy);
+          field_copy->change_source_field((Item_field *) item);
+        }
+      }
+      key_copy_index++;
     }
   }
 
+  if (conds && const_table_map != found_const_table_map &&
+      (select_options & SELECT_DESCRIBE))
+  {
+    conds=new Item_int((longlong) 0,1);	// Always false
+  }
+
   /* Cache constant expressions in WHERE, HAVING, ON clauses. */
   cache_const_exprs();
 
@@ -1187,7 +1289,8 @@ JOIN::optimize()
   {
     zero_result_cause=
       "Impossible WHERE noticed after reading const tables";
-    DBUG_RETURN(0);				// error == 0
+    select_lex->mark_const_derived(zero_result_cause);
+    goto setup_subq_exit;
   }
 
   error= -1;					/* if goto err */
@@ -1222,7 +1325,7 @@ JOIN::optimize()
 
      The FROM clause must contain a single non-constant table.
   */
-  if (tables - const_tables == 1 && (group_list || select_distinct) &&
+  if (table_count - const_tables == 1 && (group_list || select_distinct) &&
       !tmp_table_param.sum_func_count &&
       (!join_tab[const_tables].select ||
        !join_tab[const_tables].select->quick ||
@@ -1273,7 +1376,7 @@ JOIN::optimize()
     if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE)
       select_distinct=0;
   }
-  else if (select_distinct && tables - const_tables == 1 &&
+  else if (select_distinct && table_count - const_tables == 1 &&
            rollup.state == ROLLUP::STATE_NONE)
   {
     /*
@@ -1406,82 +1509,43 @@ JOIN::optimize()
     When the WITH ROLLUP modifier is present, we cannot skip temporary table
     creation for the DISTINCT clause just because there are only const tables.
   */
-  need_tmp= ((const_tables != tables &&
+  need_tmp= ((const_tables != table_count &&
 	     ((select_distinct || !simple_order || !simple_group) ||
 	      (group_list && order) ||
 	      test(select_options & OPTION_BUFFER_RESULT))) ||
              (rollup.state != ROLLUP::STATE_NONE && select_distinct));
 
-  // No cache for MATCH
-  make_join_readinfo(this,
-		     (select_options & (SELECT_DESCRIBE |
-					SELECT_NO_JOIN_CACHE)) |
-		     (select_lex->ftfunc_list->elements ?
-		      SELECT_NO_JOIN_CACHE : 0));
+  /*
+    If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the table
+    whose columns are required to be returned in a sorted order, then
+    the proper value for no_jbuf_after should be yielded by a call to
+    the make_join_orderinfo function.
+    Yet the current implementation of FORCE INDEX hints does not
+    allow us to do it in a clean manner.
+  */
+  no_jbuf_after= 1 ? table_count : make_join_orderinfo(this);
+
+  // Don't use join buffering when we use MATCH
+  select_opts_for_readinfo=
+    (select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) |
+    (select_lex->ftfunc_list->elements ?  SELECT_NO_JOIN_CACHE : 0);
+
+  if (make_join_readinfo(this, select_opts_for_readinfo, no_jbuf_after))
+    DBUG_RETURN(1);
 
   /* Perform FULLTEXT search before all regular searches */
   if (!(select_options & SELECT_DESCRIBE))
     init_ftfuncs(thd, select_lex, test(order));
 
-  /*
-    is this simple IN subquery?
-  */
-  if (!group_list && !order &&
-      unit->item && unit->item->substype() == Item_subselect::IN_SUBS &&
-      tables == 1 && conds &&
-      !unit->is_union())
-  {
-    if (!having)
-    {
-      Item *where= conds;
-      if (join_tab[0].type == JT_EQ_REF &&
-	  join_tab[0].ref.items[0]->name == in_left_expr_name)
-      {
-        remove_subq_pushed_predicates(&where);
-        save_index_subquery_explain_info(join_tab, where);
-        join_tab[0].type= JT_UNIQUE_SUBQUERY;
-        error= 0;
-        DBUG_RETURN(unit->item->
-                    change_engine(new
-                                  subselect_uniquesubquery_engine(thd,
-                                                                  join_tab,
-                                                                  unit->item,
-                                                                  where)));
-      }
-      else if (join_tab[0].type == JT_REF &&
-	       join_tab[0].ref.items[0]->name == in_left_expr_name)
-      {
-	remove_subq_pushed_predicates(&where);
-        save_index_subquery_explain_info(join_tab, where);
-        join_tab[0].type= JT_INDEX_SUBQUERY;
-        error= 0;
-        DBUG_RETURN(unit->item->
-                    change_engine(new
-                                  subselect_indexsubquery_engine(thd,
-                                                                 join_tab,
-                                                                 unit->item,
-                                                                 where,
-                                                                 NULL,
-                                                                 0)));
-      }
-    } else if (join_tab[0].type == JT_REF_OR_NULL &&
-	       join_tab[0].ref.items[0]->name == in_left_expr_name &&
-               having->name == in_having_cond)
-    {
-      join_tab[0].type= JT_INDEX_SUBQUERY;
-      error= 0;
-      conds= remove_additional_cond(conds);
-      save_index_subquery_explain_info(join_tab, conds);
-      DBUG_RETURN(unit->item->
-		  change_engine(new subselect_indexsubquery_engine(thd,
-								   join_tab,
-								   unit->item,
-								   conds,
-                                                                   having,
-								   1)));
-    }
+  if (optimize_unflattened_subqueries())
+    DBUG_RETURN(1);
+  
+  int res;
+  if ((res= rewrite_to_index_subquery_engine(this)) != -1)
+    DBUG_RETURN(res);
+  if (setup_subquery_caches())
+    DBUG_RETURN(-1);
 
-  }
   /*
     Need to tell handlers that to play it safe, it should fetch all
     columns of the primary key of the tables: this is because MySQL may
@@ -1490,13 +1554,16 @@ JOIN::optimize()
   */
   if (need_tmp || select_distinct || group_list || order)
   {
-    for (uint i = const_tables; i < tables; i++)
-      join_tab[i].table->prepare_for_position();
+    for (uint i= 0; i < table_count; i++)
+    {
+      if (!(table[i]->map & const_table_map))
+        table[i]->prepare_for_position();
+    }
   }
 
   DBUG_EXECUTE("info",TEST_join(this););
 
-  if (const_tables != tables)
+  if (const_tables != table_count)
   {
     /*
       Because filesort always does a full table scan or a quick range scan
@@ -1545,7 +1612,7 @@ JOIN::optimize()
       for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
       {
         Item *item= *tmp_order->item;
-        if (item->walk(&Item::is_expensive_processor, 0, (uchar*)0))
+        if (item->is_expensive())
         {
           /* Force tmp table without sort */
           need_tmp=1; simple_order=simple_group=0;
@@ -1559,7 +1626,7 @@ JOIN::optimize()
   if (select_options & SELECT_DESCRIBE)
   {
     error= 0;
-    DBUG_RETURN(0);
+    goto derived_exit;
   }
   having= 0;
 
@@ -1574,7 +1641,6 @@ JOIN::optimize()
     single table queries, thus it is sufficient to test only the first
     join_tab element of the plan for its access method.
   */
-  bool need_distinct= TRUE;
   if (join_tab->is_using_loose_index_scan())
   {
     tmp_table_param.precomputed_group_by= TRUE;
@@ -1585,6 +1651,40 @@ JOIN::optimize()
     }
   }
 
+  error= 0;
+  DBUG_RETURN(0);
+
+setup_subq_exit:
+  /* Choose an execution strategy for this JOIN. */
+  if (!tables_list || !table_count)
+    choose_tableless_subquery_plan();
+  /*
+    Even with zero matching rows, subqueries in the HAVING clause may
+    need to be evaluated if there are aggregate functions in the query.
+  */
+  if (optimize_unflattened_subqueries())
+    DBUG_RETURN(1);
+  error= 0;
+
+derived_exit:
+  select_lex->mark_const_derived(zero_result_cause);
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Create and initialize objects needed for the execution of a query plan.
+  Evaluate constant expressions not evaluated during optimization.
+*/
+
+int JOIN::init_execution()
+{
+  DBUG_ENTER("JOIN::init_execution");
+
+  DBUG_ASSERT(optimized);
+  DBUG_ASSERT(!(select_options & SELECT_DESCRIBE));
+  initialized= true;
+
   /* Create a tmp table if distinct or if the sort is too complicated */
   if (need_tmp)
   {
@@ -1675,8 +1775,8 @@ JOIN::optimize()
 
     if (exec_tmp_table1->distinct)
     {
-      table_map used_tables= thd->lex->used_tables;
-      JOIN_TAB *last_join_tab= join_tab+tables-1;
+      table_map used_tables= select_list_used_tables;
+      JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1;
       do
       {
 	if (used_tables & last_join_tab->table->map)
@@ -1700,12 +1800,101 @@ JOIN::optimize()
       DBUG_RETURN(-1);                         /* purecov: inspected */
   }
 
-  error= 0;
   DBUG_RETURN(0);
 }
 
 
 /**
+  Setup expression caches for subqueries that need them
+
+  @details
+  The function wraps correlated subquery expressions that return one value
+  into objects of the class Item_cache_wrapper setting up an expression
+  cache for each of them. The result values of the subqueries are to be
+  cached together with the corresponding sets of the parameters - outer
+  references of the subqueries.
+
+  @retval FALSE OK
+  @retval TRUE  Error
+*/
+
+bool JOIN::setup_subquery_caches()
+{
+  DBUG_ENTER("JOIN::setup_subquery_caches");
+
+  /*
+    All these flags must be checked together, because items created in
+    one of these clauses can be moved to another one by the optimizer.
+  */
+  if (select_lex->expr_cache_may_be_used[IN_WHERE] ||
+      select_lex->expr_cache_may_be_used[IN_HAVING] ||
+      select_lex->expr_cache_may_be_used[IN_ON] ||
+      select_lex->expr_cache_may_be_used[NO_MATTER])
+  {
+    if (conds)                               // the join-wide condition
+      conds= conds->transform(&Item::expr_cache_insert_transformer,
+                              (uchar*) thd);
+    for (JOIN_TAB *tab= first_linear_tab(this, WITHOUT_CONST_TABLES); 
+         tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+    {
+      if (tab->select_cond)                  // condition attached to this tab
+        tab->select_cond=
+          tab->select_cond->transform(&Item::expr_cache_insert_transformer,
+                                      (uchar*) thd);
+      if (tab->cache_select && tab->cache_select->cond)
+        tab->cache_select->cond=
+          tab->cache_select->
+          cond->transform(&Item::expr_cache_insert_transformer,
+                          (uchar*) thd);
+
+    }
+
+    if (having)
+      having= having->transform(&Item::expr_cache_insert_transformer,
+                                (uchar*) thd);
+    if (tmp_having)
+    {
+      DBUG_ASSERT(having == NULL);           // both are not expected at once
+      tmp_having= tmp_having->transform(&Item::expr_cache_insert_transformer,
+                                        (uchar*) thd);
+    }
+  }
+  if (select_lex->expr_cache_may_be_used[SELECT_LIST] ||
+      select_lex->expr_cache_may_be_used[IN_GROUP_BY] ||
+      select_lex->expr_cache_may_be_used[NO_MATTER])
+  {
+    List_iterator<Item> li(all_fields);      // walk every item of all_fields
+    Item *item;
+    while ((item= li++))
+    {
+      Item *new_item=
+        item->transform(&Item::expr_cache_insert_transformer, (uchar*) thd);
+      if (new_item != item)                  // touch the list only on change
+      {
+        thd->change_item_tree(li.ref(), new_item);
+      }
+    }
+    for (ORDER *group= group_list; group ; group= group->next)
+    {
+      *group->item=                          // GROUP BY slot is overwritten
+        (*group->item)->transform(&Item::expr_cache_insert_transformer,
+                                  (uchar*) thd);
+    }
+  }
+  if (select_lex->expr_cache_may_be_used[NO_MATTER])  // ORDER BY: this flag only
+  {
+    for (ORDER *ord= order; ord; ord= ord->next)
+    {
+      *ord->item=                            // ORDER BY slot is overwritten
+        (*ord->item)->transform(&Item::expr_cache_insert_transformer,
+                                (uchar*) thd);
+    }
+  }
+  DBUG_RETURN(FALSE);    // transform() results are not checked: always FALSE
+}
+
+
+/**
   Restore values in temporary join.
 */
 void JOIN::restore_tmp()
@@ -1714,6 +1903,62 @@ void JOIN::restore_tmp()
 }
 
 
+/*
+  Shrink join buffers used for preceding tables to reduce the occupied space
+
+  SYNOPSIS
+    shrink_join_buffers()
+      jt           table up to which the buffers are to be shrunk
+      curr_space   the size of the space used by the buffers for tables 1..jt
+      needed_space the size of the space that has to be used by these buffers
+
+  DESCRIPTION
+    The function makes an attempt to shrink all join buffers used for the
+    tables starting from the first up to jt to reduce the total size of the
+    space occupied by the buffers used for tables 1,...,jt  from curr_space
+    to needed_space.
+    The function assumes that the buffer for the table jt has not been
+    allocated yet.
+
+  RETURN
+    FALSE     if all buffers have been successfully shrunk
+    TRUE      otherwise
+*/
+  
+bool JOIN::shrink_join_buffers(JOIN_TAB *jt, 
+                               ulonglong curr_space,
+                               ulonglong needed_space)
+{
+  JOIN_CACHE *cache;
+  for (JOIN_TAB *tab= join_tab+const_tables; tab < jt; tab++) // tabs before jt
+  {
+    cache= tab->cache;
+    if (cache)                       // tabs without a join cache are skipped
+    { 
+      size_t buff_size;
+      if (needed_space < cache->get_min_join_buffer_size())
+        return TRUE;                 // remaining budget is below the minimum
+      if (cache->shrink_join_buffer_in_ratio(curr_space, needed_space))
+      { 
+        revise_cache_usage(tab);     // called only when the shrink fails
+        return TRUE;
+      }
+      buff_size= cache->get_join_buffer_size();  // size after the shrink
+      curr_space-= buff_size;        // both totals now describe only the
+      needed_space-= buff_size;      // buffers that are still to be handled
+    }
+  }
+
+  cache= jt->cache;
+  DBUG_ASSERT(cache);                // jt itself must have a cache object
+  if (needed_space < cache->get_min_join_buffer_size())
+    return TRUE;
+  cache->set_join_buffer_size((size_t)needed_space); // jt gets what is left
+  
+  return FALSE;
+}
+
+
 int
 JOIN::reinit()
 {
@@ -1739,16 +1984,22 @@ JOIN::reinit()
     free_io_cache(exec_tmp_table2);
     filesort_free_buffers(exec_tmp_table2,0);
   }
+  clear_sj_tmp_tables(this);
   if (items0)
     set_items_ref_array(items0);
 
   if (join_tab_save)
-    memcpy(join_tab, join_tab_save, sizeof(JOIN_TAB) * tables);
+    memcpy(join_tab, join_tab_save, sizeof(JOIN_TAB) * table_count);
 
   /* need to reset ref access state (see join_read_key) */
   if (join_tab)
-    for (uint i= 0; i < tables; i++)
-      join_tab[i].ref.key_err= TRUE;
+  {
+    for (JOIN_TAB *tab= first_linear_tab(this, WITH_CONST_TABLES); tab; 
+         tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+    {
+      tab->ref.key_err= TRUE;
+    }
+  }
 
   if (tmp_join)
     restore_tmp();
@@ -1761,6 +2012,16 @@ JOIN::reinit()
       func->clear();
   }
 
+  if (no_rows_in_result_called)
+  {
+    /* Reset effect of possible no_rows_in_result() */
+    List_iterator_fast<Item> it(fields_list);
+    Item *item;
+    no_rows_in_result_called= 0;
+    while ((item= it++))
+      item->restore_to_before_no_rows_in_result();
+  }
+
   if (!(select_options & SELECT_DESCRIBE))
     init_ftfuncs(thd, select_lex, test(order));
 
@@ -1795,7 +2056,7 @@ JOIN::save_join_tab()
   if (!join_tab_save && select_lex->master_unit()->uncacheable)
   {
     if (!(join_tab_save= (JOIN_TAB*)thd->memdup((uchar*) join_tab,
-						sizeof(JOIN_TAB) * tables)))
+						sizeof(JOIN_TAB) * table_count)))
       return 1;
   }
   return 0;
@@ -1835,7 +2096,7 @@ JOIN::exec()
   }
   (void) result->prepare2(); // Currently, this cannot fail.
 
-  if (!tables_list && (tables || !select_lex->with_sum_func))
+  if (!tables_list && (table_count || !select_lex->with_sum_func))
   {                                           // Only test of functions
     if (select_options & SELECT_DESCRIBE)
       select_describe(this, FALSE, FALSE, FALSE,
@@ -1863,7 +2124,7 @@ JOIN::exec()
       {
 	if (do_send_rows &&
             (procedure ? (procedure->send_row(procedure_fields_list) ||
-             procedure->end_of_records()) : result->send_data(fields_list)))
+             procedure->end_of_records()) : result->send_data(fields_list)> 0))
 	  error= 1;
 	else
 	{
@@ -1888,9 +2149,19 @@ JOIN::exec()
     FOUND_ROWS() may be called. Never reset the examined row count here.
     It must be accumulated from all join iterations of all join parts.
   */
-  if (tables)
+  if (table_count)
     thd->limit_found_rows= 0;
 
+  /*
+    Evaluate expensive constant conditions that were not evaluated during
+    optimization. Do not evaluate them for EXPLAIN statements as these
+    conditions may be arbitrarily costly, and because the optimize phase
+    might not have produced a complete executable plan for EXPLAINs.
+  */
+  if (exec_const_cond && !(select_options & SELECT_DESCRIBE) &&
+      !exec_const_cond->val_int())
+    zero_result_cause= "Impossible WHERE noticed after reading const tables";
+
   if (zero_result_cause)
   {
     (void) return_zero_rows(this, result, select_lex->leaf_tables,
@@ -1898,10 +2169,31 @@ JOIN::exec()
 			    send_row_on_empty_set(),
 			    select_options,
 			    zero_result_cause,
-			    having);
+			    having ? having : tmp_having);
     DBUG_VOID_RETURN;
   }
 
+  /*
+    Evaluate all constant expressions with subqueries in the ORDER/GROUP clauses
+    to make sure that all subqueries return a single row. The evaluation itself
+    will trigger an error if that is not the case.
+  */
+  if (exec_const_order_group_cond.elements &&
+      !(select_options & SELECT_DESCRIBE))
+  {
+    List_iterator_fast<Item> const_item_it(exec_const_order_group_cond);
+    Item *cur_const_item;
+    while ((cur_const_item= const_item_it++))
+    {
+      cur_const_item->val_str(&cur_const_item->str_value);
+      if (thd->is_error())
+      {
+        error= thd->is_error();
+        DBUG_VOID_RETURN;
+      }
+    }
+  }
+
   if ((this->select_lex->options & OPTION_SCHEMA_TABLE) &&
       get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
     DBUG_VOID_RETURN;
@@ -1925,7 +2217,7 @@ JOIN::exec()
     }
     if (order && 
         (order != group_list || !(select_options & SELECT_BIG_RESULT)) &&
-	(const_tables == tables ||
+	(const_tables == table_count ||
  	 ((simple_order || skip_sort_order) &&
 	  test_if_skip_sort_order(&join_tab[const_tables], order,
 				  select_limit, 0, 
@@ -1936,9 +2228,17 @@ JOIN::exec()
     select_describe(this, need_tmp,
 		    order != 0 && !skip_sort_order,
 		    select_distinct,
-                    !tables ? "No tables used" : NullS);
+                    !table_count ? "No tables used" : NullS);
     DBUG_VOID_RETURN;
   }
+  else
+  {
+    /* it's a const select, materialize it. */
+    select_lex->mark_const_derived(zero_result_cause);
+  }
+
+  if (!initialized && init_execution())
+    DBUG_VOID_RETURN;
 
   JOIN *curr_join= this;
   List<Item> *curr_all_fields= &all_fields;
@@ -1967,11 +2267,14 @@ JOIN::exec()
     curr_tmp_table= exec_tmp_table1;
 
     /* Copy data to the temporary table */
-    thd_proc_info(thd, "Copying to tmp table");
+    thd_proc_info(thd, copy_to_tmp_table);
     DBUG_PRINT("info", ("%s", thd->proc_info));
     if (!curr_join->sort_and_group &&
-        curr_join->const_tables != curr_join->tables)
-      curr_join->join_tab[curr_join->const_tables].sorted= 0;
+        curr_join->const_tables != curr_join->table_count)
+    {
+      JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables;
+      first_tab->sorted= test(first_tab->loosescan_match_tab);
+    }
 
     Procedure *save_proc= curr_join->procedure;
     tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0);
@@ -1987,7 +2290,9 @@ JOIN::exec()
       curr_join->having= curr_join->tmp_having= 0; // Allready done
     
     /* Change sum_fields reference to calculated fields in tmp_table */
+#ifdef HAVE_valgrind
     if (curr_join != this)
+#endif
       curr_join->all_fields= *curr_all_fields;
     if (!items1)
     {
@@ -2007,7 +2312,9 @@ JOIN::exec()
 				      fields_list.elements, all_fields))
 	  DBUG_VOID_RETURN;
       }
+#ifdef HAVE_valgrind
       if (curr_join != this)
+#endif
       {
         curr_join->tmp_all_fields1= tmp_all_fields1;
         curr_join->tmp_fields_list1= tmp_fields_list1;
@@ -2053,8 +2360,8 @@ JOIN::exec()
     */
 
     if ((curr_join->group_list && (!test_if_subpart(curr_join->group_list,
-						   curr_join->order) || 
-				  curr_join->select_distinct)) ||
+                                                    curr_join->order) || 
+                                   curr_join->select_distinct)) ||
 	(curr_join->select_distinct &&
 	 curr_join->tmp_table_param.using_indirect_summary_function))
     {					/* Must copy to another table */
@@ -2139,8 +2446,11 @@ JOIN::exec()
         DBUG_VOID_RETURN;
       curr_join->group_list= 0;
       if (!curr_join->sort_and_group &&
-          curr_join->const_tables != curr_join->tables)
-        curr_join->join_tab[curr_join->const_tables].sorted= 0;
+          curr_join->const_tables != curr_join->table_count)
+      {
+        JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables;
+        first_tab->sorted= test(first_tab->loosescan_match_tab);
+      }
       if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
 	  (tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table,
 				0)))
@@ -2149,7 +2459,7 @@ JOIN::exec()
 	DBUG_VOID_RETURN;
       }
       end_read_record(&curr_join->join_tab->read_record);
-      curr_join->const_tables= curr_join->tables; // Mark free for cleanup()
+      curr_join->const_tables= curr_join->table_count; // Mark free for cleanup()
       curr_join->join_tab[0].table= 0;           // Table is freed
       
       // No sum funcs anymore
@@ -2160,7 +2470,13 @@ JOIN::exec()
 				     tmp_fields_list2, tmp_all_fields2, 
 				     fields_list.elements, tmp_all_fields1))
 	  DBUG_VOID_RETURN;
+#ifdef HAVE_valgrind
+        /*
+          Some GCCs use memcpy() for struct assignment, even for x=x.
+          GCC bug 19410: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
+        */
         if (curr_join != this)
+#endif
         {
           curr_join->tmp_fields_list2= tmp_fields_list2;
           curr_join->tmp_all_fields2= tmp_all_fields2;
@@ -2220,7 +2536,9 @@ JOIN::exec()
       tmp_table_param.save_copy_field= curr_join->tmp_table_param.copy_field;
       tmp_table_param.save_copy_field_end=
 	curr_join->tmp_table_param.copy_field_end;
+#ifdef HAVE_valgrind
       if (curr_join != this)
+#endif
       {
         curr_join->tmp_all_fields3= tmp_all_fields3;
         curr_join->tmp_fields_list3= tmp_fields_list3;
@@ -2261,9 +2579,10 @@ JOIN::exec()
       table_map used_tables= (curr_join->const_table_map |
 			      curr_table->table->map);
 
-      Item* sort_table_cond= make_cond_for_table(curr_join->tmp_having,
+      Item* sort_table_cond= make_cond_for_table(thd, curr_join->tmp_having,
 						 used_tables,
-						 (table_map) 0);
+						 (table_map)0, MAX_TABLES,
+						 FALSE, FALSE);
       if (sort_table_cond)
       {
 	if (!curr_table->select)
@@ -2277,16 +2596,31 @@ JOIN::exec()
 		new Item_cond_and(curr_table->select->cond,
 				  sort_table_cond)))
 	    DBUG_VOID_RETURN;
-	  curr_table->select->cond->fix_fields(thd, 0);
 	}
-	curr_table->select_cond= curr_table->select->cond;
+        if (curr_table->pre_idx_push_select_cond)
+	{
+          if (sort_table_cond->type() == Item::COND_ITEM)
+            sort_table_cond= sort_table_cond->copy_andor_structure(thd);           
+          if (!(curr_table->pre_idx_push_select_cond= 
+                new Item_cond_and(curr_table->pre_idx_push_select_cond,
+                                  sort_table_cond)))
+            DBUG_VOID_RETURN;            
+        }
+        if (curr_table->select->cond && !curr_table->select->cond->fixed)
+	  curr_table->select->cond->fix_fields(thd, 0);
+        if (curr_table->pre_idx_push_select_cond &&
+            !curr_table->pre_idx_push_select_cond->fixed)
+          curr_table->pre_idx_push_select_cond->fix_fields(thd, 0);
+          
+        curr_table->set_select_cond(curr_table->select->cond, __LINE__);
 	curr_table->select_cond->top_level_item();
 	DBUG_EXECUTE("where",print_where(curr_table->select->cond,
 					 "select and having",
                                          QT_ORDINARY););
-	curr_join->tmp_having= make_cond_for_table(curr_join->tmp_having,
+	curr_join->tmp_having= make_cond_for_table(thd, curr_join->tmp_having,
 						   ~ (table_map) 0,
-						   ~used_tables);
+						   ~used_tables, MAX_TABLES,
+						   FALSE, FALSE);
 	DBUG_EXECUTE("where",print_where(curr_join->tmp_having,
                                          "having after sort",
                                          QT_ORDINARY););
@@ -2302,7 +2636,7 @@ JOIN::exec()
 	  WHERE clause for any tables after the sorted one.
 	*/
 	JOIN_TAB *curr_table= &curr_join->join_tab[curr_join->const_tables+1];
-	JOIN_TAB *end_table= &curr_join->join_tab[curr_join->tables];
+	JOIN_TAB *end_table= &curr_join->join_tab[curr_join->top_join_tab_count];
 	for (; curr_table < end_table ; curr_table++)
 	{
 	  /*
@@ -2340,7 +2674,7 @@ JOIN::exec()
                             curr_join->group_list ? TRUE : FALSE))
 	DBUG_VOID_RETURN;
       sortorder= curr_join->sortorder;
-      if (curr_join->const_tables != curr_join->tables &&
+      if (curr_join->const_tables != curr_join->table_count &&
           !curr_join->join_tab[curr_join->const_tables].table->sort.io_cache)
       {
         /*
@@ -2407,9 +2741,11 @@ JOIN::destroy()
   {
     if (join_tab != tmp_join->join_tab)
     {
-      JOIN_TAB *tab, *end;
-      for (tab= join_tab, end= tab+tables ; tab != end ; tab++)
+      for (JOIN_TAB *tab= first_linear_tab(this, WITH_CONST_TABLES); tab; 
+           tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+      {
 	tab->cleanup();
+      }
     }
     tmp_join->tmp_join= 0;
     /*
@@ -2418,10 +2754,11 @@ JOIN::destroy()
       anywhere else (as we need to keep the join is reusable).
     */
     tmp_table_param.cleanup();
-    tmp_table_param.copy_field= tmp_join->tmp_table_param.copy_field= 0;
+    tmp_join->tmp_table_param.copy_field= 0;
     DBUG_RETURN(tmp_join->destroy());
   }
   cond_equal= 0;
+  having_equal= 0;
 
   cleanup(1);
  /* Cleanup items referencing temporary table columns */
@@ -2432,6 +2769,7 @@ JOIN::destroy()
   if (exec_tmp_table2)
     free_tmp_table(thd, exec_tmp_table2);
   delete select;
+  destroy_sj_tmp_tables(this);
   delete_dynamic(&keyuse);
   delete procedure;
   DBUG_RETURN(error);
@@ -2440,6 +2778,7 @@ JOIN::destroy()
 
 void JOIN::cleanup_item_list(List<Item> &items) const
 {
+  DBUG_ENTER("JOIN::cleanup_item_list");
   if (!items.is_empty())
   {
     List_iterator_fast<Item> it(items);
@@ -2447,6 +2786,7 @@ void JOIN::cleanup_item_list(List<Item> &items) const
     while ((item= it++))
       item->cleanup();
   }
+  DBUG_VOID_RETURN;
 }
 
 
@@ -2535,10 +2875,9 @@ mysql_select(THD *thd, Item ***rref_pointer_array,
       }
       else
       {
-        err= join->prepare(rref_pointer_array, tables, wild_num,
-                           conds, og_num, order, group, having, proc_param,
-                           select_lex, unit);
-        if (err)
+        if ((err= join->prepare(rref_pointer_array, tables, wild_num,
+                                conds, og_num, order, group, having,
+                                proc_param, select_lex, unit)))
 	{
 	  goto err;
 	}
@@ -2549,14 +2888,20 @@ mysql_select(THD *thd, Item ***rref_pointer_array,
   }
   else
   {
+    /*
+      When in EXPLAIN, delay deleting the joins so that they are still
+      available when we're producing EXPLAIN EXTENDED warning text.
+    */
+    if (select_options & SELECT_DESCRIBE)
+      free_join= 0;
+
     if (!(join= new JOIN(thd, fields, select_options, result)))
 	DBUG_RETURN(TRUE);
     thd_proc_info(thd, "init");
     thd->lex->used_tables=0;                         // Updated by setup_fields
-    err= join->prepare(rref_pointer_array, tables, wild_num,
-                       conds, og_num, order, group, having, proc_param,
-                       select_lex, unit);
-    if (err)
+    if ((err= join->prepare(rref_pointer_array, tables, wild_num,
+                            conds, og_num, order, group, having, proc_param,
+                            select_lex, unit)))
     {
       goto err;
     }
@@ -2594,6 +2939,7 @@ err:
   DBUG_RETURN(join->error);
 }
 
+
 /*****************************************************************************
   Create JOIN_TABS, make a guess about the table types,
   Approximate how many records will be used in each table
@@ -2611,8 +2957,9 @@ static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
   if (select)
   {
     select->head=table;
+    table->reginfo.impossible_range=0;
     if ((error= select->test_quick_select(thd, *(key_map *)keys,(table_map) 0,
-                                          limit, 0)) == 1)
+                                          limit, 0, FALSE)) == 1)
       DBUG_RETURN(select->quick->records);
     if (error == -1)
     {
@@ -2638,6 +2985,7 @@ typedef struct st_sargable_param
   uint num_values;           /* number of values in the above array      */
 } SARGABLE_PARAM;  
 
+
 /**
   Calculate the best possible join and initialize the join structure.
 
@@ -2648,12 +2996,11 @@ typedef struct st_sargable_param
 */
 
 static bool
-make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
-		     DYNAMIC_ARRAY *keyuse_array)
+make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
+                     COND *conds, DYNAMIC_ARRAY *keyuse_array)
 {
-  int error;
+  int error= 0;
   TABLE *table;
-  TABLE_LIST *tables= tables_arg;
   uint i,table_count,const_count,key;
   table_map found_const_table_map, all_table_map, found_ref, refs;
   key_map const_ref, eq_part;
@@ -2661,12 +3008,17 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
   JOIN_TAB *stat,*stat_end,*s,**stat_ref;
   KEYUSE *keyuse,*start_keyuse;
   table_map outer_join=0;
+  table_map no_rows_const_tables= 0;
   SARGABLE_PARAM *sargables= 0;
   JOIN_TAB *stat_vector[MAX_TABLES+1];
+  List_iterator<TABLE_LIST> ti(tables_list);
+  TABLE_LIST *tables;
   DBUG_ENTER("make_join_statistics");
 
-  table_count=join->tables;
-  stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*table_count);
+  LINT_INIT(table); /* inited in all loops */
+  table_count=join->table_count;
+
+  stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*(table_count));
   stat_ref=(JOIN_TAB**) join->thd->alloc(sizeof(JOIN_TAB*)*MAX_TABLES);
   table_vector=(TABLE**) join->thd->alloc(sizeof(TABLE*)*(table_count*2));
   if (!stat || !stat_ref || !table_vector)
@@ -2678,9 +3030,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
   found_const_table_map= all_table_map=0;
   const_count=0;
 
-  for (s= stat, i= 0;
-       tables;
-       s++, tables= tables->next_leaf, i++)
+  for (s= stat, i= 0; (tables= ti++); s++, i++)
   {
     TABLE_LIST *embedding= tables->embedding;
     stat_vector[i]=s;
@@ -2690,16 +3040,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     s->needed_reg.init();
     table_vector[i]=s->table=table=tables->table;
     table->pos_in_table_list= tables;
-    error= table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
-
-    DBUG_EXECUTE_IF("bug11747970_raise_error",
-                    {
-                      if (!error)
-                      {
-                        my_error(ER_UNKNOWN_ERROR, MYF(0));
-                        goto error;
-                      }
-                    });
+    error= tables->fetch_number_of_rows();
 
     if (error)
     {
@@ -2707,15 +3048,15 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
       goto error;
     }
     table->quick_keys.clear_all();
+    table->intersect_keys.clear_all();
     table->reginfo.join_tab=s;
     table->reginfo.not_exists_optimize=0;
     bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys);
     all_table_map|= table->map;
+    s->preread_init_done= FALSE;
     s->join=join;
-    s->info=0;					// For describe
 
     s->dependent= tables->dep_tables;
-    s->key_dependent= 0;
     if (tables->schema_table)
       table->file->stats.records= 2;
     table->quick_condition_rows= table->file->stats.records;
@@ -2725,12 +3066,15 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     {
       /* s is the only inner table of an outer join */
 #ifdef WITH_PARTITION_STORAGE_ENGINE
-      if ((!table->file->stats.records || table->no_partitions_used) && !embedding)
+      if (!table->is_filled_at_execution() &&
+           (!table->file->stats.records || table->no_partitions_used) && !embedding)
 #else
-      if (!table->file->stats.records && !embedding)
+      if (!table->is_filled_at_execution() &&
+          !table->file->stats.records && !embedding)
 #endif
       {						// Empty table
         s->dependent= 0;                        // Ignore LEFT JOIN depend.
+        no_rows_const_tables |= table->map;
 	set_position(join,const_count++,s,(KEYUSE*) 0);
 	continue;
       }
@@ -2744,8 +3088,19 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     {
       /* s belongs to a nested join, maybe to several embedded joins */
       s->embedding_map= 0;
+      bool inside_an_outer_join= FALSE;
       do
       {
+        /* 
+          If this is a semi-join nest, skip it, and proceed upwards. Maybe
+          we're in some outer join nest
+        */
+        if (embedding->sj_on_expr)
+        {
+          embedding= embedding->embedding;
+          continue;
+        }
+        inside_an_outer_join= TRUE;
         NESTED_JOIN *nested_join= embedding->nested_join;
         s->embedding_map|=nested_join->nj_map;
         s->dependent|= embedding->dep_tables;
@@ -2753,22 +3108,26 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
         outer_join|= nested_join->used_tables;
       }
       while (embedding);
-      continue;
+      if (inside_an_outer_join)
+        continue;
     }
 #ifdef WITH_PARTITION_STORAGE_ENGINE
     const bool no_partitions_used= table->no_partitions_used;
 #else
     const bool no_partitions_used= FALSE;
 #endif
-    if ((table->s->system || table->file->stats.records <= 1 ||
+    if (!table->is_filled_at_execution() && 
+        (table->s->system || table->file->stats.records <= 1 ||
          no_partitions_used) &&
 	!s->dependent &&
 	(table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
         !table->fulltext_searched && !join->no_const_tables)
     {
       set_position(join,const_count++,s,(KEYUSE*) 0);
+      no_rows_const_tables |= table->map;
     }
   }
+
   stat_vector[i]=0;
   join->outer_join=outer_join;
 
@@ -2777,83 +3136,98 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     /* 
        Build transitive closure for relation 'to be dependent on'.
        This will speed up the plan search for many cases with outer joins,
-       as well as allow us to catch illegal cross references.
+       as well as allow us to catch illegal cross references.
        Warshall's algorithm is used to build the transitive closure.
-       As we may restart the outer loop upto 'table_count' times, the
-       complexity of the algorithm is O((number of tables)^3).
-       However, most of the iterations will be shortcircuited when
-       there are no pedendencies to propogate.
+       As we use bitmaps to represent the relation the complexity
+       of the algorithm is O((number of tables)^2).
+
+       The classic form of the Warshall's algorithm would look like: 
+       for (i= 0; i < table_count; i++)
+       {
+         for (j= 0; j < table_count; j++)
+         {
+           for (k= 0; k < table_count; k++)
+           {
+             if (bitmap_is_set(stat[j].dependent, i) &&
+                 bitmap_is_set(stat[i].dependent, k))
+               bitmap_set_bit(stat[j].dependent, k);
+           }
+         }
+       }  
     */
-    for (i= 0 ; i < table_count ; i++)
+    
+    for (s= stat ; s < stat_end ; s++)
     {
-      uint j;
-      table= stat[i].table;
-
-      if (!table->reginfo.join_tab->dependent)
-        continue;
-
-      /* Add my dependencies to other tables depending on me */
-      for (j= 0, s= stat ; j < table_count ; j++, s++)
+      table= s->table;
+      for (JOIN_TAB *t= stat ; t < stat_end ; t++)
       {
-        if (s->dependent & table->map)
-        {
-          table_map was_dependent= s->dependent;
-          s->dependent |= table->reginfo.join_tab->dependent;
-          /*
-            If we change dependencies for a table we already have
-            processed: Redo dependency propagation from this table.
-          */
-          if (i > j && s->dependent != was_dependent)
-          {
-            i = j-1;
-            break;
-          }
-        }
+        if (t->dependent & table->map)
+          t->dependent |= table->reginfo.join_tab->dependent;
       }
+      if (outer_join & s->table->map)
+        s->table->maybe_null= 1;
     }
-
+    /* Catch illegal cross references for outer joins */
     for (i= 0, s= stat ; i < table_count ; i++, s++)
     {
-      /* Catch illegal cross references for outer joins */
       if (s->dependent & s->table->map)
       {
-        join->tables=0;			// Don't use join->table
+        join->table_count=0;			// Don't use join->table
         my_message(ER_WRONG_OUTER_JOIN, ER(ER_WRONG_OUTER_JOIN), MYF(0));
         goto error;
       }
-
-      if (outer_join & s->table->map)
-        s->table->maybe_null= 1;
       s->key_dependent= s->dependent;
     }
   }
 
   if (conds || outer_join)
-    if (update_ref_and_keys(join->thd, keyuse_array, stat, join->tables,
-                            conds, join->cond_equal,
-                            ~outer_join, join->select_lex, &sargables))
+  {
+    if (update_ref_and_keys(join->thd, keyuse_array, stat, join->table_count,
+                            conds, ~outer_join, join->select_lex, &sargables))
       goto error;
+    /*
+      Keyparts without prefixes may be useful if this JOIN is a subquery, and
+      if the subquery may be executed via the IN-EXISTS strategy.
+    */
+    bool skip_unprefixed_keyparts=
+      !(join->is_in_subquery() &&
+        ((Item_in_subselect*)join->unit->item)->in_strategy & SUBS_IN_TO_EXISTS);
 
-  /* Read tables with 0 or 1 rows (system tables) */
-  join->const_table_map= 0;
+    if (keyuse_array->elements &&
+        sort_and_filter_keyuse(join->thd, keyuse_array,
+                               skip_unprefixed_keyparts))
+      goto error;
+    DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array););
+  }
 
+  join->const_table_map= no_rows_const_tables;
+  join->const_tables= const_count;
+  eliminate_tables(join);
+  join->const_table_map &= ~no_rows_const_tables;
+  const_count= join->const_tables;
+  found_const_table_map= join->const_table_map;
+
+  /* Read tables with 0 or 1 rows (system tables) */
   for (POSITION *p_pos=join->positions, *p_end=p_pos+const_count;
        p_pos < p_end ;
        p_pos++)
   {
-    int tmp;
     s= p_pos->table;
-    s->type=JT_SYSTEM;
-    join->const_table_map|=s->table->map;
-    if ((tmp=join_read_const_table(s, p_pos)))
+    if (! (s->table->map & join->eliminated_tables))
     {
-      if (tmp > 0)
-	goto error;		// Fatal error
-    }
-    else
-    {
-      found_const_table_map|= s->table->map;
-      s->table->pos_in_table_list->optimized_away= TRUE;
+      int tmp;
+      s->type=JT_SYSTEM;
+      join->const_table_map|=s->table->map;
+      if ((tmp=join_read_const_table(s, p_pos)))
+      {
+        if (tmp > 0)
+          goto error;		// Fatal error
+      }
+      else
+      {
+        found_const_table_map|= s->table->map;
+        s->table->pos_in_table_list->optimized_away= TRUE;
+      }
     }
   }
 
@@ -2874,12 +3248,16 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     {
       table=s->table;
 
+      if (table->is_filled_at_execution())
+        continue;
+
       /* 
         If equi-join condition by a key is null rejecting and after a
         substitution of a const table the key value happens to be null
         then we can state that there are no matches for this equi-join.
       */  
-      if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map)
+      if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map &&
+         !(table->map & join->eliminated_tables))
       {
         /* 
           When performing an outer join operation if there are no matching rows
@@ -2893,7 +3271,8 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
 	*/              
         while (keyuse->table == table)
         {
-          if (!(keyuse->val->used_tables() & ~join->const_table_map) &&
+          if (!keyuse->is_for_hash_join() && 
+              !(keyuse->val->used_tables() & ~join->const_table_map) &&
               keyuse->val->is_null() && keyuse->null_rejecting)
           {
             s->type= JT_CONST;
@@ -2914,7 +3293,9 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
 	  continue;
 	if (table->file->stats.records <= 1L &&
 	    (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
-            !table->pos_in_table_list->embedding)
+            !table->pos_in_table_list->embedding &&
+	      !((outer_join & table->map) && 
+		(*s->on_expr_ref)->is_expensive()))
 	{					// system table
 	  int tmp= 0;
 	  s->type=JT_SYSTEM;
@@ -2936,9 +3317,14 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
 	s->type= JT_REF;
 	while (keyuse->table == table)
 	{
+          if (keyuse->is_for_hash_join())
+	  {
+            keyuse++;
+            continue;
+          }
 	  start_keyuse=keyuse;
 	  key=keyuse->key;
-	  s->keys.set_bit(key);               // QQ: remove this ?
+	  s->keys.set_bit(key);               // TODO: remove this ?
 
 	  refs=0;
           const_ref.clear_all();
@@ -2956,13 +3342,22 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
 	    keyuse++;
 	  } while (keyuse->table == table && keyuse->key == key);
 
+          TABLE_LIST *embedding= table->pos_in_table_list->embedding;
+          /*
+            TODO (low priority): currently we ignore the const tables that
+            are within a semi-join nest which is within an outer join nest.
+            The effect of this is that we don't do const substitution for
+            such tables.
+          */
 	  if (eq_part.is_prefix(table->key_info[key].key_parts) &&
               !table->fulltext_searched && 
-              !table->pos_in_table_list->embedding)
+              (!embedding || (embedding->sj_on_expr && !embedding->embedding)))
 	  {
             if (table->key_info[key].flags & HA_NOSAME)
             {
-	      if (const_ref == eq_part)
+	      if (const_ref == eq_part &&
+                  !((outer_join & table->map) &&
+                    (*s->on_expr_ref)->is_expensive()))
 	      {					// Found everything for ref.
 	        int tmp;
 	        ref_changed = 1;
@@ -2992,7 +3387,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
       }
     }
   } while (join->const_table_map & found_ref && ref_changed);
-
+ 
   /* 
     Update info on indexes that can be used for search lookups as
     reading const tables may has added new sargable predicates. 
@@ -3017,15 +3412,27 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
 
   for (s=stat ; s < stat_end ; s++)
   {
+    s->startup_cost= 0;
     if (s->type == JT_SYSTEM || s->type == JT_CONST)
     {
       /* Only one matching row */
-      s->found_records=s->records=s->read_time=1; s->worst_seeks=1.0;
+      s->found_records= s->records= 1;
+      s->read_time=1.0; 
+      s->worst_seeks=1.0;
       continue;
     }
     /* Approximate found rows and time to read them */
-    s->found_records=s->records=s->table->file->stats.records;
-    s->read_time=(ha_rows) s->table->file->scan_time();
+    if (s->table->is_filled_at_execution())
+    {
+      get_delayed_table_estimates(s->table, &s->records, &s->read_time,
+                                  &s->startup_cost);
+      s->found_records= s->records;
+      s->table->quick_condition_rows= s->records; // this tab's own table
+    }
+    else
+    {
+       s->scan_time();
+    }
 
     /*
       Set a max range of how many seeks we can expect when using keys
@@ -3042,9 +3449,17 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
       all select distinct fields participate in one index.
     */
     add_group_and_distinct_keys(join, s);
-
-    if (!s->const_keys.is_clear_all() &&
-        !s->table->pos_in_table_list->embedding)
+    
+    /*
+      Perform range analysis if there are keys it could use (1). 
+      Don't do range analysis if we're on the inner side of an outer join (2).
+      Do range analysis if we're on the inner side of a semi-join (3).
+    */
+    if (!s->const_keys.is_clear_all() &&                          // (1)
+        (!s->table->pos_in_table_list->embedding ||               // (2)
+         (s->table->pos_in_table_list->embedding &&               // (3)
+          s->table->pos_in_table_list->embedding->sj_on_expr)) && // (3)
+        !s->table->is_filled_at_execution())
     {
       ha_rows records;
       SQL_SELECT *select;
@@ -3082,31 +3497,62 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
       if (records != HA_POS_ERROR)
       {
 	s->found_records=records;
-	s->read_time= (ha_rows) (s->quick ? s->quick->read_time : 0.0);
+	s->read_time= s->quick ? s->quick->read_time : 0.0;
       }
       delete select;
     }
   }
 
+  if (pull_out_semijoin_tables(join))
+    DBUG_RETURN(TRUE);
+
   join->join_tab=stat;
   join->map2table=stat_ref;
-  join->all_tables= table_vector;
+  join->table= table_vector;
   join->const_tables=const_count;
   join->found_const_table_map=found_const_table_map;
 
-  /* Find an optimal join order of the non-constant tables. */
-  if (join->const_tables != join->tables)
-  {
+  if (join->const_tables != join->table_count)
     optimize_keyuse(join, keyuse_array);
-    if (choose_plan(join, all_table_map & ~join->const_table_map))
-      goto error;
-  }
-  else
+   
+  if (optimize_semijoin_nests(join, all_table_map))
+    DBUG_RETURN(TRUE); /* purecov: inspected */
+
   {
-    memcpy((uchar*) join->best_positions,(uchar*) join->positions,
-	   sizeof(POSITION)*join->const_tables);
-    join->best_read=1.0;
+    ha_rows records= 1;
+    SELECT_LEX_UNIT *unit= join->select_lex->master_unit();
+
+    /* Find an optimal join order of the non-constant tables. */
+    if (join->const_tables != join->table_count)
+    {
+      if (choose_plan(join, all_table_map & ~join->const_table_map))
+        goto error;
+    }
+    else
+    {
+      memcpy((uchar*) join->best_positions,(uchar*) join->positions,
+	     sizeof(POSITION)*join->const_tables);
+      join->record_count= 1.0;
+      join->best_read=1.0;
+    }
+  
+    if (!(join->select_options & SELECT_DESCRIBE) &&
+        unit->derived && unit->derived->is_materialized_derived())
+    {
+      /*
+        Calculate estimated number of rows for materialized derived
+        table/view.
+      */
+      for (i= 0; i < join->table_count ; i++)
+        records*= join->best_positions[i].records_read ?
+                  (ha_rows)join->best_positions[i].records_read : 1;
+      join->select_lex->increase_derived_records(records);
+    }
   }
+
+  if (join->choose_subquery_plan(all_table_map & ~join->const_table_map))
+    goto error;
+
   /* Generate an execution plan from the found optimal join order. */
   DBUG_RETURN(join->thd->killed || get_best_combination(join));
 
@@ -3117,8 +3563,12 @@ error:
     may not be assigned yet by this function (which is building join_tab).
     Dangling TABLE::reginfo.join_tab may cause part_of_refkey to choke. 
   */
-  for (tables= tables_arg; tables; tables= tables->next_leaf)
-    tables->table->reginfo.join_tab= NULL;
+  { /* Names chosen so as not to shadow function-level 'table' and 'ti' */
+    TABLE_LIST *tbl;
+    List_iterator<TABLE_LIST> cleanup_it(tables_list);
+    while ((tbl= cleanup_it++))
+      tbl->table->reginfo.join_tab= NULL;
+  }
   DBUG_RETURN (1);
 }
 
@@ -3144,25 +3594,53 @@ typedef struct key_field_t {
   */
   bool          null_rejecting; 
   bool         *cond_guard; /* See KEYUSE::cond_guard */
+  uint          sj_pred_no; /* See KEYUSE::sj_pred_no */
 } KEY_FIELD;
 
-/* Values in optimize */
-#define KEY_OPTIMIZE_EXISTS		1
-#define KEY_OPTIMIZE_REF_OR_NULL	2
-
 /**
   Merge new key definitions to old ones, remove those not used in both.
 
   This is called for OR between different levels.
 
-  To be able to do 'ref_or_null' we merge a comparison of a column
-  and 'column IS NULL' to one test.  This is useful for sub select queries
-  that are internally transformed to something like:.
+  That is, the function operates on an array of KEY_FIELD elements which has
+  two parts:
+
+                      $LEFT_PART             $RIGHT_PART
+             +-----------------------+-----------------------+
+            start                new_fields                 end
+         
+  $LEFT_PART and $RIGHT_PART are arrays that have KEY_FIELD elements for two
+  parts of the OR condition. Our task is to produce an array of KEY_FIELD 
+  elements that would correspond to "$LEFT_PART OR $RIGHT_PART". 
+  
+  The rules for combining elements are as follows:
+
+    (keyfieldA1 AND keyfieldA2 AND ...) OR (keyfieldB1 AND keyfieldB2 AND ...)=
+     
+     = AND_ij (keyfieldA_i OR keyfieldB_j)
+  
+  We discard all (keyfieldA_i OR keyfieldB_j) that refer to different
+  fields. For those referring to the same field, the logic is as follows:
+    
+    t.keycol=expr1 OR t.keycol=expr2 -> (since expr1 and expr2 are different 
+                                         we can't produce a single equality,
+                                         so produce nothing)
+
+    t.keycol=expr1 OR t.keycol=expr1 -> t.keycol=expr1
+
+    t.keycol=expr1 OR t.keycol IS NULL -> t.keycol=expr1, and also set
+                                          KEY_OPTIMIZE_REF_OR_NULL flag
+
+  The last one is for ref_or_null access. We handle this case specially
+  because it's needed for evaluating IN subqueries that are internally
+  transformed into 
 
   @code
-  SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL 
+    EXISTS(SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL)
   @endcode
 
+  See add_key_fields() for discussion of what is and_level.
+
   KEY_FIELD::null_rejecting is processed as follows: @n
   result has null_rejecting=true if it is set for both ORed references.
   for example:
@@ -3282,6 +3760,52 @@ merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
 }
 
 
+/*
+  Given a field, return its index in semi-join's select list, or UINT_MAX
+
+  DESCRIPTION
+    Only the nest directly embedding the field's table is examined, and only
+    if it is a semi-join nest (sj_on_expr is set). The first left_expr->cols()
+    entries of the subquery's ref_pointer_array are searched for a FIELD_ITEM
+    whose field equals the given one. The value is used by LooseScan strategy.
+*/
+
+static uint get_semi_join_select_list_index(Field *field)
+{
+  uint res= UINT_MAX;
+  TABLE_LIST *emb_sj_nest;
+  if ((emb_sj_nest= field->table->pos_in_table_list->embedding) &&
+      emb_sj_nest->sj_on_expr)
+  {
+    Item_in_subselect *subq_pred= emb_sj_nest->sj_subq_pred;
+    st_select_lex *subq_lex= subq_pred->unit->first_select();
+    if (subq_pred->left_expr->cols() == 1)
+    {
+      Item *sel_item= subq_lex->ref_pointer_array[0];
+      if (sel_item->type() == Item::FIELD_ITEM &&
+          ((Item_field*)sel_item)->field->eq(field))
+      {
+        res= 0;
+      }
+    }
+    else
+    {
+      for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
+      {
+        Item *sel_item= subq_lex->ref_pointer_array[i];
+        if (sel_item->type() == Item::FIELD_ITEM &&
+            ((Item_field*)sel_item)->field->eq(field))
+        {
+          res= i;
+          break;
+        }
+      }
+    }
+  }
+  return res;
+}
+
+
 /**
   Add a possible key to array of possible keys if it's usable as a key
 
@@ -3291,6 +3815,7 @@ merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
     @param field           Field used in comparision
     @param eq_func         True if we used =, <=> or IS NULL
     @param value           Value used for comparison with field
+    @param num_values      Number of values[] that we are comparing against
     @param usable_tables   Tables which can be used for key optimization
     @param sargables       IN/OUT Array of found sargable candidates
 
@@ -3303,21 +3828,30 @@ merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
 */
 
 static void
-add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
+add_key_field(JOIN *join,
+              KEY_FIELD **key_fields,uint and_level, Item_func *cond,
               Field *field, bool eq_func, Item **value, uint num_values,
               table_map usable_tables, SARGABLE_PARAM **sargables)
 {
-  uint exists_optimize= 0;
-  if (!(field->flags & PART_KEY_FLAG))
+  uint optimize= 0;  
+  if (eq_func &&
+      ((join->is_allowed_hash_join_access() &&
+        field->hash_join_is_possible()) ||
+       (field->table->pos_in_table_list->is_materialized_derived() &&
+        !field->table->created)))
+  {
+    optimize= KEY_OPTIMIZE_EQ;
+  }   
+  else if (!(field->flags & PART_KEY_FLAG))
   {
     // Don't remove column IS NULL on a LEFT JOIN table
     if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
         !field->table->maybe_null || field->null_ptr)
       return;					// Not a key. Skip it
-    exists_optimize= KEY_OPTIMIZE_EXISTS;
+    optimize= KEY_OPTIMIZE_EXISTS;
     DBUG_ASSERT(num_values == 1);
   }
-  else
+  if (optimize != KEY_OPTIMIZE_EXISTS)
   {
     table_map used_tables=0;
     bool optimizable=0;
@@ -3334,12 +3868,12 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
       if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
           !field->table->maybe_null || field->null_ptr)
 	return;					// Can't use left join optimize
-      exists_optimize= KEY_OPTIMIZE_EXISTS;
+      optimize= KEY_OPTIMIZE_EXISTS;
     }
     else
     {
       JOIN_TAB *stat=field->table->reginfo.join_tab;
-      key_map possible_keys=field->key_start;
+      key_map possible_keys=field->get_possible_keys();
       possible_keys.intersect(field->table->keys_in_use_for_query);
       stat[0].keys.merge(possible_keys);             // Add possible keys
 
@@ -3354,7 +3888,8 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
          Field BETWEEN ...
          Field IN ...
       */
-      stat[0].key_dependent|=used_tables;
+      if (field->flags & PART_KEY_FLAG)
+        stat[0].key_dependent|=used_tables;
 
       bool is_const=1;
       for (uint i=0; i<num_values; i++)
@@ -3377,30 +3912,21 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
         (*sargables)->arg_value= value;
         (*sargables)->num_values= num_values;
       }
+      if (!eq_func) // eq_func is NEVER true when num_values > 1
+        return;
+
       /*
-	We can't always use indexes when comparing a string index to a
-	number. cmp_type() is checked to allow compare of dates to numbers.
-        eq_func is NEVER true when num_values > 1
+	We can't use indexes when comparing a string index to a
+	number or two strings if the effective collation
+        of the operation differs from the field collation.
        */
-      if (!eq_func)
-        return;
-      if (field->result_type() == STRING_RESULT)
+
+      if (field->cmp_type() == STRING_RESULT)
       {
-        if ((*value)->result_type() != STRING_RESULT)
-        {
-          if (field->cmp_type() != (*value)->result_type())
-            return;
-        }
-        else
-        {
-          /*
-            We can't use indexes if the effective collation
-            of the operation differ from the field collation.
-          */
-          if (field->cmp_type() == STRING_RESULT &&
-              ((Field_str*)field)->charset() != cond->compare_collation())
+        if ((*value)->cmp_type() != STRING_RESULT)
             return;
-        }
+        if (((Field_str*)field)->charset() != cond->compare_collation())
+          return;
       }
     }
   }
@@ -3413,8 +3939,8 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
   (*key_fields)->field=		field;
   (*key_fields)->eq_func=	eq_func;
   (*key_fields)->val=		*value;
-  (*key_fields)->level=		and_level;
-  (*key_fields)->optimize=	exists_optimize;
+  (*key_fields)->level=         and_level;
+  (*key_fields)->optimize=      optimize;
   /*
     If the condition has form "tbl.keypart = othertbl.field" and 
     othertbl.field can be NULL, there will be no matches if othertbl.field 
@@ -3422,11 +3948,19 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
     We use null_rejecting in add_not_null_conds() to add
     'othertbl.field IS NOT NULL' to tab->select_cond.
   */
-  (*key_fields)->null_rejecting= ((cond->functype() == Item_func::EQ_FUNC ||
-                                   cond->functype() == Item_func::MULT_EQUAL_FUNC) &&
-                                  ((*value)->type() == Item::FIELD_ITEM) &&
-                                  ((Item_field*)*value)->field->maybe_null());
+  {
+    Item *real= (*value)->real_item();
+    if (((cond->functype() == Item_func::EQ_FUNC) ||
+         (cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
+        (real->type() == Item::FIELD_ITEM) &&
+        ((Item_field*)real)->field->maybe_null())
+      (*key_fields)->null_rejecting= true;
+    else
+      (*key_fields)->null_rejecting= false;
+  }
   (*key_fields)->cond_guard= NULL;
+
+  (*key_fields)->sj_pred_no= get_semi_join_select_list_index(field);
   (*key_fields)++;
 }
 
@@ -3453,29 +3987,29 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
 */
 
 static void
-add_key_equal_fields(KEY_FIELD **key_fields, uint and_level,
-                     Item_func *cond, Item_field *field_item,
+add_key_equal_fields(JOIN *join, KEY_FIELD **key_fields, uint and_level,
+                     Item_func *cond, Item *field_item,
                      bool eq_func, Item **val,
                      uint num_values, table_map usable_tables,
                      SARGABLE_PARAM **sargables)
 {
-  Field *field= field_item->field;
-  add_key_field(key_fields, and_level, cond, field,
+  Field *field= ((Item_field *) (field_item->real_item()))->field;
+  add_key_field(join, key_fields, and_level, cond, field,
                 eq_func, val, num_values, usable_tables, sargables);
-  Item_equal *item_equal= field_item->item_equal;
+  Item_equal *item_equal= field_item->get_item_equal();
   if (item_equal)
   { 
     /*
       Add to the set of possible key values every substitution of
       the field for an equal field included into item_equal
     */
-    Item_equal_iterator it(*item_equal);
-    Item_field *item;
-    while ((item= it++))
+    Item_equal_fields_iterator it(*item_equal);
+    while (it++)
     {
-      if (!field->eq(item->field))
+      Field *equal_field= it.get_curr_field();
+      if (!field->eq(equal_field))
       {
-        add_key_field(key_fields, and_level, cond, item->field,
+        add_key_field(join, key_fields, and_level, cond, equal_field,
                       eq_func, val, num_values, usable_tables,
                       sargables);
       }
@@ -3500,11 +4034,29 @@ static bool
 is_local_field (Item *field)
 {
   return field->real_item()->type() == Item::FIELD_ITEM
-    && !(field->used_tables() & OUTER_REF_TABLE_BIT)
-    && !((Item_field *)field->real_item())->depended_from;
+     && !(field->used_tables() & OUTER_REF_TABLE_BIT)
+    && !((Item_field *)field->real_item())->get_depended_from();
 }
 
 
+/*
+  In this and other functions, and_level is a number that is ever-growing
+  and is different for the contents of every AND or OR clause. For example,
+  when processing clause
+
+     (a AND b AND c) OR (x AND y)
+  
+  we'll have
+   * KEY_FIELD elements for (a AND b AND c) are assigned and_level=1
+   * KEY_FIELD elements for (x AND y) are assigned and_level=2
+   * OR operation is performed, and whatever elements are left after it are
+     assigned and_level=3.
+
+  The primary reason for having and_level attribute is the OR operation which 
+  uses and_level to mark KEY_FIELDs that should get into the result of the OR
+  operation
+*/
+
 static void
 add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
                COND *cond, table_map usable_tables,
@@ -3611,10 +4163,10 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
         condition is of the form::
         '<field> BETWEEN value[1] AND value[2]'
       */
-      if (is_local_field (values[0]))
+      if (is_local_field(values[0]))
       {
         field_item= (Item_field *) (values[0]->real_item());
-        add_key_equal_fields(key_fields, *and_level, cond_func,
+        add_key_equal_fields(join, key_fields, *and_level, cond_func,
                              field_item, equal_func, &values[1],
                              num_values, usable_tables, sargables);
       }
@@ -3625,10 +4177,10 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
       */
       for (uint i= 1; i <= num_values; i++)
       {
-        if (is_local_field (values[i]))
+        if (is_local_field(values[i]))
         {
           field_item= (Item_field *) (values[i]->real_item());
-          add_key_equal_fields(key_fields, *and_level, cond_func,
+          add_key_equal_fields(join, key_fields, *and_level, cond_func,
                                field_item, equal_func, values,
                                1, usable_tables, sargables);
         }
@@ -3645,7 +4197,7 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
         values--;
       DBUG_ASSERT(cond_func->functype() != Item_func::IN_FUNC ||
                   cond_func->argument_count() != 2);
-      add_key_equal_fields(key_fields, *and_level, cond_func,
+      add_key_equal_fields(join, key_fields, *and_level, cond_func,
                            (Item_field*) (cond_func->key_item()->real_item()),
                            0, values, 
                            cond_func->argument_count()-1,
@@ -3660,8 +4212,9 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
 
     if (is_local_field (cond_func->arguments()[0]))
     {
-      add_key_equal_fields(key_fields, *and_level, cond_func,
-	                (Item_field*) (cond_func->arguments()[0])->real_item(),
+      add_key_equal_fields(join, key_fields, *and_level, cond_func,
+                           (Item_field*) (cond_func->arguments()[0])->
+                           real_item(),
 		           equal_func,
                            cond_func->arguments()+1, 1, usable_tables,
                            sargables);
@@ -3669,8 +4222,9 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
     if (is_local_field (cond_func->arguments()[1]) &&
 	cond_func->functype() != Item_func::LIKE_FUNC)
     {
-      add_key_equal_fields(key_fields, *and_level, cond_func, 
-                       (Item_field*) (cond_func->arguments()[1])->real_item(),
+      add_key_equal_fields(join, key_fields, *and_level, cond_func, 
+                           (Item_field*) (cond_func->arguments()[1])->
+                           real_item(),
 		           equal_func,
                            cond_func->arguments(),1,usable_tables,
                            sargables);
@@ -3685,17 +4239,17 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
       Item *tmp=new Item_null;
       if (unlikely(!tmp))                       // Should never be true
 	return;
-      add_key_equal_fields(key_fields, *and_level, cond_func,
-		    (Item_field*) (cond_func->arguments()[0])->real_item(),
-		    cond_func->functype() == Item_func::ISNULL_FUNC,
+      add_key_equal_fields(join, key_fields, *and_level, cond_func,
+                           (Item_field*) (cond_func->arguments()[0])->
+                           real_item(),
+                           cond_func->functype() == Item_func::ISNULL_FUNC,
 			   &tmp, 1, usable_tables, sargables);
     }
     break;
   case Item_func::OPTIMIZE_EQUAL:
     Item_equal *item_equal= (Item_equal *) cond;
     Item *const_item= item_equal->get_const();
-    Item_equal_iterator it(*item_equal);
-    Item_field *item;
+    Item_equal_fields_iterator it(*item_equal);
     if (const_item)
     {
       /*
@@ -3703,9 +4257,10 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
         field1=const_item as a condition allowing an index access of the table
         with field1 by the keys value of field1.
       */   
-      while ((item= it++))
+      while (it++)
       {
-        add_key_field(key_fields, *and_level, cond_func, item->field,
+        Field *equal_field= it.get_curr_field();
+        add_key_field(join, key_fields, *and_level, cond_func, equal_field,
                       TRUE, &const_item, 1, usable_tables, sargables);
       }
     }
@@ -3717,16 +4272,18 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
         field1=field2 as a condition allowing an index access of the table
         with field1 by the keys value of field2.
       */   
-      Item_equal_iterator fi(*item_equal);
-      while ((item= fi++))
+      Item_equal_fields_iterator fi(*item_equal);
+      while (fi++)
       {
-        Field *field= item->field;
+        Field *field= fi.get_curr_field();
+        Item *item;
         while ((item= it++))
         {
-          if (!field->eq(item->field))
+          Field *equal_field= it.get_curr_field();
+          if (!field->eq(equal_field))
           {
-            add_key_field(key_fields, *and_level, cond_func, field,
-                          TRUE, (Item **) &item, 1, usable_tables,
+            add_key_field(join, key_fields, *and_level, cond_func, field,
+                          TRUE, &item, 1, usable_tables,
                           sargables);
           }
         }
@@ -3746,6 +4303,55 @@ max_part_bit(key_part_map bits)
   return found;
 }
 
+
+/**
+  Add a new keyuse to the specified array of KEYUSE objects
+
+  @param[in,out]  keyuse_array  array of keyuses to be extended 
+  @param[in]      key_field     info on the key use occurrence
+  @param[in]      key           key number for the keyuse to be added
+  @param[in]      part          key part for the keyuse to be added
+
+  @note
+  The function builds a new KEYUSE object for a key use utilizing the info
+  on the left and right parts of the given key use  extracted from the 
+  structure key_field, the key number and key part for this key use. 
+  The built object is added to the dynamic array keyuse_array.
+
+  @retval         0             the built object is successfully added
+  @retval         1             otherwise
+*/
+
+static bool
+add_keyuse(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field,
+          uint key, uint part)
+{
+  KEYUSE keyuse;
+  Field *field= key_field->field;
+
+  keyuse.table= field->table;
+  keyuse.val= key_field->val;
+  keyuse.key= key;
+  if (!is_hash_join_key_no(key))
+  {
+    keyuse.keypart=part;
+    keyuse.keypart_map= (key_part_map) 1 << part;
+  }
+  else
+  {
+    keyuse.keypart= field->field_index;
+    keyuse.keypart_map= (key_part_map) 0;
+  }
+  keyuse.used_tables= key_field->val->used_tables();
+  keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
+  keyuse.ref_table_rows= 0;
+  keyuse.null_rejecting= key_field->null_rejecting;
+  keyuse.cond_guard= key_field->cond_guard;
+  keyuse.sj_pred_no= key_field->sj_pred_no;
+  return (insert_dynamic(keyuse_array,(uchar*) &keyuse));
+}
+
+
 /*
   Add all keys with uses 'field' for some keypart
   If field->and_level != and_level then only mark key_part as const_part
@@ -3756,11 +4362,10 @@ max_part_bit(key_part_map bits)
 */
 
 static bool
-add_key_part(DYNAMIC_ARRAY *keyuse_array,KEY_FIELD *key_field)
+add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field)
 {
   Field *field=key_field->field;
   TABLE *form= field->table;
-  KEYUSE keyuse;
 
   if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
   {
@@ -3776,20 +4381,24 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array,KEY_FIELD *key_field)
       {
 	if (field->eq(form->key_info[key].key_part[part].field))
 	{
-	  keyuse.table= field->table;
-	  keyuse.val =  key_field->val;
-	  keyuse.key =  key;
-	  keyuse.keypart=part;
-	  keyuse.keypart_map= (key_part_map) 1 << part;
-	  keyuse.used_tables=key_field->val->used_tables();
-	  keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
-          keyuse.null_rejecting= key_field->null_rejecting;
-          keyuse.cond_guard= key_field->cond_guard;
-	  if (insert_dynamic(keyuse_array,(uchar*) &keyuse))
+          if (add_keyuse(keyuse_array, key_field, key, part))
             return TRUE;
 	}
       }
     }
+    if (field->hash_join_is_possible() &&
+        (key_field->optimize & KEY_OPTIMIZE_EQ) &&
+        key_field->val->used_tables())
+    {
+      /* 
+        If a key use is extracted from an equi-join predicate then it is
+        added not only as a key use for every index whose component can
+        be evaluated utilizing this key use, but also as a key use for
+        hash join. Such key uses are marked with a special key number. 
+      */    
+      if (add_keyuse(keyuse_array, key_field, get_hash_join_key_no(), 0))
+        return TRUE;
+    }
   }
   return FALSE;
 }
@@ -3822,7 +4431,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
           ((functype == Item_func::GE_FUNC && arg1->val_real() > 0) ||
            (functype == Item_func::GT_FUNC && arg1->val_real() >=0)))
         cond_func= (Item_func_match *) arg0;
-      else if (arg0->const_item() &&
+      else if (arg0->const_item() && arg0->cols() == 1 &&
                 arg1->type() == Item::FUNC_ITEM &&
                 ((Item_func *) arg1)->functype() == Item_func::FT_FUNC &&
                ((functype == Item_func::LE_FUNC && arg0->val_real() > 0) ||
@@ -3857,6 +4466,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
   keyuse.used_tables=cond_func->key_item()->used_tables();
   keyuse.optimize= 0;
   keyuse.keypart_map= 0;
+  keyuse.sj_pred_no= UINT_MAX;
   return insert_dynamic(keyuse_array,(uchar*) &keyuse);
 }
 
@@ -3869,6 +4479,9 @@ sort_keyuse(KEYUSE *a,KEYUSE *b)
     return (int) (a->table->tablenr - b->table->tablenr);
   if (a->key != b->key)
     return (int) (a->key - b->key);
+  if (a->key == MAX_KEY && b->key == MAX_KEY && 
+      a->used_tables != b->used_tables)
+    return (int) ((ulong) a->used_tables - (ulong) b->used_tables);
   if (a->keypart != b->keypart)
     return (int) (a->keypart - b->keypart);
   // Place const values before other ones
@@ -3921,20 +4534,34 @@ static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
                                   SARGABLE_PARAM **sargables)
 {
   List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
+  List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
+  bool have_another = FALSE;
   table_map tables= 0;
   TABLE_LIST *table;
   DBUG_ASSERT(nested_join_table->nested_join);
 
-  while ((table= li++))
+  while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
+                                            (table= li++))))
   {
     if (table->nested_join)
-      add_key_fields_for_nj(join, table, end, and_level, sargables);
+    {
+      if (!table->on_expr)
+      {
+        /* It's a semi-join nest. Walk into it as if it wasn't a nest */
+        have_another= TRUE;
+        li2= li;
+        li= List_iterator<TABLE_LIST>(table->nested_join->join_list); 
+      }
+      else
+        add_key_fields_for_nj(join, table, end, and_level, sargables);
+    }
     else
       if (!table->on_expr)
         tables |= table->table->map;
   }
-  add_key_fields(join, end, and_level, nested_join_table->on_expr, tables,
-                 sargables);
+  if (nested_join_table->on_expr)
+    add_key_fields(join, end, and_level, nested_join_table->on_expr, tables,
+                   sargables);
 }
 
 
@@ -3961,11 +4588,10 @@ static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
 
 static bool
 update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
-                    uint tables, COND *cond, COND_EQUAL *cond_equal,
-                    table_map normal_tables, SELECT_LEX *select_lex,
-                    SARGABLE_PARAM **sargables)
+                    uint tables, COND *cond, table_map normal_tables,
+                    SELECT_LEX *select_lex, SARGABLE_PARAM **sargables)
 {
-  uint	and_level,i,found_eq_constant;
+  uint	and_level,i;
   KEY_FIELD *key_fields, *end, *field;
   uint sz;
   uint m= max(select_lex->max_equal_elems,1);
@@ -4005,19 +4631,21 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
 
   if (my_init_dynamic_array(keyuse,sizeof(KEYUSE),20,64))
     return TRUE;
+
   if (cond)
   {
+    KEY_FIELD *saved_field= field;
     add_key_fields(join_tab->join, &end, &and_level, cond, normal_tables,
                    sargables);
     for (; field != end ; field++)
     {
-      if (add_key_part(keyuse,field))
-        return TRUE;
+
       /* Mark that we can optimize LEFT JOIN */
       if (field->val->type() == Item::NULL_ITEM &&
 	  !field->field->real_maybe_null())
 	field->field->table->reginfo.not_exists_optimize=1;
     }
+    field= saved_field;
   }
   for (i=0 ; i < tables ; i++)
   {
@@ -4061,70 +4689,85 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
       return TRUE;
   }
 
-  /*
-    Sort the array of possible keys and remove the following key parts:
-    - ref if there is a keypart which is a ref and a const.
-      (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
-      then we skip the key part corresponding to b=t2.d)
-    - keyparts without previous keyparts
-      (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
-      used in the query, we drop the partial key parts from consideration).
-    Special treatment for ft-keys.
-  */
-  if (keyuse->elements)
-  {
-    KEYUSE key_end,*prev,*save_pos,*use;
+  return FALSE;
+}
 
-    my_qsort(keyuse->buffer,keyuse->elements,sizeof(KEYUSE),
-	  (qsort_cmp) sort_keyuse);
 
-    bzero((char*) &key_end,sizeof(key_end));    /* Add for easy testing */
-    if (insert_dynamic(keyuse,(uchar*) &key_end))
-      return TRUE;
+/**
+  Sort the array of possible keys and remove the following key parts:
+  - ref if there is a keypart which is a ref and a const.
+    (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
+    then we skip the key part corresponding to b=t2.d)
+  - keyparts without previous keyparts
+    (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
+    used in the query, we drop the partial key parts from consideration).
+  Special treatment for ft-keys.
+*/
+
+static bool sort_and_filter_keyuse(THD *thd, DYNAMIC_ARRAY *keyuse, 
+                                   bool skip_unprefixed_keyparts)
+{
+  KEYUSE key_end, *prev, *save_pos, *use;
+  uint found_eq_constant, i;
+
+  DBUG_ASSERT(keyuse->elements);
+
+  my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE),
+           (qsort_cmp) sort_keyuse);
 
-    use=save_pos=dynamic_element(keyuse,0,KEYUSE*);
-    prev= &key_end;
-    found_eq_constant=0;
-    for (i=0 ; i < keyuse->elements-1 ; i++,use++)
+  bzero((char*) &key_end, sizeof(key_end));    /* Add for easy testing */
+  if (insert_dynamic(keyuse, (uchar*) &key_end))
+    return TRUE;
+
+  if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS))
+    generate_derived_keys(keyuse);
+
+  use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
+  prev= &key_end;
+  found_eq_constant= 0;
+  for (i=0 ; i < keyuse->elements-1 ; i++,use++)
+  {
+    if (!use->is_for_hash_join())
     {
       if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
-	use->table->const_key_parts[use->key]|= use->keypart_map;
+        use->table->const_key_parts[use->key]|= use->keypart_map;
       if (use->keypart != FT_KEYPART)
       {
-	if (use->key == prev->key && use->table == prev->table)
-	{
-	  if (prev->keypart+1 < use->keypart ||
-	      (prev->keypart == use->keypart && found_eq_constant))
-	    continue;				/* remove */
-	}
-	else if (use->keypart != 0)		// First found must be 0
-	  continue;
+        if (use->key == prev->key && use->table == prev->table)
+        {
+          if ((prev->keypart+1 < use->keypart && skip_unprefixed_keyparts) ||
+              (prev->keypart == use->keypart && found_eq_constant))
+            continue;				/* remove */
+        }
+        else if (use->keypart != 0 && skip_unprefixed_keyparts)
+          continue; /* remove - first found must be 0 */
       }
 
-#if defined(__GNUC__) && !MY_GNUC_PREREQ(4,4)
-      /*
-        Old gcc used a memcpy(), which is undefined if save_pos==use:
-        http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
-        http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
-      */
-      if (save_pos != use)
-#endif
-        *save_pos= *use;
-      prev=use;
+      prev= use;
       found_eq_constant= !use->used_tables;
-      /* Save ptr to first use */
-      if (!use->table->reginfo.join_tab->keyuse)
-	use->table->reginfo.join_tab->keyuse=save_pos;
       use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
-      save_pos++;
     }
-    i=(uint) (save_pos-(KEYUSE*) keyuse->buffer);
-    (void) set_dynamic(keyuse,(uchar*) &key_end,i);
-    keyuse->elements=i;
+    /*
+      Old gcc used a memcpy(), which is undefined if save_pos==use:
+      http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
+      http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
+      This also disables a valgrind warning, so better to have the test.
+    */
+    if (save_pos != use)
+      *save_pos= *use;
+    /* Save ptr to first use */
+    if (!use->table->reginfo.join_tab->keyuse)
+      use->table->reginfo.join_tab->keyuse= save_pos;
+    save_pos++;
   }
+  i= (uint) (save_pos-(KEYUSE*) keyuse->buffer);
+  (void) set_dynamic(keyuse,(uchar*) &key_end,i);
+  keyuse->elements= i;
+
   return FALSE;
 }
 
+
 /**
   Update some values in keyuse for faster choose_plan() loop.
 */
@@ -4149,12 +4792,15 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
 	(map= (keyuse->used_tables & ~join->const_table_map &
 	       ~OUTER_REF_TABLE_BIT)))
     {
-      uint tablenr;
-      for (tablenr=0 ; ! (map & 1) ; map>>=1, tablenr++) ;
-      if (map == 1)			// Only one table
+      uint n_tables= my_count_bits(map);
+      if (n_tables == 1)			// Only one table
       {
-	TABLE *tmp_table=join->all_tables[tablenr];
-	keyuse->ref_table_rows= max(tmp_table->file->stats.records, 100);
+        Table_map_iterator it(map);
+        int tablenr= it.next_bit();
+        DBUG_ASSERT(tablenr != Table_map_iterator::BITMAP_END);
+	TABLE *tmp_table=join->table[tablenr];
+        if (tmp_table) // already created
+          keyuse->ref_table_rows= max(tmp_table->file->stats.records, 100);
       }
     }
     /*
@@ -4190,7 +4836,7 @@ is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
   Item_sum **sum_item_ptr;
   bool result= false;
 
-  if (join->tables != 1 ||                    /* reference more than 1 table */
+  if (join->table_count != 1 ||                    /* reference more than 1 table */
       join->select_distinct ||                /* or a DISTINCT */
       join->select_lex->olap == ROLLUP_TYPE)  /* Check (B3) for ROLLUP */
     return false;
@@ -4315,14 +4961,17 @@ add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
 
 /** Save const tables first as used tables. */
 
-static void
-set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
+void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
 {
   join->positions[idx].table= table;
   join->positions[idx].key=key;
   join->positions[idx].records_read=1.0;	/* This is a const table */
   join->positions[idx].ref_depend_map= 0;
 
+  join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
+  join->positions[idx].sj_strategy= SJ_OPT_NONE;
+  join->positions[idx].use_join_buffer= FALSE;
+
   /* Move the const table as down as possible in best_ref */
   JOIN_TAB **pos=join->best_ref+idx+1;
   JOIN_TAB *next=join->best_ref[idx];
@@ -4336,6 +4985,35 @@ set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
 }
 
 
+/* Estimate of the number of matching candidates in the joined table */
+
+inline
+ha_rows matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint)
+{
+  ha_rows records= s->found_records;
+  /*
+    If there is a filtering condition on the table (i.e. ref analyzer found
+    at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
+    preceding this table in the join order we're now considering), then 
+    assume that 25% of the rows will be filtered out by this condition.
+
+    This heuristic is supposed to force tables used in exprZ to be before
+    this table in join order.
+  */
+  if (with_found_constraint)
+    records-= records/4;
+
+    /*
+      If applicable, get a more accurate estimate. Don't use the two
+      heuristics at once.
+    */
+  if (s->table->quick_condition_rows != s->found_records)
+    records= s->table->quick_condition_rows;
+
+  return records;
+}
+
+
 /**
   Find the best access path for an extension of a partial execution
   plan and add this path to the plan.
@@ -4353,23 +5031,28 @@ set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
   @param thd              thread for the connection that submitted the query
   @param remaining_tables set of tables not included into the partial plan yet
   @param idx              the length of the partial plan
+  @param disable_jbuf     TRUE<=> Don't use join buffering
   @param record_count     estimate for the number of records returned by the
                           partial plan
-  @param read_time        the cost of the partial plan
+  @param pos              OUT Table access plan
+  @param loose_scan_pos   OUT Table plan that uses loosescan, or set cost to 
+                              DBL_MAX if not possible.
 
   @return
     None
 */
 
-static void
+void
 best_access_path(JOIN      *join,
                  JOIN_TAB  *s,
-                 THD       *thd,
                  table_map remaining_tables,
                  uint      idx,
+                 bool      disable_jbuf,
                  double    record_count,
-                 double    read_time)
+                 POSITION *pos,
+                 POSITION *loose_scan_pos)
 {
+  THD *thd= join->thd;
   KEYUSE *best_key=         0;
   uint best_max_key_part=   0;
   my_bool found_constraint= 0;
@@ -4379,12 +5062,22 @@ best_access_path(JOIN      *join,
   table_map best_ref_depends_map= 0;
   double tmp;
   ha_rows rec;
+  bool best_uses_jbuf= FALSE;
+  MY_BITMAP *eq_join_set= &s->table->eq_join_set;
+  KEYUSE *hj_start_key= 0;
+
+  Loose_scan_opt loose_scan_opt;
   DBUG_ENTER("best_access_path");
+  
+  bitmap_clear_all(eq_join_set);
 
+  loose_scan_opt.init(join, s, remaining_tables);
+  
   if (s->keyuse)
   {                                            /* Use key if possible */
+    KEYUSE *keyuse;
+    KEYUSE *start_key=0;
     TABLE *table= s->table;
-    KEYUSE *keyuse,*start_key=0;
     double best_records= DBL_MAX;
     uint max_key_part=0;
 
@@ -4392,19 +5085,41 @@ best_access_path(JOIN      *join,
     rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE;  // Assumed records/key
     for (keyuse=s->keyuse ; keyuse->table == table ;)
     {
+      KEY *keyinfo;
       key_part_map found_part= 0;
       table_map found_ref= 0;
       uint key= keyuse->key;
-      KEY *keyinfo= table->key_info+key;
       bool ft_key=  (keyuse->keypart == FT_KEYPART);
       /* Bitmap of keyparts where the ref access is over 'keypart=const': */
       key_part_map const_part= 0;
       /* The or-null keypart in ref-or-null access: */
       key_part_map ref_or_null_part= 0;
+      if (is_hash_join_key_no(key))
+      {
+        /* 
+          Hash join, like any join employing a join buffer, can be used to
+          join only those tables that are joined after the first non-const table
+	*/  
+        if (!(remaining_tables & keyuse->used_tables) &&
+            idx > join->const_tables)
+        {
+          if (!hj_start_key)
+            hj_start_key= keyuse;
+          bitmap_set_bit(eq_join_set, keyuse->keypart);
+        }
+        keyuse++;
+        continue;
+      }
+
+      keyinfo= table->key_info+key;
 
       /* Calculate how many key segments of the current key we can use */
       start_key= keyuse;
 
+      loose_scan_opt.next_ref_key();
+      DBUG_PRINT("info", ("Considering ref access on key %s",
+                          keyuse->table->key_info[keyuse->key].name));
+
       do /* For each keypart */
       {
         uint keypart= keyuse->keypart;
@@ -4413,7 +5128,6 @@ best_access_path(JOIN      *join,
         
         do /* For each way to access the keypart */
         {
-
           /*
             if 1. expression doesn't refer to forward tables
                2. we won't get two ref-or-null's
@@ -4426,8 +5140,8 @@ best_access_path(JOIN      *join,
             if (!(keyuse->used_tables & ~join->const_table_map))
               const_part|= keyuse->keypart_map;
 
-            double tmp2= prev_record_reads(join, idx, (found_ref |
-                                                      keyuse->used_tables));
+            double tmp2= prev_record_reads(join->positions, idx,
+                                           (found_ref | keyuse->used_tables));
             if (tmp2 < best_prev_record_reads)
             {
               best_part_found_ref= keyuse->used_tables & ~join->const_table_map;
@@ -4442,6 +5156,7 @@ best_access_path(JOIN      *join,
             if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
               ref_or_null_part |= keyuse->keypart_map;
           }
+          loose_scan_opt.add_keyuse(remaining_tables, keyuse);
           keyuse++;
         } while (keyuse->table == table && keyuse->key == key &&
                  keyuse->keypart == keypart);
@@ -4451,7 +5166,7 @@ best_access_path(JOIN      *join,
       /*
         Assume that that each key matches a proportional part of table.
       */
-      if (!found_part && !ft_key)
+      if (!found_part && !ft_key && !loose_scan_opt.have_a_case())
         continue;                               // Nothing usable found
 
       if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
@@ -4466,22 +5181,22 @@ best_access_path(JOIN      *join,
           Really, there should be records=0.0 (yes!)
           but 1.0 would be probably safer
         */
-        tmp= prev_record_reads(join, idx, found_ref);
+        tmp= prev_record_reads(join->positions, idx, found_ref);
         records= 1.0;
       }
       else
       {
-        found_constraint= 1;
-        /*
-          Check if we found full key
-        */
+        found_constraint= test(found_part);
+        loose_scan_opt.check_ref_access_part1(s, key, start_key, found_part);
+
+        /* Check if we found full key */
         if (found_part == PREV_BITS(uint,keyinfo->key_parts) &&
             !ref_or_null_part)
         {                                         /* use eq key */
           max_key_part= (uint) ~0;
           if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
           {
-            tmp = prev_record_reads(join, idx, found_ref);
+            tmp = prev_record_reads(join->positions, idx, found_ref);
             records=1.0;
           }
           else
@@ -4548,14 +5263,11 @@ best_access_path(JOIN      *join,
             tmp= records;
             set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
             if (table->covering_keys.is_set(key))
-            {
-              /* we can use only index tree */
-              uint keys_per_block= table->file->stats.block_size/2/
-                (keyinfo->key_length+table->file->ref_length)+1;
-              tmp= record_count*(tmp+keys_per_block-1)/keys_per_block;
-            }
+              tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
             else
-              tmp= record_count*min(tmp,s->worst_seeks);
+              tmp= table->file->read_time(key, 1,
+                                          (ha_rows) min(tmp,s->worst_seeks)-1);
+            tmp*= record_count;
           }
         }
         else
@@ -4715,20 +5427,20 @@ best_access_path(JOIN      *join,
             /* Limit the number of matched rows */
             set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
             if (table->covering_keys.is_set(key))
-            {
-              /* we can use only index tree */
-              uint keys_per_block= table->file->stats.block_size/2/
-                (keyinfo->key_length+table->file->ref_length)+1;
-              tmp= record_count*(tmp+keys_per_block-1)/keys_per_block;
-            }
+              tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
             else
-              tmp= record_count*min(tmp,s->worst_seeks);
+              tmp= table->file->read_time(key, 1,
+                                          (ha_rows) min(tmp,s->worst_seeks)-1);
+            tmp*= record_count;
           }
           else
             tmp= best_time;                    // Do nothing
         }
+
+        tmp += s->startup_cost;
+        loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp);
       } /* not ft_key */
-      if (tmp < best_time - records/(double) TIME_FOR_COMPARE)
+      if (tmp + 0.0001 < best_time - records/(double) TIME_FOR_COMPARE)
       {
         best_time= tmp + records/(double) TIME_FOR_COMPARE;
         best= tmp;
@@ -4737,10 +5449,45 @@ best_access_path(JOIN      *join,
         best_max_key_part= max_key_part;
         best_ref_depends_map= found_ref;
       }
-    }
+    } /* for each key */
     records= best_records;
   }
 
+  /* 
+    If there is no key to access the table, but there is an equi-join
+    predicate connecting the table with the previous tables then we
+    consider the possibility of using hash join.
+    We need also to check that:
+    (1) s is inner table of semi-join -> join cache is allowed for semijoins
+    (2) s is inner table of outer join -> join cache is allowed for outer joins
+  */  
+  if (idx > join->const_tables && best_key == 0 && 
+     !bitmap_is_clear_all(eq_join_set) &&  !disable_jbuf &&
+      (!s->emb_sj_nest ||                     
+       join->allowed_semijoin_with_cache) &&    // (1)
+      (!(s->table->map & join->outer_join) ||
+       join->allowed_outer_join_with_cache))    // (2)
+  {
+    double join_sel= 0.1;
+    /* Estimate the cost of  the hash join access to the table */
+    ha_rows rnd_records= matching_candidates_in_table(s, found_constraint);
+
+    tmp= s->quick ? s->quick->read_time : s->scan_time();
+    tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
+
+    /* We read the table as many times as join buffer becomes full. */
+    tmp*= (1.0 + floor((double) cache_record_length(join,idx) *
+                          record_count /
+                          (double) thd->variables.join_buff_size));
+    best_time= tmp + 
+               (record_count*join_sel) / TIME_FOR_COMPARE * rnd_records;
+    best= tmp;
+    records= rows2double(rnd_records);
+    best_key= hj_start_key;
+    best_ref_depends_map= 0;
+    best_uses_jbuf= TRUE;
+   }
+
   /*
     Don't test table scan if it can't be better.
     Prefer key lookup if we would use the same key for scanning.
@@ -4750,7 +5497,7 @@ best_access_path(JOIN      *join,
     This is because table scans uses index and we would not win
     anything by using a table scan.
 
-    A word for word translation of the below if-statement in psergey's
+    A word for word translation of the below if-statement in sergefp's
     understanding: we check if we should use table scan if:
     (1) The found 'ref' access produces more records than a table scan
         (or index scan, or quick select), or 'ref' is more expensive than
@@ -4768,39 +5515,28 @@ best_access_path(JOIN      *join,
         Since we have a 'ref' access path, and FORCE INDEX instructs us to
         choose it over ALL/index, there is no need to consider a full table
         scan.
+    (5) Non-flattenable semi-joins: don't consider doing a scan of temporary
+        table if we had an option to make lookups into it. In real-world cases,
+        lookups are cheaper than full scans, but when the table is small, they
+        can be [considered to be] more expensive, which causes lookups not to 
+        be used for cases with small datasets, which is annoying.
   */
   if ((records >= s->found_records || best > s->read_time) &&            // (1)
       !(s->quick && best_key && s->quick->index == best_key->key &&      // (2)
         best_max_key_part >= s->table->quick_key_parts[best_key->key]) &&// (2)
       !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) &&   // (3)
         ! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3)
-      !(s->table->force_index && best_key && !s->quick))                 // (4)
+      !(s->table->force_index && best_key && !s->quick) &&               // (4)
+      !(best_key && s->table->pos_in_table_list->jtbm_subselect))        // (5)
   {                                             // Check full join
-    ha_rows rnd_records= s->found_records;
-    /*
-      If there is a filtering condition on the table (i.e. ref analyzer found
-      at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
-      preceding this table in the join order we're now considering), then 
-      assume that 25% of the rows will be filtered out by this condition.
-
-      This heuristic is supposed to force tables used in exprZ to be before
-      this table in join order.
-    */
-    if (found_constraint)
-      rnd_records-= rnd_records/4;
-
-    /*
-      If applicable, get a more accurate estimate. Don't use the two
-      heuristics at once.
-    */
-    if (s->table->quick_condition_rows != s->found_records)
-      rnd_records= s->table->quick_condition_rows;
+    ha_rows rnd_records= matching_candidates_in_table(s, found_constraint);
 
     /*
       Range optimizer never proposes a RANGE if it isn't better
       than FULL: so if RANGE is present, it's always preferred to FULL.
       Here we estimate its cost.
     */
+
     if (s->quick)
     {
       /*
@@ -4815,12 +5551,14 @@ best_access_path(JOIN      *join,
       tmp= record_count *
         (s->quick->read_time +
          (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE);
+
+      loose_scan_opt.check_range_access(join, idx, s->quick);
     }
     else
     {
       /* Estimate cost of reading table. */
-      tmp= s->table->file->scan_time();
-      if (s->table->map & join->outer_join)     // Can't use join cache
+      tmp= s->scan_time();
+      if ((s->table->map & join->outer_join) || disable_jbuf)     // Can't use join cache
       {
         /*
           For each record we have to:
@@ -4848,6 +5586,7 @@ best_access_path(JOIN      *join,
       }
     }
 
+    tmp += s->startup_cost;
     /*
       We estimate the cost of evaluating WHERE clause for found records
       as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
@@ -4855,7 +5594,8 @@ best_access_path(JOIN      *join,
     */
     if (best == DBL_MAX ||
         (tmp  + record_count/(double) TIME_FOR_COMPARE*rnd_records <
-         best + record_count/(double) TIME_FOR_COMPARE*records))
+         (best_key->is_for_hash_join() ? best_time :
+          best + record_count/(double) TIME_FOR_COMPARE*records)))
     {
       /*
         If the table has a range (s->quick is set) make_join_select()
@@ -4866,15 +5606,21 @@ best_access_path(JOIN      *join,
       best_key= 0;
       /* range/index_merge/ALL/index access method are "independent", so: */
       best_ref_depends_map= 0;
+      best_uses_jbuf= test(!disable_jbuf && !((s->table->map & 
+                                               join->outer_join)));
     }
   }
 
   /* Update the cost information for the current partial plan */
-  join->positions[idx].records_read= records;
-  join->positions[idx].read_time=    best;
-  join->positions[idx].key=          best_key;
-  join->positions[idx].table=        s;
-  join->positions[idx].ref_depend_map= best_ref_depends_map;
+  pos->records_read= records;
+  pos->read_time=    best;
+  pos->key=          best_key;
+  pos->table=        s;
+  pos->ref_depend_map= best_ref_depends_map;
+  pos->loosescan_key= MAX_KEY;
+  pos->use_join_buffer= best_uses_jbuf;
+   
+  loose_scan_opt.save_to_position(s, loose_scan_pos);
 
   if (!best_key &&
       idx == join->const_tables &&
@@ -4909,7 +5655,7 @@ best_access_path(JOIN      *join,
     TRUE        Fatal error
 */
 
-static bool
+bool
 choose_plan(JOIN *join, table_map join_tables)
 {
   uint search_depth= join->thd->variables.optimizer_search_depth;
@@ -4918,19 +5664,34 @@ choose_plan(JOIN *join, table_map join_tables)
   DBUG_ENTER("choose_plan");
 
   join->cur_embedding_map= 0;
-  reset_nj_counters(join->join_list);
-  /*
-    if (SELECT_STRAIGHT_JOIN option is set)
-      reorder tables so dependent tables come after tables they depend 
-      on, otherwise keep tables in the order they were specified in the query 
-    else
-      Apply heuristic: pre-sort all access plans with respect to the number of
-      records accessed.
-  */
-  my_qsort(join->best_ref + join->const_tables,
-           join->tables - join->const_tables, sizeof(JOIN_TAB*),
-           straight_join ? join_tab_cmp_straight : join_tab_cmp);
-  
+  join->cur_dups_producing_tables= 0;
+  reset_nj_counters(join, join->join_list);
+  qsort2_cmp jtab_sort_func;
+
+  if (join->emb_sjm_nest)
+  {
+    /* We're optimizing semi-join materialization nest, so put the 
+       tables from this semi-join as first
+    */
+    jtab_sort_func= join_tab_cmp_embedded_first;
+  }
+  else
+  {
+    /*
+      if (SELECT_STRAIGHT_JOIN option is set)
+        reorder tables so dependent tables come after tables they depend 
+        on, otherwise keep tables in the order they were specified in the query 
+      else
+        Apply heuristic: pre-sort all access plans with respect to the number of
+        records accessed.
+    */
+    jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp;
+  }
+  my_qsort2(join->best_ref + join->const_tables,
+            join->table_count - join->const_tables, sizeof(JOIN_TAB*),
+            jtab_sort_func, (void*)join->emb_sjm_nest);
+  join->cur_sj_inner_tables= 0;
+
   if (straight_join)
   {
     optimize_straight_join(join, join_tables);
@@ -4994,7 +5755,7 @@ choose_plan(JOIN *join, table_map join_tables)
 */
 
 static int
-join_tab_cmp(const void* ptr1, const void* ptr2)
+join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2)
 {
   JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
   JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
@@ -5016,18 +5777,58 @@ join_tab_cmp(const void* ptr1, const void* ptr2)
 */
 
 static int
-join_tab_cmp_straight(const void* ptr1, const void* ptr2)
+join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2)
+{
+  JOIN_TAB *left= *(JOIN_TAB**) ptr1;
+  JOIN_TAB *right= *(JOIN_TAB**) ptr2;
+
+  /*
+    Subquery flattening is not done when the parent or the child select has
+    the STRAIGHT_JOIN modifier (complicated to implement, hardly useful), so
+    neither table may belong to a semi-join nest here.
+  */
+  DBUG_ASSERT(!left->emb_sj_nest);
+  DBUG_ASSERT(!right->emb_sj_nest);
+  /* A dependent table sorts after the table it depends on; else by address */
+  if (left->dependent & right->table->map)
+    return 1;
+  if (right->dependent & left->table->map)
+    return -1;
+  return (left > right) - (left < right);
+}
+
+
+/*
+  Same as join_tab_cmp but tables from within the given semi-join nest go 
+  first. Used when optimizing semi-join materialization nests.
+*/
+
+static int
+join_tab_cmp_embedded_first(const void *emb,  const void* ptr1, const void* ptr2)
 {
+  const TABLE_LIST *emb_nest= (TABLE_LIST*) emb;
   JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
   JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
 
+  if (jt1->emb_sj_nest == emb_nest && jt2->emb_sj_nest != emb_nest)
+    return -1;
+  if (jt1->emb_sj_nest != emb_nest && jt2->emb_sj_nest == emb_nest)
+    return 1;
+
   if (jt1->dependent & jt2->table->map)
     return 1;
   if (jt2->dependent & jt1->table->map)
     return -1;
+
+  if (jt1->found_records > jt2->found_records)
+    return 1;
+  if (jt1->found_records < jt2->found_records)
+    return -1; 
+  
   return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
 }
 
+
 /**
   Heuristic procedure to automatically guess a reasonable degree of
   exhaustiveness for the greedy search procedure.
@@ -5063,7 +5864,7 @@ join_tab_cmp_straight(const void* ptr1, const void* ptr2)
 static uint
 determine_search_depth(JOIN *join)
 {
-  uint table_count=  join->tables - join->const_tables;
+  uint table_count=  join->table_count - join->const_tables;
   uint search_depth;
   /* TODO: this value should be determined dynamically, based on statistics: */
   uint max_tables_for_exhaustive_opt= 7;
@@ -5109,17 +5910,23 @@ optimize_straight_join(JOIN *join, table_map join_tables)
 {
   JOIN_TAB *s;
   uint idx= join->const_tables;
+  bool disable_jbuf= join->thd->variables.join_cache_level == 0;
   double    record_count= 1.0;
   double    read_time=    0.0;
- 
+  POSITION  loose_scan_pos;
+
   for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
   {
     /* Find the best access method from 's' to the current partial plan */
-    best_access_path(join, s, join->thd, join_tables, idx,
-                     record_count, read_time);
+    best_access_path(join, s, join_tables, idx, disable_jbuf, record_count,
+                     join->positions + idx, &loose_scan_pos);
+
     /* compute the cost of the new plan extended with 's' */
     record_count*= join->positions[idx].records_read;
     read_time+=    join->positions[idx].read_time;
+    advance_sj_state(join, join_tables, s, idx, &record_count, &read_time,
+                     &loose_scan_pos);
+
     join_tables&= ~(s->table->map);
     ++idx;
   }
@@ -5130,6 +5937,7 @@ optimize_straight_join(JOIN *join, table_map join_tables)
     read_time+= record_count;  // We have to make a temp table
   memcpy((uchar*) join->best_positions, (uchar*) join->positions,
          sizeof(POSITION)*idx);
+  join->record_count= record_count;
   join->best_read= read_time;
 }
 
@@ -5154,7 +5962,7 @@ optimize_straight_join(JOIN *join, table_map join_tables)
 
     All other cases are in-between these two extremes. Thus the parameter
     'search_depth' controlls the exhaustiveness of the search. The higher the
-    value, the longer the optimizaton time and possibly the better the
+    value, the longer the optimization time and possibly the better the
     resulting plan. The lower the value, the fewer alternative plans are
     estimated, but the more likely to get a bad QEP.
 
@@ -5228,11 +6036,17 @@ greedy_search(JOIN      *join,
   uint      size_remain;    // cardinality of remaining_tables
   POSITION  best_pos;
   JOIN_TAB  *best_table; // the next plan node to be added to the curr QEP
+  uint      n_tables; // ==join->tables or # tables in the sj-mat nest we're optimizing
 
   DBUG_ENTER("greedy_search");
 
   /* number of tables that remain to be optimized */
-  size_remain= my_count_bits(remaining_tables);
+  n_tables= size_remain= my_count_bits(remaining_tables &
+                                       (join->emb_sjm_nest? 
+                                         (join->emb_sjm_nest->sj_inner_tables &
+                                          ~join->const_table_map)
+                                         :
+                                         ~(table_map)0));
 
   do {
     /* Find the extension of the current QEP with the lowest cost */
@@ -5252,7 +6066,7 @@ greedy_search(JOIN      *join,
         'join->best_positions' contains a complete optimal extension of the
         current partial QEP.
       */
-      DBUG_EXECUTE("opt", print_plan(join, join->tables,
+      DBUG_EXECUTE("opt", print_plan(join, n_tables,
                                      record_count, read_time, read_time,
                                      "optimal"););
       DBUG_RETURN(FALSE);
@@ -5280,6 +6094,7 @@ greedy_search(JOIN      *join,
     /* This has been already checked by best_extension_by_limited_search */
     DBUG_ASSERT(!is_interleave_error);
 
+
     /* find the position of 'best_table' in 'join->best_ref' */
     best_idx= idx;
     JOIN_TAB *pos= join->best_ref[best_idx];
@@ -5305,6 +6120,139 @@ greedy_search(JOIN      *join,
 
 
 /**
+  Get cost of execution and fanout produced by selected tables in the join
+  prefix (where prefix is defined as prefix in depth-first traversal)
+ 
+  @param end_tab_idx               The number of last tab to be taken into
+                                   account (in depth-first traversal prefix)
+  @param filter_map                Bitmap of tables whose cost/fanout are to 
+                                   be taken into account.
+  @param read_time_arg     [out]   store read time here 
+  @param record_count_arg  [out]   store record count here
+
+  @note
+
+  @returns
+    read_time_arg and record_count_arg contain the computed cost and fanout
+*/
+
+void JOIN::get_partial_cost_and_fanout(uint end_tab_idx,
+                                       table_map filter_map,
+                                       double *read_time_arg, 
+                                       double *record_count_arg)
+{
+  double record_count= 1;
+  double read_time= 0.0;
+  double sj_inner_fanout= 1.0;
+  JOIN_TAB *end_tab= NULL;
+  JOIN_TAB *tab;
+  uint i;
+  uint last_sj_table= MAX_TABLES;
+
+  /* Special case: the join is degenerate and produces no records. */
+  if (table_count == 0)
+  {
+    *read_time_arg= 0.0;
+    /*
+      We return 1, because 
+       - it is the pessimistic estimate (there might be grouping)
+       - it's safer, as we're less likely to hit the edge cases in
+         calculations.
+    */
+    *record_count_arg=1.0;
+    /* There are no tabs to traverse: keep the values stored above. */
+    return;
+  }
+
+  /* Find end_tab: the tab at position end_tab_idx, or the last tab. */
+  for (tab= first_depth_first_tab(this), i= const_tables;
+       tab;
+       tab= next_depth_first_tab(this, tab), i++)
+  {
+    end_tab= tab;
+    if (i == end_tab_idx)
+      break;
+  }
+
+  for (tab= first_depth_first_tab(this), i= const_tables;
+       (i <= end_tab_idx && tab);
+       tab= next_depth_first_tab(this, tab), i++)
+  {
+    /* 
+      We've entered the SJM nest that contains the end_tab. The caller is
+      actually 
+      - interested in fanout inside the nest (because that's how many times 
+        we'll invoke the attached WHERE conditions)
+      - not interested in cost
+    */
+    if (end_tab->bush_root_tab && end_tab->bush_root_tab == tab)
+    {
+      /* Ok, end_tab is inside SJM nest and we're entering that nest now */
+      record_count= 1.0;
+      read_time= 0.0;
+    }
+    
+    /* 
+      Ignore fanout (but not cost) from sj-inner tables, as long as 
+      the range that processes them finishes before the end_tab
+    */
+    if (tab->sj_strategy != SJ_OPT_NONE)
+    {
+      sj_inner_fanout= 1.0;
+      last_sj_table= i + tab->n_sj_tables;
+    }
+
+    if (tab->records_read && (tab->table->map & filter_map))
+    {
+      record_count *= tab->records_read;
+      read_time += tab->read_time;
+      if (tab->emb_sj_nest)
+        sj_inner_fanout *= tab->records_read;
+    }
+
+    if (i == last_sj_table)
+    {
+      record_count /= sj_inner_fanout;
+      sj_inner_fanout= 1.0;
+      last_sj_table= MAX_TABLES;
+    }
+  }
+  *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE;
+  *record_count_arg= record_count;
+}
+
+
+/*
+  Get prefix cost and fanout. This function is different from
+  get_partial_cost_and_fanout:
+   - it operates on a JOIN that hasn't yet finished its optimization phase (in
+     particular, fix_semijoin_strategies_for_picked_join_order() and
+     get_best_combination() haven't been called)
+   - it assumes that the join prefix doesn't have any semi-join plans
+
+  These assumptions are met by the caller of the function.
+*/
+
+void JOIN::get_prefix_cost_and_fanout(uint n_tables, 
+                                      double *read_time_arg,
+                                      double *record_count_arg)
+{
+  double fanout= 1;
+  double cost= 0.0;
+  for (uint i= const_tables; i < n_tables + const_tables ; i++)
+  {
+    /* Positions with zero records_read add neither cost nor fanout */
+    if (!best_positions[i].records_read)
+      continue;
+    fanout *= best_positions[i].records_read;
+    cost += best_positions[i].read_time;
+  }
+  *read_time_arg= cost;
+  *record_count_arg= fanout;
+}
+
+
+/**
   Find a good, possibly optimal, query execution plan (QEP) by a possibly
   exhaustive search.
 
@@ -5446,25 +6394,42 @@ best_extension_by_limited_search(JOIN      *join,
   JOIN_TAB *s;
   double best_record_count= DBL_MAX;
   double best_read_time=    DBL_MAX;
+  bool disable_jbuf= join->thd->variables.join_cache_level == 0;
 
   DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
                                 "part_plan"););
 
+  /* 
+    If we are searching for the execution plan of a materialized semi-join nest
+    then allowed_tables contains bits only for the tables from this nest.
+  */
+  table_map allowed_tables= ~(table_map)0;
+  if (join->emb_sjm_nest)
+    allowed_tables= join->emb_sjm_nest->sj_inner_tables & ~join->const_table_map;
+
   for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
   {
     table_map real_table_bit= s->table->map;
     if ((remaining_tables & real_table_bit) && 
+        (allowed_tables & real_table_bit) &&
         !(remaining_tables & s->dependent) && 
         (!idx || !check_interleaving_with_nj(s)))
     {
       double current_record_count, current_read_time;
+      POSITION *position= join->positions + idx;
 
       /* Find the best access method from 's' to the current partial plan */
-      best_access_path(join, s, thd, remaining_tables, idx,
-                       record_count, read_time);
+      POSITION loose_scan_pos;
+      best_access_path(join, s, remaining_tables, idx, disable_jbuf,
+                       record_count, join->positions + idx, &loose_scan_pos);
+
       /* Compute the cost of extending the plan with 's' */
-      current_record_count= record_count * join->positions[idx].records_read;
-      current_read_time=    read_time + join->positions[idx].read_time;
+
+      current_record_count= record_count * position->records_read;
+      current_read_time=    read_time + position->read_time;
+
+      advance_sj_state(join, remaining_tables, s, idx, &current_record_count,
+                       &current_read_time, &loose_scan_pos);
 
       /* Expand only partial plans with lower cost than the best QEP so far */
       if ((current_read_time +
@@ -5478,6 +6443,7 @@ best_extension_by_limited_search(JOIN      *join,
                                         (double) TIME_FOR_COMPARE),
                                        "prune_by_cost"););
         restore_prev_nj_state(s);
+        restore_prev_sj_state(remaining_tables, s, idx);
         continue;
       }
 
@@ -5495,7 +6461,7 @@ best_extension_by_limited_search(JOIN      *join,
           if (best_record_count >= current_record_count &&
               best_read_time >= current_read_time &&
               /* TODO: What is the reasoning behind this condition? */
-              (!(s->key_dependent & remaining_tables) ||
+              (!(s->key_dependent & allowed_tables & remaining_tables) ||
                join->positions[idx].records_read < 2.0))
           {
             best_record_count= current_record_count;
@@ -5510,11 +6476,12 @@ best_extension_by_limited_search(JOIN      *join,
                                          current_read_time,
                                          "pruned_by_heuristic"););
           restore_prev_nj_state(s);
+          restore_prev_sj_state(remaining_tables, s, idx);
           continue;
         }
       }
 
-      if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) )
+      if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables )
       { /* Recursively expand the current partial plan */
         swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
         if (best_extension_by_limited_search(join,
@@ -5542,6 +6509,7 @@ best_extension_by_limited_search(JOIN      *join,
         {
           memcpy((uchar*) join->best_positions, (uchar*) join->positions,
                  sizeof(POSITION) * (idx + 1));
+          join->record_count= current_record_count;
           join->best_read= current_read_time - 0.001;
         }
         DBUG_EXECUTE("opt", print_plan(join, idx+1,
@@ -5551,6 +6519,7 @@ best_extension_by_limited_search(JOIN      *join,
                                        "full_plan"););
       }
       restore_prev_nj_state(s);
+      restore_prev_sj_state(remaining_tables, s, idx);
     }
   }
   DBUG_RETURN(FALSE);
@@ -5597,6 +6566,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
 
   JOIN_TAB *s;
   double best_record_count=DBL_MAX,best_read_time=DBL_MAX;
+  bool disable_jbuf= join->thd->variables.join_cache_level == 0;
   for (JOIN_TAB **pos=join->best_ref+idx ; (s=*pos) ; pos++)
   {
     table_map real_table_bit=s->table->map;
@@ -5604,8 +6574,9 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
         (!idx|| !check_interleaving_with_nj(s)))
     {
       double records, best;
-      best_access_path(join, s, thd, rest_tables, idx, record_count, 
-                       read_time);
+      POSITION loose_scan_pos;
+      best_access_path(join, s, rest_tables, idx, disable_jbuf, record_count, 
+                       join->positions + idx, &loose_scan_pos);
       records= join->positions[idx].records_read;
       best= join->positions[idx].read_time;
       /*
@@ -5614,6 +6585,9 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
       */
       double current_record_count=record_count*records;
       double current_read_time=read_time+best;
+      advance_sj_state(join, rest_tables, s, idx, &current_record_count, 
+                       &current_read_time, &loose_scan_pos);
+
       if (best_record_count > current_record_count ||
 	  best_read_time > current_read_time ||
 	  (idx == join->const_tables && s->table == join->sort_by_table))
@@ -5632,6 +6606,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
 	swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
       }
       restore_prev_nj_state(s);
+      restore_prev_sj_state(rest_tables, s, idx);
       if (join->select_options & SELECT_STRAIGHT_JOIN)
 	break;				// Don't test all combinations
     }
@@ -5644,14 +6619,16 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
   Find how much space the prevous read not const tables takes in cache.
 */
 
-static void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
+void JOIN_TAB::calc_used_field_length(bool max_fl)
 {
-  uint null_fields,blobs,fields,rec_length;
+  uint null_fields,blobs,fields;
+  ulong rec_length;
   Field **f_ptr,*field;
-  MY_BITMAP *read_set= join_tab->table->read_set;;
+  uint uneven_bit_fields;
+  MY_BITMAP *read_set= table->read_set;
 
-  null_fields= blobs= fields= rec_length=0;
-  for (f_ptr=join_tab->table->field ; (field= *f_ptr) ; f_ptr++)
+  uneven_bit_fields= null_fields= blobs= fields= rec_length=0;
+  for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
   {
     if (bitmap_is_set(read_set, field->field_index))
     {
@@ -5662,21 +6639,112 @@ static void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
 	blobs++;
       if (!(flags & NOT_NULL_FLAG))
 	null_fields++;
+      if (field->type() == MYSQL_TYPE_BIT &&
+          ((Field_bit*)field)->bit_len)
+        uneven_bit_fields++;
     }
   }
-  if (null_fields)
-    rec_length+=(join_tab->table->s->null_fields+7)/8;
-  if (join_tab->table->maybe_null)
+  if (null_fields || uneven_bit_fields)
+    rec_length+=(table->s->null_fields+7)/8;
+  if (table->maybe_null)
     rec_length+=sizeof(my_bool);
-  if (blobs)
+  if (max_fl)
   {
-    uint blob_length=(uint) (join_tab->table->file->stats.mean_rec_length-
-			     (join_tab->table->s->reclength- rec_length));
-    rec_length+=(uint) max(4,blob_length);
+    // TODO: to improve this estimate for max expected length 
+    if (blobs)
+    {
+      ulong blob_length= table->file->stats.mean_rec_length;
+      if (ULONG_MAX - rec_length > blob_length)
+        rec_length+=  blob_length;
+      else
+        rec_length= ULONG_MAX;
+    }
+    max_used_fieldlength= rec_length;
+  } 
+  else if (table->file->stats.mean_rec_length)           
+    set_if_smaller(rec_length, table->file->stats.mean_rec_length);
+      
+  /*
+    TODO: why we don't count here rowid that we might need to store when 
+    using DuplicateElimination?
+  */
+  used_fields=fields;
+  used_fieldlength=rec_length;
+  used_blobs=blobs;
+  used_null_fields= null_fields;
+  used_uneven_bit_fields= uneven_bit_fields;
+}
+
+
+/* 
+  @brief
+  Extract pushdown conditions for a table scan
+
+  @details
+  This function extracts pushdown conditions usable when this table is scanned.
+  The conditions are extracted either from WHERE or from ON expressions.
+  The conditions are attached to the field cache_select of this table.
+
+  @note 
+  Currently the extracted conditions are used only by BNL and BNLH join
+  algorithms.
+ 
+  @retval  0   on success
+           1   otherwise
+*/ 
+
+int JOIN_TAB::make_scan_filter()
+{
+  DBUG_ENTER("make_scan_filter");
+  /* Inner tables of an outer join take the condition from the ON expression */
+  Item *source_cond= is_inner_table_of_outer_join() ?
+                       *get_first_inner_table()->on_expr_ref : join->conds;
+  if (!source_cond)
+    DBUG_RETURN(0);
+  COND *scan_cond= make_cond_for_table(join->thd, source_cond,
+                                       join->const_table_map | table->map,
+                                       table->map, MAX_TABLES, FALSE, TRUE);
+  if (!scan_cond)
+    DBUG_RETURN(0);
+  DBUG_EXECUTE("where",print_where(scan_cond,"cache", QT_ORDINARY););
+  cache_select= (SQL_SELECT*) join->thd->memdup((uchar*) select,
+                                                sizeof(SQL_SELECT));
+  if (!cache_select)
+    DBUG_RETURN(1);
+  cache_select->cond= scan_cond;
+  cache_select->read_tables=join->const_table_map;
+  DBUG_RETURN(0);
+}
+
+
+/**
+  @brief
+  Check whether hash join algorithm can be used to join this table   
+
+  @details
+  This function finds out whether the ref items that have been chosen
+  by the planner to access this table can be used for hash join algorithms.
+  The answer depends on a certain property of the fields of the
+  joined tables on which the hash join key is built.
+  
+  @note
+  At present the function is supposed to be called only after the function
+  get_best_combination has been called.
+
+  @retval TRUE    it's possible to use hash join to join this table
+  @retval FALSE   otherwise
+*/
+
+bool JOIN_TAB::hash_join_is_possible()
+{
+  if (type != JT_REF && type != JT_EQ_REF)
+    return FALSE;
+  if (!is_ref_for_hash_join())
+  {
+    KEY *keyinfo= table->key_info + ref.key;
+    return keyinfo->key_part[0].field->hash_join_is_possible();
   }
-  join_tab->used_fields=fields;
-  join_tab->used_fieldlength=rec_length;
-  join_tab->used_blobs=blobs;
+  return TRUE;
 }
 
 
@@ -5685,16 +6753,13 @@ cache_record_length(JOIN *join,uint idx)
 {
   uint length=0;
   JOIN_TAB **pos,**end;
-  THD *thd=join->thd;
 
   for (pos=join->best_ref+join->const_tables,end=join->best_ref+idx ;
        pos != end ;
        pos++)
   {
     JOIN_TAB *join_tab= *pos;
-    if (!join_tab->used_fieldlength)		/* Not calced yet */
-      calc_used_field_length(thd, join_tab);
-    length+=join_tab->used_fieldlength;
+    length+= join_tab->get_used_fieldlength();
   }
   return length;
 }
@@ -5751,12 +6816,12 @@ cache_record_length(JOIN *join,uint idx)
     Expected number of row combinations
 */
 
-static double
-prev_record_reads(JOIN *join, uint idx, table_map found_ref)
+double
+prev_record_reads(POSITION *positions, uint idx, table_map found_ref)
 {
   double found=1.0;
-  POSITION *pos_end= join->positions - 1;
-  for (POSITION *pos= join->positions + idx - 1; pos != pos_end; pos--)
+  POSITION *pos_end= positions - 1;
+  for (POSITION *pos= positions + idx - 1; pos != pos_end; pos--)
   {
     if (pos->table->table->map & found_ref)
     {
@@ -5785,14 +6850,239 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref)
 }
 
 
-/**
-  Set up join struct according to best position.
+/*
+  Start enumerating join tabs breadth-first (const tables included): all
+  top-level tabs first, then the tabs inside each SJM nest.
+*/
+JOIN_TAB *first_breadth_first_tab(JOIN *join)
+{
+  return join->join_tab; /* There's always one (i.e. first) table */
+}
+
+
+JOIN_TAB *next_breadth_first_tab(JOIN *join, JOIN_TAB *tab)
+{
+  JOIN_TAB *const top_end= join->join_tab + join->top_join_tab_count;
+  JOIN_TAB *scan;
+
+  if (tab->bush_root_tab)
+  {
+    /* Inside an SJM nest: hand out the remaining children one by one */
+    if (!tab->last_leaf_in_bush)
+      return tab + 1;
+    /*
+      That was the last child of this nest. Resume the search for further
+      nests at the top-level tab that follows the nest's root.
+    */
+    scan= tab->bush_root_tab + 1;
+  }
+  else
+  {
+    /* Top level: the neighbouring top-level tab comes next, if any */
+    if (tab + 1 < top_end)
+      return tab + 1;
+
+    /*
+      The top level is exhausted. From here on only SJM nest children are
+      enumerated, so look for nests from the very first top-level tab.
+    */
+    scan= join->join_tab;
+  }
+
+  /*
+    "scan" points to a top-level tab (or just past the last one). Enter the
+    first SJM nest found at or after it.
+  */
+  while (scan < top_end)
+  {
+    if (scan->bush_children)
+      return scan->bush_children->start;
+    scan++;
+  }
+  return NULL;
+}
+
+/* Return the first top-level JOIN_TAB, skipping const tables if asked to. */
+JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const)
+{
+  JOIN_TAB *tab= join->join_tab;
+  if (with_const == WITHOUT_CONST_TABLES)   /* skip only when asked to exclude */
+  {
+    if (join->const_tables == join->table_count)
+      return NULL;                          /* all tables are const tables */
+    tab += join->const_tables;
+  }
+  return tab;
+}
+
+/* Next top-level tab, or NULL once the breadth-first walk leaves the top level */
+JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab)
+{
+  JOIN_TAB *next= next_breadth_first_tab(join, tab);
+  if (!next || next->bush_root_tab)     /* exhausted, or entered an SJM nest */
+    return NULL;
+  return next;
+}
+
+/* Begin a linear walk over the join tabs, optionally skipping const tables. */
+JOIN_TAB *first_linear_tab(JOIN *join, enum enum_with_const_tables const_tbls)
+{
+  JOIN_TAB *start= (const_tbls == WITHOUT_CONST_TABLES) ?
+                     join->join_tab + join->const_tables : join->join_tab;
+
+  if (start >= join->join_tab + join->top_join_tab_count)
+    return NULL;                       /* All tables were const tables */
+  return start;
+}
+
+
+/*
+  A helper function to loop over all join's join_tab in sequential fashion
+
+  DESCRIPTION
+    Depending on include_bush_roots parameter, JOIN_TABs that represent
+    SJM-scan/lookups are either returned or omitted.
+
+    SJM-Bush children are returned right after (or in place of) their container
+    join tab (TODO: does anybody depend on this? A: make_join_readinfo() seems
+    to)
+
+    For example, if we have this structure:
+      
+       ot1--ot2--sjm1----------------ot3-...
+                  |
+                  +--it1--it2--it3
+
+    calls to next_linear_tab( include_bush_roots=TRUE) will return:
+      
+      ot1 ot2 sjm1 it1 it2 it3 ot3 ...
+   
+   while calls to next_linear_tab( include_bush_roots=FALSE) will return:
+
+      ot1 ot2 it1 it2 it3 ot3 ...
+
+   (note that sjm1 won't be returned).
+*/
+
+JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab,
+                          enum enum_with_bush_roots include_bush_roots)
+{
+  JOIN_TAB *cur= tab;
+
+  /* A bush root that was itself returned is followed by its first child */
+  if (cur->bush_children && include_bush_roots == WITH_BUSH_ROOTS)
+    return cur->bush_children->start;
+
+  /* Only tabs that live inside a nest may be flagged as its last leaf */
+  DBUG_ASSERT(!cur->last_leaf_in_bush || cur->bush_root_tab);
+
+  if (cur->bush_root_tab)
+  {
+    /* Within an SJM nest: keep walking it until the last leaf is passed */
+    if (!cur->last_leaf_in_bush)
+      return cur + 1;
+    /* Nest finished; carry on from its root at the top level */
+    cur= cur->bush_root_tab;
+  }
+
+  /* Step to the next top-level tab and stop when there is none */
+  cur++;
+  if (cur == join->join_tab + join->top_join_tab_count)
+    return NULL;
+
+  /* When roots are not reported, a nest is replaced by its first child */
+  if (cur->bush_children && include_bush_roots == WITHOUT_BUSH_ROOTS)
+    return cur->bush_children->start;
+  return cur;
+}
+
+
+/*
+  Start to iterate over all join tables in bush-children-first order, excluding 
+  the const tables (see next_depth_first_tab() comment for details)
+*/
+
+JOIN_TAB *first_depth_first_tab(JOIN* join)
+{
+  /* Every top-level tab is a const table: nothing to enumerate */
+  if (join->top_join_tab_count == join->const_tables)
+    return NULL;
+  JOIN_TAB *first= join->join_tab + join->const_tables;
+  /* Children are visited before their bush root, so descend right away */
+  if (first->bush_children)
+    return first->bush_children->start;
+  return first;
+}
+
+
+/*
+  A helper function to iterate over all join tables in bush-children-first order
+
+  DESCRIPTION
+   
+  For example, for this join plan
+
+    ot1--ot2--sjm1------------ot3-...
+               |
+               |
+              it1--it2--it3 
+  
+  call to first_depth_first_tab() will return ot1, and subsequent calls to
+  next_depth_first_tab() will return:
+
+     ot2 it1 it2 it3 sjm ot3 ...
+*/
+
+JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab)
+{
+  JOIN_TAB *succ;
+  /* The last child of an SJM nest is followed by the nest's root tab */
+  if (tab->last_leaf_in_bush)
+    return tab->bush_root_tab;
+
+  /* Otherwise step to the neighbour in the array being traversed */
+  succ= tab + 1;
+  if (succ == join->join_tab + join->top_join_tab_count)
+    return NULL;                          /* walked off the top-level array */
+
+  /* Children come before their root: dive into a nest when we meet one */
+  if (succ->bush_children)
+    succ= succ->bush_children->start;
+  return succ;
+}
+
+
+static Item * const null_ptr= NULL;
+
+/*
+  Set up join struct according to the picked join order
+  
+  SYNOPSIS
+    get_best_combination()
+      join  The join to process (the picked join order is mainly in
+            join->best_positions)
+
+  DESCRIPTION
+    Set up join structures according to the picked join order
+    - finalize semi-join strategy choices (see
+        fix_semijoin_strategies_for_picked_join_order)
+    - create join->join_tab array and put there the JOIN_TABs in the join order
+    - create data structures describing ref access methods.
+
+  NOTE
+    In this function we switch from pre-join-optimization JOIN_TABs to
+    post-join-optimization JOIN_TABs. This is achieved by copying the entire
+    JOIN_TAB objects.
+ 
+  RETURN 
+    FALSE  OK
+    TRUE   Out of memory
 */
 
 static bool
 get_best_combination(JOIN *join)
 {
-  uint i,tablenr;
+  uint tablenr;
   table_map used_tables;
   JOIN_TAB *join_tab,*j;
   KEYUSE *keyuse;
@@ -5800,7 +7090,7 @@ get_best_combination(JOIN *join)
   THD *thd=join->thd;
   DBUG_ENTER("get_best_combination");
 
-  table_count=join->tables;
+  table_count=join->table_count;
   if (!(join->join_tab=join_tab=
 	(JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*table_count)))
     DBUG_RETURN(TRUE);
@@ -5808,56 +7098,253 @@ get_best_combination(JOIN *join)
   join->full_join=0;
 
   used_tables= OUTER_REF_TABLE_BIT;		// Outer row is already read
+
+  fix_semijoin_strategies_for_picked_join_order(join);
+  
+  JOIN_TAB_RANGE *root_range;
+  if (!(root_range= new JOIN_TAB_RANGE))
+    DBUG_RETURN(TRUE);
+  root_range->start= join->join_tab;
+  /* root_range->end will be set later */
+  join->join_tab_ranges.empty();
+
+  if (join->join_tab_ranges.push_back(root_range))
+    DBUG_RETURN(TRUE);
+
+  JOIN_TAB *sjm_nest_end= NULL;
+  JOIN_TAB *sjm_nest_root= NULL;
+
   for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
   {
     TABLE *form;
+    POSITION *cur_pos= &join->best_positions[tablenr];
+    if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE || 
+        cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
+    {
+      /*
+        Ok, we've entered an SJ-Materialization semi-join (note that this can't
+        be done recursively, semi-joins are not allowed to be nested).
+        1. Put into main join order a JOIN_TAB that represents a lookup or scan
+           in the temptable.
+      */
+      bzero(j, sizeof(JOIN_TAB));
+      j->join= join;
+      j->table= NULL; //temporary way to tell SJM tables from others.
+      j->ref.key = -1;
+      j->on_expr_ref= (Item**) &null_ptr;
+      j->keys= key_map(1); /* The unique index is always in 'possible keys' in EXPLAIN */
+
+      /*
+        2. Proceed with processing SJM nest's join tabs, putting them into the
+           sub-order
+      */
+      SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info;
+      j->records= j->records_read= (ha_rows)(sjm->is_sj_scan? sjm->rows : 1);
+      JOIN_TAB *jt;
+      JOIN_TAB_RANGE *jt_range;
+      if (!(jt= (JOIN_TAB*)join->thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) ||
+          !(jt_range= new JOIN_TAB_RANGE))
+        DBUG_RETURN(TRUE);
+      jt_range->start= jt;
+      jt_range->end= jt + sjm->tables;
+      join->join_tab_ranges.push_back(jt_range);
+      j->bush_children= jt_range;
+      sjm_nest_end= jt + sjm->tables;
+      sjm_nest_root= j;
+
+      j= jt;
+    }
+    
     *j= *join->best_positions[tablenr].table;
-    form=join->all_tables[tablenr]=j->table;
+
+#if 0
+/* SJ-Materialization is represented with join tab ranges */
+    if (j->sj_strategy == SJ_OPT_MATERIALIZE || 
+        j->sj_strategy == SJ_OPT_MATERIALIZE)
+      j->sj_strategy= SJ_OPT_NONE;  
+#endif
+
+    j->bush_root_tab= sjm_nest_root;
+
+    form=join->table[tablenr]=j->table;
     used_tables|= form->map;
     form->reginfo.join_tab=j;
     if (!*j->on_expr_ref)
       form->reginfo.not_exists_optimize=0;	// Only with LEFT JOIN
     DBUG_PRINT("info",("type: %d", j->type));
     if (j->type == JT_CONST)
-      continue;					// Handled in make_join_stat..
+      goto loop_end;					// Handled in make_join_stat..
 
+    j->loosescan_match_tab= NULL;  //non-nulls will be set later
     j->ref.key = -1;
     j->ref.key_parts=0;
 
     if (j->type == JT_SYSTEM)
-      continue;
-    if (j->keys.is_clear_all() || !(keyuse= join->best_positions[tablenr].key))
+      goto loop_end;
+    if ( !(keyuse= join->best_positions[tablenr].key) || 
+        (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN))
     {
       j->type=JT_ALL;
+      j->index= join->best_positions[tablenr].loosescan_key;
       if (tablenr != join->const_tables)
 	join->full_join=1;
     }
     else if (create_ref_for_key(join, j, keyuse, used_tables))
       DBUG_RETURN(TRUE);                        // Something went wrong
+  loop_end:
+    /* 
+      Save records_read in JOIN_TAB so that select_describe()/etc don't have
+      to access join->best_positions[]. 
+    */
+    j->records_read= (ha_rows)join->best_positions[tablenr].records_read;
+    join->map2table[j->table->tablenr]= j;
+
+    /* If we've reached the end of sjm nest, switch back to main sequence */
+    if (j + 1 == sjm_nest_end)
+    {
+      j->last_leaf_in_bush= TRUE;
+      j= sjm_nest_root;
+      sjm_nest_root= NULL;
+      sjm_nest_end= NULL;
+    }
   }
+  root_range->end= j;
 
-  for (i=0 ; i < table_count ; i++)
-    join->map2table[join->join_tab[i].table->tablenr]=join->join_tab+i;
+  join->top_join_tab_count= join->join_tab_ranges.head()->end - 
+                            join->join_tab_ranges.head()->start;
   update_depend_map(join);
   DBUG_RETURN(0);
 }
 
+/**
+  Create a descriptor of hash join key to access a given join table  
 
-static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
-			       table_map used_tables)
+  @param   join         join which the join table belongs to
+  @param   join_tab     the join table to access
+  @param   org_keyuse   beginning of the key uses to join this table
+  @param   used_tables  bitmap of the previous tables
+
+  @details
+  This function first finds key uses that can be utilized by the hash join
+  algorithm to join join_tab to the previous tables marked in the bitmap 
+  used_tables.  The tested key uses are taken from the array of all key uses
+  for 'join' starting from the position org_keyuse. After all interesting key
+  uses have been found the function builds a descriptor of the corresponding
+  key that will be used by the hash join algorithm should it be chosen to join
+  the table join_tab.
+
+  @retval  FALSE  the descriptor for a hash join key is successfully created
+  @retval  TRUE   otherwise
+*/
+
+static bool create_hj_key_for_table(JOIN *join, JOIN_TAB *join_tab,
+                                    KEYUSE *org_keyuse, table_map used_tables)
 {
-  KEYUSE *keyuse=org_keyuse;
-  bool ftkey=(keyuse->keypart == FT_KEYPART);
+  KEY *keyinfo;
+  KEY_PART_INFO *key_part_info;
+  KEYUSE *keyuse= org_keyuse;
+  uint key_parts= 0;
   THD  *thd= join->thd;
-  uint keyparts,length,key;
+  TABLE *table= join_tab->table;
+  bool first_keyuse= TRUE;
+  DBUG_ENTER("create_hj_key_for_table");
+  /* Pass 1: count the key parts; must accept exactly what pass 2 accepts */
+  do
+  {
+    if (!(~used_tables & keyuse->used_tables) &&
+        (first_keyuse || keyuse->keypart != (keyuse-1)->keypart))
+    {
+      key_parts++;
+      first_keyuse= FALSE;      /* cleared only once a key use is accepted */
+    }
+    keyuse++;
+  } while (keyuse->table == table && keyuse->is_for_hash_join());
+  if (!key_parts)
+    DBUG_RETURN(TRUE);
+  /* This memory is allocated only once for the joined table join_tab */
+  if (!(keyinfo= (KEY *) thd->alloc(sizeof(KEY))) ||
+      !(key_part_info = (KEY_PART_INFO *) thd->alloc(sizeof(KEY_PART_INFO)*
+                                                     key_parts)))
+    DBUG_RETURN(TRUE);
+  keyinfo->usable_key_parts= keyinfo->key_parts = key_parts;
+  keyinfo->key_part= key_part_info;
+  keyinfo->key_length=0;
+  keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+  keyinfo->flags= HA_GENERATED_KEY;
+  keyinfo->name= (char *) "$hj";
+  keyinfo->rec_per_key= (ulong*) thd->calloc(sizeof(ulong)*key_parts);
+  if (!keyinfo->rec_per_key)
+    DBUG_RETURN(TRUE);
+  /* Pass 2: fill in one KEY_PART_INFO per key part counted above */
+  first_keyuse= TRUE;
+  keyuse= org_keyuse;
+  do
+  {
+    if (!(~used_tables & keyuse->used_tables) &&
+        (first_keyuse || keyuse->keypart != (keyuse-1)->keypart))
+    {
+      Field *field= table->field[keyuse->keypart];
+      uint fieldnr= keyuse->keypart+1;
+      table->create_key_part_by_field(keyinfo, key_part_info, field, fieldnr);
+      first_keyuse= FALSE;
+      key_part_info++;
+    }
+    keyuse++;
+  } while (keyuse->table == table && keyuse->is_for_hash_join());
+
+  join_tab->hj_key= keyinfo;
+  DBUG_RETURN(FALSE);
+}
+
+/* 
+  Check if a set of tables specified by used_tables can be accessed when
+  we're doing scan on join_tab jtab.
+*/
+static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables)
+{
+  /*
+    A top-level jtab can see everything:
+     - all other tables at top level are accessible,
+     - tables in join nests are accessible too, because all their columns that
+       are needed at top level will be unpacked when scanning the
+       materialization table.
+  */
+  if (!jtab->bush_root_tab)
+    return TRUE;
+
+  /*
+    jtab is inside an execution join nest. It may only refer to tables of its
+    own nest and to the const tables.
+  */
+  table_map visible= jtab->join->const_table_map;
+  visible|= jtab->emb_sj_nest->nested_join->used_tables;
+
+  table_map outside= used_tables & ~visible;
+  return outside == 0;
+}
+
+static bool create_ref_for_key(JOIN *join, JOIN_TAB *j,
+                               KEYUSE *org_keyuse, table_map used_tables)
+{
+  uint keyparts, length, key;
   TABLE *table;
   KEY *keyinfo;
+  KEYUSE *keyuse= org_keyuse;
+  bool ftkey= (keyuse->keypart == FT_KEYPART);
+  THD *thd= join->thd;
   DBUG_ENTER("create_ref_for_key");
 
   /*  Use best key from find_best */
-  table=j->table;
-  key=keyuse->key;
-  keyinfo=table->key_info+key;
+  table= j->table;
+  key= keyuse->key;
+  if (!is_hash_join_key_no(key))
+    keyinfo= table->key_info+key;
+  else
+  {
+    if (create_hj_key_for_table(join, j, org_keyuse, used_tables))
+      DBUG_RETURN(TRUE);
+    keyinfo= j->hj_key;
+  }
 
   if (ftkey)
   {
@@ -5880,27 +7367,31 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
     {
       if (!(~used_tables & keyuse->used_tables))
       {
-	if (keyparts == keyuse->keypart &&
-	    !(found_part_ref_or_null & keyuse->optimize))
-	{
-	  keyparts++;
-	  length+= keyinfo->key_part[keyuse->keypart].store_length;
-	  found_part_ref_or_null|= keyuse->optimize;
-	}
+        if  (are_tables_local(j, keyuse->val->used_tables()))
+        {
+          if ((is_hash_join_key_no(key) && 
+              (keyparts == 0 || keyuse->keypart != (keyuse-1)->keypart)) ||
+              (!is_hash_join_key_no(key) && keyparts == keyuse->keypart &&
+               !(found_part_ref_or_null & keyuse->optimize)))
+          {
+             length+= keyinfo->key_part[keyparts].store_length;
+             keyparts++;
+             found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
+          }
+        }
       }
       keyuse++;
     } while (keyuse->table == table && keyuse->key == key);
   } /* not ftkey */
 
   /* set up fieldref */
-  keyinfo=table->key_info+key;
-  j->ref.key_parts=keyparts;
-  j->ref.key_length=length;
-  j->ref.key=(int) key;
+  j->ref.key_parts= keyparts;
+  j->ref.key_length= length;
+  j->ref.key= (int) key;
   if (!(j->ref.key_buff= (uchar*) thd->calloc(ALIGN_SIZE(length)*2)) ||
       !(j->ref.key_copy= (store_key**) thd->alloc((sizeof(store_key*) *
-						   (keyparts+1)))) ||
-      !(j->ref.items=    (Item**) thd->alloc(sizeof(Item*)*keyparts)) ||
+						          (keyparts+1)))) ||
+      !(j->ref.items=(Item**) thd->alloc(sizeof(Item*)*keyparts)) ||
       !(j->ref.cond_guards= (bool**) thd->alloc(sizeof(uint*)*keyparts)))
   {
     DBUG_RETURN(TRUE);
@@ -5909,11 +7400,13 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
   j->ref.key_err=1;
   j->ref.has_record= FALSE;
   j->ref.null_rejecting= 0;
-  j->ref.use_count= 0;
+  j->ref.disable_cache= FALSE;
+  j->ref.null_ref_part= NO_REF_PART;
   keyuse=org_keyuse;
 
   store_key **ref_key= j->ref.key_copy;
   uchar *key_buff=j->ref.key_buff, *null_ref_key= 0;
+  uint null_ref_part= NO_REF_PART;
   bool keyuse_uses_no_tables= TRUE;
   if (ftkey)
   {
@@ -5930,9 +7423,11 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
     uint i;
     for (i=0 ; i < keyparts ; keyuse++,i++)
     {
-      while (keyuse->keypart != i ||
-	     ((~used_tables) & keyuse->used_tables))
-	keyuse++;				/* Skip other parts */
+      while (((~used_tables) & keyuse->used_tables) || 
+	     (keyuse->keypart != 
+              (is_hash_join_key_no(key) ?
+                 keyinfo->key_part[i].field->field_index : i))) 
+	 keyuse++;                              	/* Skip other parts */ 
 
       uint maybe_null= test(keyinfo->key_part[i].null_bit);
       j->ref.items[i]=keyuse->val;		// Save for cond removal
@@ -5940,13 +7435,15 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
       if (keyuse->null_rejecting) 
         j->ref.null_rejecting |= 1 << i;
       keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;
-      if (!keyuse->used_tables &&
-	  !(join->select_options & SELECT_DESCRIBE))
+      if (!keyuse->used_tables && !thd->lex->describe)
       {					// Compare against constant
-	store_key_item tmp(thd, keyinfo->key_part[i].field,
+	store_key_item tmp(thd, 
+                           keyinfo->key_part[i].field,
                            key_buff + maybe_null,
                            maybe_null ?  key_buff : 0,
-                           keyinfo->key_part[i].length, keyuse->val);
+                           keyinfo->key_part[i].length,
+                           keyuse->val,
+                           FALSE);
 	if (thd->is_fatal_error)
 	  DBUG_RETURN(TRUE);
 	tmp.copy();
@@ -5962,8 +7459,11 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
 	instead of JT_REF_OR_NULL in case if field can't be null
       */
       if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && maybe_null)
+      {
 	null_ref_key= key_buff;
-      key_buff+=keyinfo->key_part[i].store_length;
+        null_ref_part= i;
+      }
+      key_buff+= keyinfo->key_part[i].store_length;
     }
   } /* not ftkey */
   *ref_key=0;				// end_marker
@@ -5977,6 +7477,7 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
     /* Must read with repeat */
     j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF;
     j->ref.null_ref_key= null_ref_key;
+    j->ref.null_ref_part= null_ref_part;
   }
   else if (keyuse_uses_no_tables)
   {
@@ -6011,9 +7512,10 @@ get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables,
   }
   else if (keyuse->val->type() == Item::FIELD_ITEM ||
            (keyuse->val->type() == Item::REF_ITEM &&
-            ((Item_ref*)keyuse->val)->ref_type() == Item_ref::OUTER_REF &&
-            (*(Item_ref**)((Item_ref*)keyuse->val)->ref)->ref_type() ==
-             Item_ref::DIRECT_REF && 
+	    ((((Item_ref*)keyuse->val)->ref_type() == Item_ref::OUTER_REF &&
+              (*(Item_ref**)((Item_ref*)keyuse->val)->ref)->ref_type() ==
+              Item_ref::DIRECT_REF) || 
+             ((Item_ref*)keyuse->val)->ref_type() == Item_ref::VIEW_REF) &&
             keyuse->val->real_item()->type() == Item::FIELD_ITEM))
     return new store_key_field(thd,
 			       key_part->field,
@@ -6021,13 +7523,13 @@ get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables,
 			       maybe_null ? key_buff : 0,
 			       key_part->length,
 			       ((Item_field*) keyuse->val->real_item())->field,
-			       keyuse->val->full_name());
+			       keyuse->val->real_item()->full_name());
   return new store_key_item(thd,
 			    key_part->field,
 			    key_buff + maybe_null,
 			    maybe_null ? key_buff : 0,
 			    key_part->length,
-			    keyuse->val);
+			    keyuse->val, FALSE);
 }
 
 /**
@@ -6085,10 +7587,12 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
     DBUG_RETURN(TRUE);                        /* purecov: inspected */
 
   join_tab= parent->join_tab_reexec;
-  parent->table_reexec[0]= temp_table;
-  tables= 1;
+  table= &parent->table_reexec[0]; parent->table_reexec[0]= temp_table;
+  table_count= top_join_tab_count= 1;
+
   const_tables= 0;
   const_table_map= 0;
+  eliminated_tables= 0;
   tmp_table_param.field_count= tmp_table_param.sum_func_count=
     tmp_table_param.func_count= 0;
   /*
@@ -6104,10 +7608,13 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
   row_limit= unit->select_limit_cnt;
   do_send_rows= row_limit ? 1 : 0;
 
-  join_tab->cache.buff=0;			/* No caching */
+  join_tab->use_join_cache= FALSE;
+  join_tab->cache=0;			        /* No caching */
   join_tab->table=temp_table;
+  join_tab->cache_select= 0;
   join_tab->select=0;
-  join_tab->select_cond=0;
+  join_tab->select_cond= 0;                     // Avoid valgrind warning
+  join_tab->set_select_cond(NULL, __LINE__);
   join_tab->quick=0;
   join_tab->type= JT_ALL;			/* Map through all records */
   join_tab->keys.init();
@@ -6119,8 +7626,18 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
   join_tab->ref.key = -1;
   join_tab->not_used_in_distinct=0;
   join_tab->read_first_record= join_init_read_record;
+  join_tab->preread_init_done= FALSE;
   join_tab->join= this;
   join_tab->ref.key_parts= 0;
+  join_tab->keep_current_rowid= FALSE;
+  join_tab->flush_weedout_table= join_tab->check_weed_out_table= NULL;
+  join_tab->do_firstmatch= NULL;
+  join_tab->loosescan_match_tab= NULL;
+  join_tab->emb_sj_nest= NULL;
+  join_tab->pre_idx_push_select_cond= NULL;
+  join_tab->bush_root_tab= NULL;
+  join_tab->bush_children= NULL;
+  join_tab->last_leaf_in_bush= FALSE;
   bzero((char*) &join_tab->read_record,sizeof(join_tab->read_record));
   temp_table->status=0;
   temp_table->null_row=0;
@@ -6128,15 +7645,17 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
 }
 
 
-inline void add_cond_and_fix(Item **e1, Item *e2)
+inline void add_cond_and_fix(THD *thd, Item **e1, Item *e2)
 {
   if (*e1)
   {
+    if (!e2)
+      return;
     Item *res;
     if ((res= new Item_cond_and(*e1, e2)))
     {
       *e1= res;
-      res->quick_fix_field();
+      res->fix_fields(thd, 0);
       res->update_used_tables();
     }
   }
@@ -6199,12 +7718,13 @@ inline void add_cond_and_fix(Item **e1, Item *e2)
 static void add_not_null_conds(JOIN *join)
 {
   DBUG_ENTER("add_not_null_conds");
-  for (uint i=join->const_tables ; i < join->tables ; i++)
+  
+  for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
   {
-    JOIN_TAB *tab=join->join_tab+i;
-    if ((tab->type == JT_REF || tab->type == JT_EQ_REF || 
-         tab->type == JT_REF_OR_NULL) &&
-        !tab->table->maybe_null)
+    if (tab->type == JT_REF || tab->type == JT_EQ_REF || 
+        tab->type == JT_REF_OR_NULL)
     {
       for (uint keypart= 0; keypart < tab->ref.key_parts; keypart++)
       {
@@ -6212,15 +7732,16 @@ static void add_not_null_conds(JOIN *join)
         {
           Item *item= tab->ref.items[keypart];
           Item *notnull;
-          DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
-          Item_field *not_null_item= (Item_field*)item;
+          Item *real= item->real_item();
+          DBUG_ASSERT(real->type() == Item::FIELD_ITEM);
+          Item_field *not_null_item= (Item_field*)real;
           JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
           /*
             For UPDATE queries such as:
             UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1);
             not_null_item is the t1.f1, but it's referred_tab is 0.
           */
-          if (!referred_tab || referred_tab->join != join)
+          if (!referred_tab)
             continue;
           if (!(notnull= new Item_func_isnotnull(not_null_item)))
             DBUG_VOID_RETURN;
@@ -6233,9 +7754,21 @@ static void add_not_null_conds(JOIN *join)
           if (notnull->fix_fields(join->thd, &notnull))
             DBUG_VOID_RETURN;
           DBUG_EXECUTE("where",print_where(notnull,
-                                           referred_tab->table->alias,
+                                           referred_tab->table->alias.c_ptr(),
                                            QT_ORDINARY););
-          add_cond_and_fix(&referred_tab->select_cond, notnull);
+          if (!tab->first_inner)
+	  {
+            COND *new_cond= referred_tab->join == join ? 
+                              referred_tab->select_cond :
+                              join->outer_ref_cond;
+            add_cond_and_fix(join->thd, &new_cond, notnull);
+            if (referred_tab->join == join)
+              referred_tab->set_select_cond(new_cond, __LINE__);
+            else 
+              join->outer_ref_cond= new_cond;
+          }
+          else
+            add_cond_and_fix(join->thd, tab->first_inner->on_expr_ref, notnull);
         }
       }
     }
@@ -6250,6 +7783,12 @@ static void add_not_null_conds(JOIN *join)
   nested outer join and so on until it reaches root_tab
   (root_tab can be 0).
 
+  In other words:
+  add_found_match_trig_cond(tab->first_inner_tab, y, 0) is the way one should 
+  wrap parts of WHERE.  The idea is that the part of WHERE should be only
+  evaluated after we've finished figuring out whether outer joins.
+  ^^^ is the above correct?
+
   @param tab       the first inner table for most nested outer join
   @param cond      the predicate to be guarded (must be set)
   @param root_tab  the first inner table to stop
@@ -6277,6 +7816,12 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
 }
 
 
+bool TABLE_LIST::is_active_sjm()
+{ 
+  return sj_mat_info && sj_mat_info->is_used; /* has SJM info that is in use */
+}
+
+
 /**
   Fill in outer join related info for the execution plan structure.
 
@@ -6294,6 +7839,12 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
     corresponding first inner table through the field t0->on_expr_ref.
     Here ti are structures of the JOIN_TAB type.
 
+    In other words, for each join tab, set
+     - first_inner
+     - last_inner
+     - first_upper
+     - on_expr_ref, cond_equal
+
   EXAMPLE. For the query: 
   @code
         SELECT * FROM t1
@@ -6319,14 +7870,33 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
     has been chosen.
 */
 
-static void
+static bool
 make_outerjoin_info(JOIN *join)
 {
   DBUG_ENTER("make_outerjoin_info");
-  for (uint i=join->const_tables ; i < join->tables ; i++)
+  
+  /*
+    Create temp. tables for merged SJ-Materialization nests. We need to do
+    this now, because further code relies on tab->table and
+    tab->table->pos_in_table_list being set.
+  */
+  JOIN_TAB *tab;
+  for (tab= first_linear_tab(join, WITHOUT_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
   {
-    JOIN_TAB *tab=join->join_tab+i;
-    TABLE *table=tab->table;
+    if (tab->bush_children)
+    {
+      if (setup_sj_materialization_part1(tab))
+        DBUG_RETURN(TRUE);
+      tab->table->reginfo.join_tab= tab;
+    }
+  }
+
+  for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+  {
+    TABLE *table= tab->table;
     TABLE_LIST *tbl= table->pos_in_table_list;
     TABLE_LIST *embedding= tbl->embedding;
 
@@ -6340,11 +7910,19 @@ make_outerjoin_info(JOIN *join)
       tab->last_inner= tab->first_inner= tab;
       tab->on_expr_ref= &tbl->on_expr;
       tab->cond_equal= tbl->cond_equal;
-      if (embedding)
+      if (embedding && !embedding->is_active_sjm())
         tab->first_upper= embedding->nested_join->first_nested;
     }    
     for ( ; embedding ; embedding= embedding->embedding)
     {
+      if (embedding->is_active_sjm())
+      {
+        /* We're trying to walk out of an SJ-Materialization nest. Don't do this.  */
+        break;
+      }
+      /* Ignore sj-nests: */
+      if (!(embedding->on_expr && embedding->outer_join))
+        continue;
       NESTED_JOIN *nested_join= embedding->nested_join;
       if (!nested_join->counter)
       {
@@ -6360,13 +7938,20 @@ make_outerjoin_info(JOIN *join)
       }
       if (!tab->first_inner)  
         tab->first_inner= nested_join->first_nested;
-      if (++nested_join->counter < nested_join->join_list.elements)
+      if (tab->table->reginfo.not_exists_optimize)
+        tab->first_inner->table->reginfo.not_exists_optimize= 1;         
+      if (++nested_join->counter < nested_join->n_tables)
         break;
       /* Table tab is the last inner table for nested join. */
       nested_join->first_nested->last_inner= tab;
+      if (tab->first_inner->table->reginfo.not_exists_optimize)
+      {
+        for (JOIN_TAB *join_tab= tab->first_inner; join_tab <= tab; join_tab++)
+          join_tab->table->reginfo.not_exists_optimize= 1;
+      } 
     }
   }
-  DBUG_VOID_RETURN;
+  DBUG_RETURN(FALSE);
 }
 
 
@@ -6379,75 +7964,114 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
   {
     add_not_null_conds(join);
     table_map used_tables;
+    /*
+      Step #1: Extract constant condition
+       - Extract and check the constant part of the WHERE 
+       - Extract constant parts of ON expressions from outer 
+         joins and attach them appropriately.
+    */
     if (cond)                /* Because of QUICK_GROUP_MIN_MAX_SELECT */
     {                        /* there may be a select without a cond. */    
-      if (join->tables > 1)
+      if (join->table_count > 1)
         cond->update_used_tables();		// Tablenr may have changed
-      if (join->const_tables == join->tables &&
+      if (join->const_tables == join->table_count &&
 	  thd->lex->current_select->master_unit() ==
 	  &thd->lex->unit)		// not upper level SELECT
         join->const_table_map|=RAND_TABLE_BIT;
+
+      /*
+        Extract expressions that depend on constant tables
+        1. Const part of the join's WHERE clause can be checked immediately
+           and if it is not satisfied then the join has empty result
+        2. Constant parts of outer joins' ON expressions must be attached 
+           there inside the triggers.
+      */
       {						// Check const tables
-        COND *const_cond=
-	  make_cond_for_table(cond,
+        join->exec_const_cond=
+	  make_cond_for_table(thd, cond,
                               join->const_table_map,
-                              (table_map) 0);
-        DBUG_EXECUTE("where",print_where(const_cond,"constants", QT_ORDINARY););
-        for (JOIN_TAB *tab= join->join_tab+join->const_tables;
-             tab < join->join_tab+join->tables ; tab++)
+                              (table_map) 0, MAX_TABLES, FALSE, FALSE);
+        /* Add conditions added by add_not_null_conds(). */
+        for (uint i= 0 ; i < join->const_tables ; i++)
+          add_cond_and_fix(thd, &join->exec_const_cond,
+                           join->join_tab[i].select_cond);
+
+        DBUG_EXECUTE("where",print_where(join->exec_const_cond,"constants",
+					 QT_ORDINARY););
+        if (join->exec_const_cond && !join->exec_const_cond->is_expensive() &&
+            !join->exec_const_cond->val_int())
         {
-          if (*tab->on_expr_ref)
-          {
-            JOIN_TAB *cond_tab= tab->first_inner;
-            COND *tmp= make_cond_for_table(*tab->on_expr_ref,
-                                           join->const_table_map,
-                                         (  table_map) 0);
-            if (!tmp)
-              continue;
-            tmp= new Item_func_trig_cond(tmp, &cond_tab->not_null_compl);
-            if (!tmp)
-              DBUG_RETURN(1);
-            tmp->quick_fix_field();
-            cond_tab->select_cond= !cond_tab->select_cond ? tmp :
-	                            new Item_cond_and(cond_tab->select_cond,
-                                                      tmp);
-            if (!cond_tab->select_cond)
-	      DBUG_RETURN(1);
-            cond_tab->select_cond->quick_fix_field();
-          }       
+          DBUG_PRINT("info",("Found impossible WHERE condition"));
+          join->exec_const_cond= NULL;
+          DBUG_RETURN(1);	 // Impossible const condition
         }
-        if (const_cond && !const_cond->val_int())
-        {
-	  DBUG_PRINT("info",("Found impossible WHERE condition"));
-	  DBUG_RETURN(1);	 // Impossible const condition
+
+        COND *outer_ref_cond= make_cond_for_table(thd, cond, 
+                                                  OUTER_REF_TABLE_BIT,
+                                                  OUTER_REF_TABLE_BIT,
+                                                  MAX_TABLES, FALSE, FALSE);
+        if (outer_ref_cond)
+	{
+          add_cond_and_fix(thd, &outer_ref_cond, join->outer_ref_cond);
+          join->outer_ref_cond= outer_ref_cond;
         }
       }
     }
+
+    /*
+      Step #2: Extract WHERE/ON parts
+    */
+    table_map save_used_tables= 0;
     used_tables=((select->const_tables=join->const_table_map) |
 		 OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
-    for (uint i=join->const_tables ; i < join->tables ; i++)
+    JOIN_TAB *tab;
+    table_map current_map;
+    uint i= join->const_tables;
+    for (tab= first_depth_first_tab(join); tab;
+         tab= next_depth_first_tab(join, tab), i++)
     {
-      JOIN_TAB *tab=join->join_tab+i;
+      bool is_hj;
       /*
         first_inner is the X in queries like:
         SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
       */
-      JOIN_TAB *first_inner_tab= tab->first_inner; 
-      table_map current_map= tab->table->map;
+      JOIN_TAB *first_inner_tab= tab->first_inner;
+
+      if (tab->table)
+        current_map= tab->table->map;
+      else
+        current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables;
+
       bool use_quick_range=0;
       COND *tmp;
 
+      /* 
+        Tables that are within SJ-Materialization nests cannot have their
+        conditions referring to preceding non-const tables.
+         - If we're looking at the first SJM table, reset used_tables
+           to refer to only allowed tables
+      */
+      if (tab->emb_sj_nest && tab->emb_sj_nest->sj_mat_info && 
+          tab->emb_sj_nest->sj_mat_info->is_used &&
+          !(used_tables & tab->emb_sj_nest->sj_inner_tables))
+      {
+        save_used_tables= used_tables;
+        used_tables= join->const_table_map | OUTER_REF_TABLE_BIT | 
+                     RAND_TABLE_BIT;
+      }
+
       /*
 	Following force including random expression in last table condition.
 	It solve problem with select like SELECT * FROM t1 WHERE rand() > 0.5
       */
-      if (i == join->tables-1)
+      if (tab == join->join_tab + join->top_join_tab_count - 1)
 	current_map|= OUTER_REF_TABLE_BIT | RAND_TABLE_BIT;
       used_tables|=current_map;
 
       if (tab->type == JT_REF && tab->quick &&
-	  (uint) tab->ref.key == tab->quick->index &&
-	  tab->ref.key_length < tab->quick->max_used_key_length)
+	  (((uint) tab->ref.key == tab->quick->index &&
+	    tab->ref.key_length < tab->quick->max_used_key_length) ||
+	    tab->table->intersect_keys.is_set(tab->ref.key)))
       {
 	/* Range uses longer key;  Use this instead of ref on key */
 	tab->type=JT_ALL;
@@ -6460,16 +8084,44 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
           We will use join cache here : prevent sorting of the first
           table only and sort at the end.
         */
-        if (i != join->const_tables && join->tables > join->const_tables + 1)
+        if (i != join->const_tables && join->table_count > join->const_tables + 1)
           join->full_join= 1;
       }
 
       tmp= NULL;
+
       if (cond)
-        tmp= make_cond_for_table(cond,used_tables,current_map);
+      {
+        if (tab->bush_children)
+        {
+          // Reached the materialization tab
+          tmp= make_cond_after_sjm(cond, cond, save_used_tables, used_tables);
+          used_tables= save_used_tables | used_tables;
+          save_used_tables= 0;
+        }
+        else
+         {
+	  tmp= make_cond_for_table(thd, cond, used_tables, current_map, i,
+                                   FALSE, FALSE);
+         }
+        /* Add conditions added by add_not_null_conds(). */
+        if (tab->select_cond)
+          add_cond_and_fix(thd, &tmp, tab->select_cond);
+      }
+
+      is_hj= (tab->type == JT_REF || tab->type == JT_EQ_REF) &&
+             (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
+	     ((join->max_allowed_join_cache_level+1)/2 == 2 ||
+              ((join->max_allowed_join_cache_level+1)/2 > 2 &&
+	       is_hash_join_key_no(tab->ref.key))) &&
+              (!tab->emb_sj_nest ||                     
+               join->allowed_semijoin_with_cache) && 
+              (!(tab->table->map & join->outer_join) ||
+               join->allowed_outer_join_with_cache);
+
       if (cond && !tmp && tab->quick)
       {						// Outer join
-        if (tab->type != JT_ALL)
+        if (tab->type != JT_ALL && !is_hj)
         {
           /*
             Don't use the quick method
@@ -6491,9 +8143,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
         }
 
       }
-      if (tmp || !cond || tab->type == JT_REF)
+      if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL ||
+          tab->type == JT_EQ_REF || first_inner_tab)
       {
-        DBUG_EXECUTE("where",print_where(tmp,tab->table->alias, QT_ORDINARY););
+        DBUG_EXECUTE("where",print_where(tmp, 
+                                         tab->table? tab->table->alias.c_ptr() :"sjm-nest",
+                                         QT_ORDINARY););
 	SQL_SELECT *sel= tab->select= ((SQL_SELECT*)
                                        thd->memdup((uchar*) select,
                                                    sizeof(*select)));
@@ -6513,35 +8168,49 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
           */
           if (!(tmp= add_found_match_trig_cond(first_inner_tab, tmp, 0)))
             DBUG_RETURN(1);
-          tab->select_cond=sel->cond=tmp;
+          sel->cond= tmp;
+          tab->set_select_cond(tmp, __LINE__);
           /* Push condition to storage engine if this is enabled
              and the condition is not guarded */
-	  if (thd->variables.optimizer_switch &
-              OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN)
+          if (tab->table)
           {
-            COND *push_cond= 
-              make_cond_for_table(tmp, tab->table->map, tab->table->map);
-            if (push_cond)
+            tab->table->file->pushed_cond= NULL;
+            if ((thd->variables.optimizer_switch &
+                               OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) &&
+                !first_inner_tab)
             {
-              /* Push condition to handler */
-              if (!tab->table->file->cond_push(push_cond))
-                tab->table->file->pushed_cond= push_cond;
+              COND *push_cond= 
+              make_cond_for_table(thd, tmp, current_map, current_map,
+                                  MAX_TABLES, FALSE, FALSE);
+              if (push_cond)
+              {
+                /* Push condition to handler */
+                if (!tab->table->file->cond_push(push_cond))
+                  tab->table->file->pushed_cond= push_cond;
+              }
             }
           }
         }
         else
-          tab->select_cond= sel->cond= NULL;
+        {
+          sel->cond= NULL;
+          tab->set_select_cond(NULL, __LINE__);
+        }
 
 	sel->head=tab->table;
-        DBUG_EXECUTE("where",print_where(tmp,tab->table->alias, QT_ORDINARY););
+        DBUG_EXECUTE("where",
+                     print_where(tmp, 
+                                 tab->table ? tab->table->alias.c_ptr() :
+                                   "(sjm-nest)",
+                                 QT_ORDINARY););
 	if (tab->quick)
 	{
 	  /* Use quick key read if it's a constant and it's not used
 	     with key reading */
-          if (tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF &&
+          if ((tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF &&
               tab->type != JT_FT &&
               ((tab->type != JT_CONST && tab->type != JT_REF) ||
-               (uint)tab->ref.key == tab->quick->index))
+               (uint) tab->ref.key == tab->quick->index)) || is_hj)
           {
             DBUG_ASSERT(tab->quick->is_valid());
 	    sel->quick=tab->quick;		// Use value from get_quick_...
@@ -6554,7 +8223,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	  }
 	  tab->quick=0;
 	}
-	uint ref_key=(uint) sel->head->reginfo.join_tab->ref.key+1;
+	uint ref_key= sel->head? (uint) sel->head->reginfo.join_tab->ref.key+1 : 0;
 	if (i == join->const_tables && ref_key)
 	{
 	  if (!tab->const_keys.is_clear_all() &&
@@ -6573,12 +8242,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	    the index if we are using limit and this is the first table
 	  */
 
-	  if ((cond &&
-              !tab->keys.is_subset(tab->const_keys) && i > 0) ||
-	      (!tab->const_keys.is_clear_all() && i == join->const_tables &&
-	       join->unit->select_limit_cnt <
-	       join->best_positions[i].records_read &&
-	       !(join->select_options & OPTION_FOUND_ROWS)))
+	  if (!tab->table->is_filled_at_execution() &&
+              ((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) ||
+               (!tab->const_keys.is_clear_all() && i == join->const_tables &&
+                join->unit->select_limit_cnt <
+                join->best_positions[i].records_read &&
+                !(join->select_options & OPTION_FOUND_ROWS))))
 	  {
 	    /* Join with outer join condition */
 	    COND *orig_cond=sel->cond;
@@ -6595,11 +8264,13 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	      sel->cond->quick_fix_field();
 
 	    if (sel->test_quick_select(thd, tab->keys,
-				       used_tables & ~ current_map,
+				       ((used_tables & ~ current_map) |
+                                        OUTER_REF_TABLE_BIT),
 				       (join->select_options &
 					OPTION_FOUND_ROWS ?
 					HA_POS_ERROR :
-					join->unit->select_limit_cnt), 0) < 0)
+					join->unit->select_limit_cnt), 0,
+                                        FALSE) < 0)
             {
 	      /*
 		Before reporting "Impossible WHERE" for the whole query
@@ -6612,7 +8283,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
                                          (join->select_options &
                                           OPTION_FOUND_ROWS ?
                                           HA_POS_ERROR :
-                                          join->unit->select_limit_cnt),0) < 0)
+                                          join->unit->select_limit_cnt),0,
+                                          FALSE) < 0)
 		DBUG_RETURN(1);			// Impossible WHERE
             }
             else
@@ -6639,26 +8311,17 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	      2 : 1;
 	    sel->read_tables= used_tables & ~current_map;
 	  }
-	  if (i != join->const_tables && tab->use_quick != 2)
+	  if (i != join->const_tables && tab->use_quick != 2 &&
+              !tab->first_inner)
 	  {					/* Read with cache */
-	    if (cond &&
-                (tmp=make_cond_for_table(cond,
-					 join->const_table_map |
-					 current_map,
-					 current_map)))
-	    {
-              DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
-	      tab->cache.select=(SQL_SELECT*)
-		thd->memdup((uchar*) sel, sizeof(SQL_SELECT));
-	      tab->cache.select->cond=tmp;
-	      tab->cache.select->read_tables=join->const_table_map;
-	    }
-	  }
+            if (tab->make_scan_filter())
+              DBUG_RETURN(1);
+          }
 	}
       }
       
       /* 
-        Push down conditions from all on expressions.
+        Push down conditions from all ON expressions.
         Each of these conditions are guarded by a variable
         that turns if off just before null complemented row for
         outer joins is formed. Thus, the condition from an
@@ -6666,16 +8329,32 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
         the null complemented row.
       */ 
 
-      /* First push down constant conditions from on expressions */
-      for (JOIN_TAB *join_tab= join->join_tab+join->const_tables;
-           join_tab < join->join_tab+join->tables ; join_tab++)
+      /* 
+        First push down constant conditions from ON expressions. 
+         - Each pushed-down condition is wrapped into a trigger which is
+           enabled only for non-NULL-complemented records
+         - The condition is attached to the first_inner_table.
+        
+        With regards to join nests:
+         - if we start at top level, don't walk into nests
+         - if we start inside a nest, stay within that nest.
+      */
+      JOIN_TAB *start_from= tab->bush_root_tab? 
+                               tab->bush_root_tab->bush_children->start : 
+                               join->join_tab + join->const_tables;
+      JOIN_TAB *end_with= tab->bush_root_tab? 
+                               tab->bush_root_tab->bush_children->end : 
+                               join->join_tab + join->top_join_tab_count;
+      for (JOIN_TAB *join_tab= start_from;
+           join_tab != end_with;
+           join_tab++)
       {
         if (*join_tab->on_expr_ref)
         {
           JOIN_TAB *cond_tab= join_tab->first_inner;
-          COND *tmp= make_cond_for_table(*join_tab->on_expr_ref,
+          COND *tmp= make_cond_for_table(thd, *join_tab->on_expr_ref,
                                          join->const_table_map,
-                                         (table_map) 0);
+                                         (table_map) 0, MAX_TABLES, FALSE, FALSE);
           if (!tmp)
             continue;
           tmp= new Item_func_trig_cond(tmp, &cond_tab->not_null_compl);
@@ -6687,13 +8366,22 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
           if (!cond_tab->select_cond)
 	    DBUG_RETURN(1);
           cond_tab->select_cond->quick_fix_field();
+          cond_tab->select_cond->update_used_tables();
+          if (cond_tab->select)
+            cond_tab->select->cond= cond_tab->select_cond; 
         }       
       }
 
-      /* Push down non-constant conditions from on expressions */
+
+      /* Push down non-constant conditions from ON expressions */
       JOIN_TAB *last_tab= tab;
+
+      /*
+        while we're inside of an outer join and last_tab is 
+        the last of its tables ... 
+      */
       while (first_inner_tab && first_inner_tab->last_inner == last_tab)
-      {  
+      { 
         /* 
           Table tab is the last inner table of an outer join.
           An on expression is always attached to it.
@@ -6702,15 +8390,29 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 
         table_map used_tables2= (join->const_table_map |
                                  OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
-	for (tab= join->join_tab+join->const_tables; tab <= last_tab ; tab++)
+
+        start_from= tab->bush_root_tab? 
+                      tab->bush_root_tab->bush_children->start : 
+                      join->join_tab + join->const_tables;
+        for (JOIN_TAB *tab= start_from; tab <= last_tab; tab++)
         {
+          DBUG_ASSERT(tab->table);
           current_map= tab->table->map;
           used_tables2|= current_map;
-          COND *tmp_cond= make_cond_for_table(on_expr, used_tables2,
-                                             current_map);
+          /*
+            psergey: have put the MAX_TABLES below. It's bad, will need to fix it.
+          */
+          COND *tmp_cond= make_cond_for_table(thd, on_expr, used_tables2,
+                                              current_map, /*(tab - first_tab)*/ MAX_TABLES,
+					      FALSE, FALSE);
+          if (tab == first_inner_tab && tab->on_precond)
+            add_cond_and_fix(thd, &tmp_cond, tab->on_precond);
           if (tmp_cond)
           {
             JOIN_TAB *cond_tab= tab < first_inner_tab ? first_inner_tab : tab;
+            Item **sel_cond_ref= tab < first_inner_tab ?
+                                   &first_inner_tab->on_precond :
+                                   &tab->select_cond;
             /*
               First add the guards for match variables of
               all embedding outer join operations.
@@ -6733,15 +8435,18 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
               tmp_cond->quick_fix_field();
 	    /* Add the predicate to other pushed down predicates */
             DBUG_PRINT("info", ("Item_cond_and"));
-            cond_tab->select_cond= !cond_tab->select_cond ? tmp_cond :
-	                          new Item_cond_and(cond_tab->select_cond,
-                                                    tmp_cond);
+            *sel_cond_ref= !(*sel_cond_ref) ? 
+                             tmp_cond :
+                             new Item_cond_and(*sel_cond_ref, tmp_cond);
             DBUG_PRINT("info", ("Item_cond_and 0x%lx",
-                                (ulong)cond_tab->select_cond));
-            if (!cond_tab->select_cond)
-	      DBUG_RETURN(1);
-            cond_tab->select_cond->quick_fix_field();
-          }              
+                                (ulong)(*sel_cond_ref)));
+            if (!(*sel_cond_ref))
+              DBUG_RETURN(1);
+            (*sel_cond_ref)->quick_fix_field();
+            (*sel_cond_ref)->update_used_tables();
+            if (cond_tab->select)
+              cond_tab->select->cond= cond_tab->select_cond;
+          }
         }
         first_inner_tab= first_inner_tab->first_upper;       
       }
@@ -6751,6 +8456,271 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 }
 
 
+static
+uint get_next_field_for_derived_key(uchar *arg)
+{
+  KEYUSE *keyuse= *(KEYUSE **) arg;
+  if (!keyuse)
+    return (uint) (-1);
+  TABLE *table= keyuse->table;
+  uint key= keyuse->key;
+  uint fldno= keyuse->keypart; 
+  uint keypart= keyuse->keypart_map == (key_part_map) 1 ?
+                                         0 : (keyuse-1)->keypart+1;
+  for ( ; 
+        keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno;
+        keyuse++)
+    keyuse->keypart= keypart;
+  if (keyuse->key != key)
+    keyuse= 0;
+  *((KEYUSE **) arg)= keyuse;
+  return fldno;
+}
+
+
+static 
+bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys)
+{
+  TABLE *table= keyuse->table;
+  if (table->alloc_keys(keys))
+    return TRUE;
+  uint keyno= 0;
+  KEYUSE *first_keyuse= keyuse;
+  uint prev_part= keyuse->keypart;
+  uint parts= 0;
+  uint i= 0;
+
+  for ( ; i < count && keyno < keys; )
+  {
+    do
+    {
+      keyuse->key= keyno;
+      keyuse->keypart_map= (key_part_map) (1 << parts);     
+      keyuse++;
+      i++;
+    } 
+    while (i < count && keyuse->used_tables == first_keyuse->used_tables &&
+           keyuse->keypart == prev_part);
+    parts++;
+    if (i < count && keyuse->used_tables == first_keyuse->used_tables)
+    {
+      prev_part= keyuse->keypart;
+    }
+    else
+    {
+      if (table->add_tmp_key(keyno, parts, 
+                             get_next_field_for_derived_key, 
+                             (uchar *) &first_keyuse,
+                             FALSE))
+        return TRUE;
+      table->reginfo.join_tab->keys.set_bit(keyno);
+      first_keyuse= keyuse;
+      keyno++;
+      parts= 0;
+      prev_part= keyuse->keypart;
+    }
+  }             
+
+  return FALSE;
+}
+   
+
+static
+bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array)
+{
+  KEYUSE *keyuse= dynamic_element(keyuse_array, 0, KEYUSE*);
+  uint elements= keyuse_array->elements;
+  TABLE *prev_table= 0;
+  for (uint i= 0; i < elements; i++, keyuse++)
+  {
+    if (!keyuse->table)
+      break;
+    KEYUSE *first_table_keyuse= NULL;
+    table_map last_used_tables= 0;
+    uint count= 0;
+    uint keys= 0;
+    TABLE_LIST *derived= NULL;
+    if (keyuse->table != prev_table)
+      derived= keyuse->table->pos_in_table_list;
+    while (derived && derived->is_materialized_derived() && 
+           keyuse->key == MAX_KEY)
+    {
+      if (keyuse->table != prev_table)
+      {
+        prev_table= keyuse->table;
+        first_table_keyuse= keyuse;
+        last_used_tables= keyuse->used_tables;
+        count= 0;
+        keys= 0;
+      }
+      else if (keyuse->used_tables != last_used_tables)
+      {
+        keys++;
+        last_used_tables= keyuse->used_tables;
+      }
+      count++;
+      keyuse++;
+      if (keyuse->table != prev_table)
+      {
+        if (generate_derived_keys_for_table(first_table_keyuse, count, ++keys))
+          return TRUE;
+        keyuse--;
+	derived= NULL;
+      }
+    }
+  }
+  return FALSE;
+}
+
+
+/*
+  @brief
+  Drops unused keys for each materialized derived table/view
+
+  @details
+  For materialized derived tables only ref access can be used, it employs
+  only one index, thus we don't need the rest. For each materialized derived
+  table/view call TABLE::use_index to save one index chosen by the optimizer
+  and free others. If no key is chosen then all keys will be dropped.
+*/
+
+void JOIN::drop_unused_derived_keys()
+{
+  JOIN_TAB *tab;
+  for (tab= first_linear_tab(this, WITHOUT_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(this, tab, WITHOUT_BUSH_ROOTS))
+  {
+    
+    TABLE *table=tab->table;
+    if (!table)
+      continue;
+    if (!table->pos_in_table_list->is_materialized_derived() ||
+        table->max_keys <= 1)
+      continue;
+    table->use_index(tab->ref.key);
+    if (table->s->keys)
+      tab->ref.key= 0;
+  }
+}
+
+
+/*
+  Evaluate the bitmap of used tables for items from the select list
+*/
+
+inline void JOIN::eval_select_list_used_tables()
+{
+  select_list_used_tables= 0;
+  Item *item;
+  List_iterator_fast<Item> it(fields_list);
+  while ((item= it++))
+  {
+    select_list_used_tables|= item->used_tables();
+  }
+  Item_outer_ref *ref;
+  List_iterator_fast<Item_outer_ref> ref_it(select_lex->inner_refs_list);
+  while ((ref= ref_it++))
+  {
+    item= ref->outer_ref;
+    select_list_used_tables|= item->used_tables();
+  }
+}
+
+
+/*
+  Determine after which table we'll produce an ordered set
+
+  SYNOPSIS
+    make_join_orderinfo()
+     join
+
+   
+  DESCRIPTION 
+    Determine if the set is already ordered for ORDER BY, so it can 
+    disable join cache because it will change the ordering of the results.
+    Code handles sort table that is at any location (not only first after 
+    the const tables) despite the fact that it's currently prohibited.
+    We must disable join cache if the first non-const table alone is
+    ordered. If there is a temp table the ordering is done as a last
+    operation and doesn't prevent join cache usage.
+
+  RETURN
+    Number of the table after which the set will be ordered
+    join->table_count if we don't need an ordered set
+*/
+
+static uint make_join_orderinfo(JOIN *join)
+{
+  /*
+    This function needs to be fixed to take into account that we now have SJM
+    nests.
+  */
+  DBUG_ASSERT(0);
+
+  JOIN_TAB *tab;
+  if (join->need_tmp)
+    return join->table_count;
+  tab= join->get_sort_by_join_tab();
+  return tab ? tab-join->join_tab : join->table_count;
+}
+
+/*
+  Deny usage of join buffer for the specified table
+
+  SYNOPSIS
+    set_join_cache_denial()
+      tab    join table for which join buffer usage is to be denied  
+     
+  DESCRIPTION
+    The function denies usage of join buffer when joining the table 'tab'.
+    The table is marked as not employing any join buffer. If a join cache
+    object has been already allocated for the table this object is destroyed.
+
+  RETURN
+    none    
+*/
+
+static
+void set_join_cache_denial(JOIN_TAB *join_tab)
+{
+  if (join_tab->cache)
+  {
+    /* 
+      If there is a previous cache linked to this cache through the
+      next_cache pointer: remove the link. 
+    */
+    if (join_tab->cache->prev_cache)
+      join_tab->cache->prev_cache->next_cache= 0;
+    /*
+      No need to do the same for next_cache since cache denial is done
+      backwards starting from the latest cache in the linked list (see
+      revise_cache_usage()).
+    */
+    DBUG_ASSERT(!join_tab->cache->next_cache);
+
+    join_tab->cache->free();
+    join_tab->cache= 0;
+  }
+  if (join_tab->use_join_cache)
+  {
+    join_tab->use_join_cache= FALSE;
+    join_tab->used_join_cache_level= 0;
+    /*
+      It could be only sub_select(). It could not be sub_select_sjm because we
+      don't do join buffering for the first table in an sjm nest.
+    */
+    join_tab[-1].next_select= sub_select;
+    if (join_tab->type == JT_REF && join_tab->is_ref_for_hash_join())
+    {
+      join_tab->type= JT_ALL;
+      join_tab->ref.key_parts= 0;
+    }
+    join_tab->join->return_tab= join_tab;
+  }
+}
+
+
 /**
   The default implementation of unlock-row method of READ_RECORD,
   used in all access methods.
@@ -6763,7 +8733,6 @@ void rr_unlock_row(st_join_table *tab)
 }
 
 
-
 /**
   Pick the appropriate access method functions
 
@@ -6814,47 +8783,673 @@ pick_table_access_method(JOIN_TAB *tab)
 }
 
 
-static void
-make_join_readinfo(JOIN *join, ulonglong options)
+/*
+  Revise usage of join buffer for the specified table and the whole nest
+
+  SYNOPSIS
+    revise_cache_usage()
+      join_tab  join table for which join buffer usage is to be revised
+
+  DESCRIPTION
+    Revises the decision to use a join buffer for 'join_tab'. If it is an
+    inner table of an outer join or semi-join nest, join buffers are denied
+    for the inner tables preceding it (NOTE(review): the loops start at
+    join_tab-1, so join_tab itself is skipped there - confirm intended).
+  RETURN
+    none
+*/
+
+static
+void revise_cache_usage(JOIN_TAB *join_tab)
+{
+  JOIN_TAB *tab;
+  JOIN_TAB *first_inner;
+
+  if (join_tab->first_inner)
+  {
+    JOIN_TAB *end_tab= join_tab;
+    for (first_inner= join_tab->first_inner; 
+         first_inner;
+         first_inner= first_inner->first_upper)           
+    {
+      for (tab= end_tab-1; tab >= first_inner; tab--)
+        set_join_cache_denial(tab);
+      end_tab= first_inner;
+    }
+  }
+  else if (join_tab->first_sj_inner_tab)
+  {
+    first_inner= join_tab->first_sj_inner_tab;
+    for (tab= join_tab-1; tab >= first_inner; tab--)
+    {
+      if (tab->first_sj_inner_tab == first_inner)
+        set_join_cache_denial(tab);
+    }
+  }
+  else set_join_cache_denial(join_tab);
+}
+
+
+/*
+  end_select-compatible function that writes the record into a sjm temptable
+
+  SYNOPSIS
+    end_sj_materialize()
+      join            The join
+      join_tab        Points to right after the last join_tab in materialization bush
+      end_of_records  FALSE <=> This call passes another record combination
+                      TRUE  <=> EOF (no action)
+
+  DESCRIPTION
+    This function is used by semi-join materialization to capture subquery's
+    resultset and write it into the temptable (that is, materialize it).
+    A record combination with a NULL in any of sjm_table_cols is skipped.
+
+  NOTE
+    This function is used only for semi-join materialization. Non-semijoin
+    materialization uses different mechanism.
+
+  RETURN
+    NESTED_LOOP_OK
+    NESTED_LOOP_ERROR
+*/
+
+enum_nested_loop_state 
+end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
+{
+  int error;
+  THD *thd= join->thd;
+  SJ_MATERIALIZATION_INFO *sjm= join_tab[-1].emb_sj_nest->sj_mat_info;
+  DBUG_ENTER("end_sj_materialize");
+  if (!end_of_records)
+  {
+    TABLE *table= sjm->table;
+
+    List_iterator<Item> it(sjm->sjm_table_cols);
+    Item *item;
+    while ((item= it++))
+    {
+      if (item->is_null())
+        DBUG_RETURN(NESTED_LOOP_OK);
+    }
+    fill_record(thd, table->field, sjm->sjm_table_cols, TRUE, FALSE);
+    if (thd->is_error())
+      DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
+    if ((error= table->file->ha_write_tmp_row(table->record[0])))
+    {
+      /* create_internal_tmp_table_from_heap will generate error if needed */
+      if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
+          create_internal_tmp_table_from_heap(thd, table,
+                                              sjm->sjm_table_param.start_recinfo, 
+                                              &sjm->sjm_table_param.recinfo, error, 1))
+        DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
+    }
+  }
+  DBUG_RETURN(NESTED_LOOP_OK);
+}
+
+
+/* 
+  Check whether a join buffer can be used to join the specified table   
+
+  SYNOPSIS
+    check_join_cache_usage()
+      tab                 joined table to check join buffer usage for
+      options             options of the join
+      no_jbuf_after       don't use join buffering after table with this number
+      prev_tab            previous join table
+
+  DESCRIPTION
+    The function finds out whether the table 'tab' can be joined using a join
+    buffer. This check is performed after the best execution plan for 'join'
+    has been chosen. If the function decides that a join buffer can be employed
+    then it selects the most appropriate join cache object that contains this
+    join buffer.
+    The result of the check and the type of the join buffer to be used
+    depend on:
+      - the access method to access rows of the joined table
+      - whether the join table is an inner table of an outer join or semi-join
+      - whether the optimizer switches
+          outer_join_with_cache, semijoin_with_cache, join_cache_incremental,
+          join_cache_hashed, join_cache_bka,
+        are set on or off
+      - the join cache level set for the query
+      - the join 'options'.
+
+    In any case join buffer is not used if the number of the joined table
+    ('table_index') is greater than 'no_jbuf_after'. It's also never used if
+    the value of join_cache_level is equal to 0.
+    If the optimizer switch outer_join_with_cache is off no join buffer is
+    used for outer join operations.
+    If the optimizer switch semijoin_with_cache is off no join buffer is used
+    for semi-join operations.
+    If the optimizer switch join_cache_incremental is off no incremental join
+    buffers are used.
+    If the optimizer switch join_cache_hashed is off then the optimizer uses
+    neither BNLH algorithm, nor BKAH algorithm to perform join operations.
+
+    If the optimizer switch join_cache_bka is off then the optimizer uses
+    neither BKA algorithm, nor BKAH algorithm to perform join operation.
+    The valid settings for join_cache_level lie in the interval 0..8.
+    If it is set to 0 no join buffers are used to perform join operations.
+    Currently we differentiate between join caches of 8 levels:
+      1 : non-incremental join cache used for BNL join algorithm
+      2 : incremental join cache used for BNL join algorithm
+      3 : non-incremental join cache used for BNLH join algorithm
+      4 : incremental join cache used for BNLH join algorithm
+      5 : non-incremental join cache used for BKA join algorithm
+      6 : incremental join cache used for BKA join algorithm 
+      7 : non-incremental join cache used for BKAH join algorithm 
+      8 : incremental join cache used for BKAH join algorithm
+    If the value of join_cache_level is set to n then no join caches of
+    levels higher than n can be employed.
+
+    If the optimizer switches outer_join_with_cache, semijoin_with_cache,
+    join_cache_incremental, join_cache_hashed, join_cache_bka are all on
+    the following rules are applied.
+    If join_cache_level==1|2 then join buffer is used for inner joins, outer
+    joins and semi-joins with 'JT_ALL' access method. In this case a
+    JOIN_CACHE_BNL object is employed.
+    If join_cache_level==3|4 and a join buffer is used for a join operation
+    (inner join, outer join, semi-join) with 'JT_REF'/'JT_EQ_REF' access method
+    then a JOIN_CACHE_BNLH object is employed.
+    If an index is used to access rows of the joined table and the value of
+    join_cache_level==5|6 then a JOIN_CACHE_BKA object is employed. 
+    If an index is used to access rows of the joined table and the value of
+    join_cache_level==7|8 then a JOIN_CACHE_BKAH object is employed. 
+    If the value of join_cache_level is odd then creation of a non-linked 
+    join cache is forced.
+
+    Currently for any join operation a join cache of the highest allowed
+    and applicable level is used.
+    For example, if join_cache_level is set to 6 and the optimizer switch
+    join_cache_bka is off, while the optimizer switch join_cache_hashed is
+    on then for any inner join operation with JT_REF/JT_EQREF access method
+    to the joined table the BNLH join algorithm will be used, while for
+    the table accessed by the JT_ALL methods the BNL algorithm will be used.
+
+    If the function decides that a join buffer can be used to join the table
+    'tab' then it assigns the selected join cache object to tab->cache and
+    returns the level of this cache. The caller sets tab->use_join_cache
+    from the returned level.
+    If the function creates a join cache object it tries to initialize it. The
+    failure to do this results in an invocation of the function that destructs
+    the created object.
+    If the function decides that for some reason no join buffer can be used
+    for a table it calls the function revise_cache_usage that checks
+    whether join cache should be denied for some previous tables. In this case
+    a pointer to the first table for which join cache usage has been denied
+    is passed in join->return_tab (see the function set_join_cache_denial).
+    
+    The function changes the values of the fields tab->icp_other_tables_ok and
+    tab->idx_cond_fact_out to FALSE if the chosen join cache algorithm
+    requires it.
+ 
+  NOTES
+    An inner table of a nested outer join or a nested semi-join can be currently
+    joined only when a linked cache object is employed. In these cases setting
+    join_cache_incremental to 'off' results in denial of usage of any join
+    buffer when joining the table.
+    For a nested outer join/semi-join, currently, we either use join buffers for
+    all inner tables or for none of them. 
+    Some engines (e.g. Falcon) currently allow to use only a join cache
+    of the type JOIN_CACHE_BKAH when the joined table is accessed through
+    an index. For these engines setting the value of join_cache_level to 5 or 6
+    results in that no join buffer is used to join the table. 
+  
+  RETURN VALUE
+    cache level if cache is used, otherwise returns 0
+
+  TODO
+    Support BKA inside SJ-Materialization nests. When doing this, we'll need
+    to only store sj-inner tables in the join buffer.
+#if 0
+        JOIN_TAB *first_tab= join->join_tab+join->const_tables;
+        uint n_tables= i-join->const_tables;
+        / *
+          We normally put all preceding tables into the join buffer, except
+          for the constant tables.
+          If we're inside a semi-join materialization nest, e.g.
+
+             outer_tbl1  outer_tbl2  ( inner_tbl1, inner_tbl2 ) ...
+                                                       ^-- we're here
+
+          then we need to put into the join buffer only the tables from
+          within the nest.
+        * /
+        if (i >= first_sjm_table && i < last_sjm_table)
+        {
+          n_tables= i - first_sjm_table; // will be >0 if we got here
+          first_tab= join->join_tab + first_sjm_table;
+        }
+#endif
+*/
+
+static
+uint check_join_cache_usage(JOIN_TAB *tab,
+                            ulonglong options,
+                            uint no_jbuf_after,
+                            uint table_index,
+                            JOIN_TAB *prev_tab)
 {
+  COST_VECT cost;
+  uint flags= 0;
+  ha_rows rows= 0;
+  uint bufsz= 4096;
+  JOIN_CACHE *prev_cache=0;
+  JOIN *join= tab->join;
+  uint cache_level= tab->used_join_cache_level;
+  bool force_unlinked_cache=
+         !(join->allowed_join_cache_types & JOIN_CACHE_INCREMENTAL_BIT);
+  bool no_hashed_cache=
+         !(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT);
+  bool no_bka_cache= 
+         !(join->allowed_join_cache_types & JOIN_CACHE_BKA_BIT);
+
+  join->return_tab= 0;  /* set by set_join_cache_denial() on denial */
+
+  /*
+    Don't use join cache if @@join_cache_level==0 or this table is the first
+    one in a join suborder (either at top level or inside a bush)
+  */
+  if (cache_level == 0 || !prev_tab)
+    return 0;
+
+  if (force_unlinked_cache && (cache_level%2 == 0))
+    cache_level--;  /* even levels are the incremental ones */
+
+  if (options & SELECT_NO_JOIN_CACHE)
+    goto no_join_cache;
+
+  if (tab->use_quick == 2)
+    goto no_join_cache;
+
+  if (tab->is_inner_table_of_semi_join_with_first_match() &&
+      !join->allowed_semijoin_with_cache)
+    goto no_join_cache;
+  if (tab->is_inner_table_of_outer_join() &&
+      !join->allowed_outer_join_with_cache)
+    goto no_join_cache;
+
+  /*
+    Non-linked join buffers can't guarantee one match
+  */
+  if (tab->is_nested_inner())
+  {
+    if (force_unlinked_cache || cache_level == 1)
+      goto no_join_cache;
+    if (cache_level & 1)
+      cache_level--;  /* odd -> next lower (incremental) level */
+  }
+    
+  /*
+    Don't use join buffering if we're dictated not to by no_jbuf_after
+    (This is not meaningfully used currently)
+  */
+  if (table_index > no_jbuf_after)
+    goto no_join_cache;
+  
+  /*
+    TODO: BNL join buffer should be perfectly ok with tab->bush_children.
+  */
+  if (tab->loosescan_match_tab || tab->bush_children)
+    goto no_join_cache;
+
+  for (JOIN_TAB *first_inner= tab->first_inner; first_inner;
+       first_inner= first_inner->first_upper)
+  {
+    if (first_inner != tab && !first_inner->use_join_cache)
+      goto no_join_cache;
+  }
+  if (tab->first_sj_inner_tab && tab->first_sj_inner_tab != tab &&
+      !tab->first_sj_inner_tab->use_join_cache)
+    goto no_join_cache;
+  if (!prev_tab->use_join_cache)
+  {
+    /*
+      Check whether table tab and the previous one belong to the same nest of
+      inner tables and if so do not use join buffer when joining table tab.
+    */
+    if (tab->first_inner)
+    {
+      for (JOIN_TAB *first_inner= tab[-1].first_inner;
+           first_inner;
+           first_inner= first_inner->first_upper)
+      {
+        if (first_inner == tab->first_inner)
+          goto no_join_cache;
+      }
+    }
+    else if (tab->first_sj_inner_tab &&
+             tab->first_sj_inner_tab == tab[-1].first_sj_inner_tab)
+      goto no_join_cache; 
+  }       
+
+  prev_cache= prev_tab->cache;
+
+  switch (tab->type) {
+  case JT_ALL:
+    if (cache_level == 1)
+      prev_cache= 0;
+    if ((tab->cache= new JOIN_CACHE_BNL(join, tab, prev_cache)) &&
+        ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+    {
+      tab->icp_other_tables_ok= FALSE;
+      return (2-test(!prev_cache));
+    }
+    goto no_join_cache;
+  case JT_SYSTEM:
+  case JT_CONST:
+  case JT_REF:
+  case JT_EQ_REF:
+    if (cache_level <=2 || (no_hashed_cache && no_bka_cache))
+      goto no_join_cache;
+    if (!tab->is_ref_for_hash_join())
+    {
+      flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT;
+      if (tab->table->covering_keys.is_set(tab->ref.key))
+        flags|= HA_MRR_INDEX_ONLY;
+      rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
+                                                    tab->ref.key_parts,
+                                                    &bufsz, &flags, &cost);
+    }
+
+    if ((cache_level <=4 && !no_hashed_cache) || no_bka_cache ||
+        tab->is_ref_for_hash_join() ||
+	((flags & HA_MRR_NO_ASSOCIATION) && cache_level <=6))
+    {
+      if (!tab->hash_join_is_possible() ||
+          tab->make_scan_filter())
+        goto no_join_cache;
+      if (cache_level == 3)
+        prev_cache= 0;
+      if ((tab->cache= new JOIN_CACHE_BNLH(join, tab, prev_cache)) &&
+          ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+      {
+        tab->icp_other_tables_ok= FALSE;        
+        return (4-test(!prev_cache));
+      }
+      goto no_join_cache;
+    }
+    if (cache_level > 4 && no_bka_cache)
+      goto no_join_cache;  /* NOTE(review): looks unreachable - confirm */
+    
+    if ((flags & HA_MRR_NO_ASSOCIATION) &&
+	(cache_level <= 6 || no_hashed_cache))
+      goto no_join_cache;
+
+    if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL))
+    {
+      if (cache_level <= 6 || no_hashed_cache)
+      {
+        if (cache_level == 5)
+          prev_cache= 0;
+        if ((tab->cache= new JOIN_CACHE_BKA(join, tab, flags, prev_cache)) &&
+            ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+          return (6-test(!prev_cache));
+        goto no_join_cache;
+      }
+      else
+      {
+        if (cache_level == 7)
+          prev_cache= 0;
+        if ((tab->cache= new JOIN_CACHE_BKAH(join, tab, flags, prev_cache)) &&
+            ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+	{
+         tab->idx_cond_fact_out= FALSE;
+          return (8-test(!prev_cache));
+        }
+        goto no_join_cache;
+      }
+    }
+    goto no_join_cache;
+  default : ;
+  }
+
+no_join_cache:
+  if (tab->type != JT_ALL && tab->is_ref_for_hash_join())
+    tab->type= JT_ALL;
+  revise_cache_usage(tab); 
+  return 0;
+}
+
+
+/*
+  Check whether join buffers can be used to join tables of a join
+
+  SYNOPSIS
+    check_join_cache_usage_for_tables()
+      join                join whose tables are to be checked
+      options             options of the join
+      no_jbuf_after       don't use join buffering after table with this number
+                          (The tables are assumed to be numbered in
+                          first_linear_tab(join, WITHOUT_CONST_TABLES),
+                          next_linear_tab(join, tab, WITH_BUSH_ROOTS) order).
+
+  DESCRIPTION
+    For each table after the first non-constant table the function checks
+    whether the table can be joined using a join buffer. If the function decides
+    that a join buffer can be employed then it selects the most appropriate join
+    cache object that contains this join buffer whose level is not greater
+    than join_cache_level set for the join. To make this check the function
+    calls the function check_join_cache_usage for every non-constant table.
+
+  NOTES
+    In some situations (e.g. for nested outer joins, for nested semi-joins) only
+    incremental buffers can be used. If it turns out that for some inner table
+    no join buffer can be used then any inner table of an outer/semi-join nest
+    cannot use join buffer. In the case when already chosen buffer must be
+    denied for a table the function calls check_join_cache_usage() again
+    starting from this table. The pointer to the table from which the check
+    has to be restarted is returned in join->return_tab (see the description
+    of check_join_cache_usage).
+*/
+
+void check_join_cache_usage_for_tables(JOIN *join, ulonglong options,
+                                       uint no_jbuf_after)
+{
+  JOIN_TAB *tab;
+  JOIN_TAB *prev_tab;
+
+  for (tab= first_linear_tab(join, WITHOUT_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+  {
+    tab->used_join_cache_level= join->max_allowed_join_cache_level;  
+  }
+  
+  uint idx= join->const_tables;
+  for (tab= first_linear_tab(join, WITHOUT_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+  {
+restart:
+    tab->icp_other_tables_ok= TRUE;
+    tab->idx_cond_fact_out= TRUE;
+    
+    /*
+      Check if we have a preceding join_tab, as something that will feed us
+      records that we could buffer. We don't have it, if
+       - this is the first non-const table in the join order,
+       - this is the first table inside an SJM nest.
+    */
+    prev_tab= tab - 1;
+    if (tab == join->join_tab + join->const_tables ||
+        (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab))
+      prev_tab= NULL;
+
+    switch (tab->type) {
+    case JT_SYSTEM:
+    case JT_CONST:
+    case JT_EQ_REF:
+    case JT_REF:
+    case JT_REF_OR_NULL:
+    case JT_ALL:
+      tab->used_join_cache_level= check_join_cache_usage(tab, options,
+                                                         no_jbuf_after,
+                                                         idx,
+                                                         prev_tab);
+      tab->use_join_cache= test(tab->used_join_cache_level);
+      /*
+        psergey-merge: todo: raise the question that this is really stupid that
+        we can first allocate a join buffer, then decide not to use it and free
+        it.
+      */
+      if (join->return_tab)
+      {
+        tab= join->return_tab;
+        goto restart;  /* NOTE(review): idx is not rewound here - confirm */
+      }
+      break; 
+    default:
+      tab->used_join_cache_level= 0;
+    }
+    if (!tab->bush_children)
+      idx++;
+  }
+}
+
+
+/*
+  Plan refinement stage: do various setup things for the executor
+
+  SYNOPSIS
+    make_join_readinfo()
+      join           Join being processed
+      options        Join's options (checking for SELECT_DESCRIBE, 
+                     SELECT_NO_JOIN_CACHE)
+      no_jbuf_after  Don't use join buffering after table with this number.
+
+  DESCRIPTION
+    Plan refinement stage: do various set ups for the executor
+      - set up use of join buffering
+      - push index conditions
+      - increment relevant counters
+      - etc
+
+  RETURN 
+    FALSE - OK
+    TRUE  - Out of memory
+*/
+
+static bool
+make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
+{
+  JOIN_TAB *tab;
   uint i;
+  DBUG_ENTER("make_join_readinfo");
+
   bool statistics= test(!(join->select_options & SELECT_DESCRIBE));
-  bool ordered_set= 0;
   bool sorted= 1;
-  DBUG_ENTER("make_join_readinfo");
 
-  for (i=join->const_tables ; i < join->tables ; i++)
+  if (!join->select_lex->sj_nests.is_empty() &&
+      setup_semijoin_dups_elimination(join, options, no_jbuf_after))
+    DBUG_RETURN(TRUE); /* purecov: inspected */
+  
+  /* For const tables, set partial_join_cardinality to 1. */
+  for (tab= join->join_tab; tab != join->join_tab + join->const_tables; tab++)
+    tab->partial_join_cardinality= 1; 
+
+  JOIN_TAB *prev_tab= NULL;
+  for (tab= first_linear_tab(join, WITHOUT_CONST_TABLES), i= join->const_tables; 
+       tab; 
+       prev_tab=tab, tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
   {
-    JOIN_TAB *tab=join->join_tab+i;
+    /*
+      The approximation below for partial join cardinality is not good because
+        - it does not take into account some pushdown predicates
+        - it does not differentiate between inner joins, outer joins and
+        semi-joins.
+      Later it should be improved.
+    */
+
+    if (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab)
+      prev_tab= NULL;
+    DBUG_ASSERT(tab->bush_children || tab->table == join->best_positions[i].table->table);
+
+    tab->partial_join_cardinality= join->best_positions[i].records_read *
+                                   (prev_tab? prev_tab->partial_join_cardinality : 1);
+    if (!tab->bush_children)
+      i++;
+  }
+ 
+  check_join_cache_usage_for_tables(join, options, no_jbuf_after);
+  
+  JOIN_TAB *first_tab;
+  for (tab= first_tab= first_linear_tab(join, WITHOUT_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+  {
+    /*
+      Leave via DBUG_RETURN so that the DBUG_ENTER above stays balanced.
+    */
+    if (tab->bush_children && setup_sj_materialization_part2(tab))
+      DBUG_RETURN(TRUE);
+
     TABLE *table=tab->table;
+    uint jcl= tab->used_join_cache_level;
     tab->read_record.table= table;
     tab->read_record.file=table->file;
     tab->read_record.unlock_row= rr_unlock_row;
-    tab->next_select=sub_select;		/* normal select */
+    tab->sorted= sorted;
+    sorted= 0;                                  // only first must be sorted
+    
 
     /*
-      Determine if the set is already ordered for ORDER BY, so it can 
-      disable join cache because it will change the ordering of the results.
-      Code handles sort table that is at any location (not only first after 
-      the const tables) despite the fact that it's currently prohibited.
-      We must disable join cache if the first non-const table alone is
-      ordered. If there is a temp table the ordering is done as a last
-      operation and doesn't prevent join cache usage.
+      We should not set tab->next_select for the last table in the
+      SJM-nest, as setup_sj_materialization() has already set it to
+      end_sj_materialize.
     */
-    if (!ordered_set && !join->need_tmp && 
-        (table == join->sort_by_table ||
-         (join->sort_by_table == (TABLE *) 1 && i != join->const_tables)))
-      ordered_set= 1;
+    if (!(tab->bush_root_tab && 
+          tab->bush_root_tab->bush_children->end == tab + 1))
+    {
+      tab->next_select=sub_select;		/* normal select */
+    }
 
-    tab->sorted= sorted;
-    sorted= 0;                                  // only first must be sorted
+    if (tab->loosescan_match_tab)
+    {
+      /* Out of memory: leave via DBUG_RETURN to balance DBUG_ENTER */
+      if (!(tab->loosescan_buf=
+            (uchar*) join->thd->alloc(tab->loosescan_key_len)))
+        DBUG_RETURN(TRUE); /* purecov: inspected */
+      tab->sorted= TRUE;
+    }
     table->status=STATUS_NO_RECORD;
     pick_table_access_method (tab);
 
+    if (jcl)
+       tab[-1].next_select=sub_select_cache;
+
+    if (tab->cache && tab->cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG)
+      tab->type= JT_HASH;
+      
     switch (tab->type) {
+    case JT_SYSTEM:				// Only happens with left join 
+    case JT_CONST:				// Only happens with left join
+      /* Only happens with outer joins */
+      tab->read_first_record= tab->type == JT_SYSTEM ?
+                                join_read_system :join_read_const;
+      if (table->covering_keys.is_set(tab->ref.key) &&
+          !table->no_keyread)
+      {
+        table->key_read=1;
+        table->file->extra(HA_EXTRA_KEYREAD);
+      }
+      else if (!jcl || jcl > 4) 
+        push_index_cond(tab, tab->ref.key);
+        break;
     case JT_EQ_REF:
       tab->read_record.unlock_row= join_read_key_unlock_row;
       /* fall through */
+      if (table->covering_keys.is_set(tab->ref.key) &&
+	  !table->no_keyread)
+      {
+	table->key_read=1;
+	table->file->extra(HA_EXTRA_KEYREAD);
+      }
+      else if (!jcl || jcl > 4) 
+        push_index_cond(tab, tab->ref.key);
+      break;
     case JT_REF_OR_NULL:
     case JT_REF:
       if (tab->select)
@@ -6864,27 +9459,20 @@ make_join_readinfo(JOIN *join, ulonglong options)
       }
       delete tab->quick;
       tab->quick=0;
-      /* fall through */
-    case JT_CONST:				// Only happens with left join
       if (table->covering_keys.is_set(tab->ref.key) &&
 	  !table->no_keyread)
-        table->set_keyread(TRUE);
+        table->enable_keyread();
+      else if (!jcl || jcl > 4)
+        push_index_cond(tab, tab->ref.key);
       break;
     case JT_ALL:
+    case JT_HASH:
       /*
 	If previous table use cache
         If the incoming data set is already sorted don't use cache.
+        Also don't use cache if this is the first table in semi-join
+          materialization nest.
       */
-      if (i != join->const_tables && !(options & SELECT_NO_JOIN_CACHE) &&
-          tab->use_quick != 2 && !tab->first_inner && !ordered_set)
-      {
-	if ((options & SELECT_DESCRIBE) ||
-	    !join_init_cache(join->thd,join->join_tab+join->const_tables,
-			     i-join->const_tables))
-	{
-	  tab[-1].next_select=sub_select_cache; /* Patch previous */
-	}
-      }
       /* These init changes read_record */
       if (tab->use_quick == 2)
       {
@@ -6895,8 +9483,9 @@ make_join_readinfo(JOIN *join, ulonglong options)
       }
       else
       {
-	tab->read_first_record= join_init_read_record;
-	if (i == join->const_tables)
+        if (!tab->bush_children)
+          tab->read_first_record= join_init_read_record;
+	if (tab == first_tab)
 	{
 	  if (tab->select && tab->select->quick)
 	  {
@@ -6907,7 +9496,10 @@ make_join_readinfo(JOIN *join, ulonglong options)
 	  {
 	    join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
 	    if (statistics)
+	    {
 	      status_var_increment(join->thd->status_var.select_scan_count);
+	      join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
+	    }
 	  }
 	}
 	else
@@ -6915,13 +9507,18 @@ make_join_readinfo(JOIN *join, ulonglong options)
 	  if (tab->select && tab->select->quick)
 	  {
 	    if (statistics)
-	      status_var_increment(join->thd->status_var.select_full_range_join_count);
+	      status_var_increment(join->thd->status_var.
+                                   select_full_range_join_count);
 	  }
 	  else
 	  {
 	    join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
 	    if (statistics)
-	      status_var_increment(join->thd->status_var.select_full_join_count);
+	    {
+	      status_var_increment(join->thd->status_var.
+                                   select_full_join_count);
+	      join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
+	    }
 	  }
 	}
 	if (!table->no_keyread)
@@ -6929,41 +9526,84 @@ make_join_readinfo(JOIN *join, ulonglong options)
 	  if (tab->select && tab->select->quick &&
               tab->select->quick->index != MAX_KEY && //not index_merge
 	      table->covering_keys.is_set(tab->select->quick->index))
-            table->set_keyread(TRUE);
+            table->enable_keyread();
 	  else if (!table->covering_keys.is_clear_all() &&
 		   !(tab->select && tab->select->quick))
 	  {					// Only read index tree
+#ifdef BAD_OPTIMIZATION
 	    /*
-            It has turned out that the below change, while speeding things
-            up for disk-bound loads, slows them down for cases when the data
-            is in disk cache (see BUG#35850):
-	    //  See bug #26447: "Using the clustered index for a table scan
-	    //  is always faster than using a secondary index".
+              It has turned out that the below change, while speeding things
+              up for disk-bound loads, slows them down for cases when the data
+              is in disk cache (see BUG#35850):
+              See bug #26447: "Using the clustered index for a table scan
+              is always faster than using a secondary index".
+            */
             if (table->s->primary_key != MAX_KEY &&
                 table->file->primary_key_is_clustered())
               tab->index= table->s->primary_key;
             else
-	    */
+#endif
               tab->index=find_shortest_key(table, & table->covering_keys);
 	    tab->read_first_record= join_read_first;
-	    tab->type=JT_NEXT;		// Read with index_first / index_next
+            /* Read with index_first / index_next */
+	    tab->type= tab->type == JT_ALL ? JT_NEXT : JT_HASH_NEXT;		
 	  }
 	}
+        if (tab->select && tab->select->quick &&
+            tab->select->quick->index != MAX_KEY && ! tab->table->key_read)
+          push_index_cond(tab, tab->select->quick->index);
       }
       break;
     case JT_FT:
-    case JT_SYSTEM: 
       break;
+      /* purecov: begin deadcode */
     default:
-      DBUG_PRINT("error",("Table type %d found",tab->type)); /* purecov: deadcode */
-      break;					/* purecov: deadcode */
+      DBUG_PRINT("error",("Table type %d found",tab->type));
+      break;
     case JT_UNKNOWN:
     case JT_MAYBE_REF:
-      abort();					/* purecov: deadcode */
+      abort();
+      /* purecov: end */
     }
   }
-  join->join_tab[join->tables-1].next_select=0; /* Set by do_select */
-  DBUG_VOID_RETURN;
+  uint n_top_tables= join->join_tab_ranges.head()->end -  
+                     join->join_tab_ranges.head()->start;
+
+  join->join_tab[n_top_tables - 1].next_select=0;  /* Set by do_select */
+  
+  /*
+    If a join buffer is used to join a table the ordering by an index
+    for the first non-constant table cannot be employed anymore.
+  */
+  for (tab= join->join_tab + join->const_tables ; 
+       tab != join->join_tab + n_top_tables ; tab++)
+  {
+    if (tab->use_join_cache)
+    {
+       JOIN_TAB *sort_by_tab= join->group && join->simple_group &&
+                              join->group_list ?
+			       join->join_tab+join->const_tables :
+                               join->get_sort_by_join_tab();
+     if (sort_by_tab)
+      {
+        join->need_tmp= 1;
+        join->simple_order= join->simple_group= 0;
+        if (sort_by_tab->type == JT_NEXT)
+        {
+          sort_by_tab->type= JT_ALL;
+          sort_by_tab->read_first_record= join_init_read_record;
+        }
+        else if (sort_by_tab->type == JT_HASH_NEXT)
+        {
+          sort_by_tab->type= JT_HASH;
+          sort_by_tab->read_first_record= join_init_read_record;
+        }
+      }
+      break;
+    }
+  }
+
+  DBUG_RETURN(FALSE);
 }
 
 
@@ -6983,9 +9623,8 @@ make_join_readinfo(JOIN *join, ulonglong options)
 
 bool error_if_full_join(JOIN *join)
 {
-  for (JOIN_TAB *tab=join->join_tab, *end=join->join_tab+join->tables;
-       tab < end;
-       tab++)
+  for (JOIN_TAB *tab=first_top_level_tab(join, WITH_CONST_TABLES); tab;
+       tab= next_top_level_tab(join, tab))
   {
     if (tab->type == JT_ALL && (!tab->select || !tab->select->quick))
     {
@@ -7002,21 +9641,41 @@ bool error_if_full_join(JOIN *join)
 
 /**
   cleanup JOIN_TAB.
+
+  DESCRIPTION 
+    This is invoked when we've finished all join executions.
 */
 
 void JOIN_TAB::cleanup()
 {
+  DBUG_ENTER("JOIN_TAB::cleanup");
+  DBUG_PRINT("enter", ("table %s.%s",
+                       (table ? table->s->db.str : "?"),
+                       (table ? table->s->table_name.str : "?")));
   delete select;
   select= 0;
   delete quick;
   quick= 0;
-  my_free(cache.buff);
-  cache.buff= 0;
+  if (cache)
+  {
+    cache->free();
+    cache= 0;
+  }
   limit= 0;
   if (table)
   {
-    table->set_keyread(FALSE);
+    table->disable_keyread();
     table->file->ha_index_or_rnd_end();
+    preread_init_done= FALSE;
+    if (table->pos_in_table_list && 
+        table->pos_in_table_list->jtbm_subselect)
+    {
+      end_read_record(&read_record);
+      //psergey-merge:
+      table->pos_in_table_list->jtbm_subselect->cleanup();
+      table= NULL;
+      DBUG_VOID_RETURN;
+    }
     /*
       We need to reset this for next select
       (Tested in part_of_refkey)
@@ -7024,6 +9683,139 @@ void JOIN_TAB::cleanup()
     table->reginfo.join_tab= 0;
   }
   end_read_record(&read_record);
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Estimate the time to get rows of the joined table.
+  Also refreshes records, found_records and read_time; returns read_time.
+*/
+double JOIN_TAB::scan_time()
+{
+  double res;
+  if (table->created)
+  {
+    if (table->is_filled_at_execution())  // estimates come from the helper below
+    {
+      get_delayed_table_estimates(table, &records, &read_time,
+                                    &startup_cost);
+      found_records= records;
+      table->quick_condition_rows= records;
+    }
+    else
+    {
+      found_records= records= table->file->stats.records;
+      read_time= table->file->scan_time();
+      /*
+        table->quick_condition_rows has already been set to
+        table->file->stats.records
+      */
+    }
+    res= read_time;
+  }
+  else  // table->created is false: cost is the handler row count, or 10.0 if zero
+  {
+    found_records= records=table->file->stats.records;
+    read_time= found_records ? (double)found_records: 10.0;// TODO: fix this stub
+    res= read_time;
+  }
+  return res;
+}
+
+/**
+  Initialize the join_tab before reading (sets preread_init_done on success).
+  Currently only derived table/view materialization is done here.
+  @return FALSE on success, TRUE if mysql_handle_single_derived() returned non-zero
+  @todo consider moving this together with join_tab_execution_startup
+*/
+bool JOIN_TAB::preread_init()
+{
+  TABLE_LIST *derived= table->pos_in_table_list;
+  if (!derived || !derived->is_materialized_derived())  // nothing to materialize
+  {
+    preread_init_done= TRUE;
+    return FALSE;
+  }
+
+  /* Materialize derived table/view, unless its unit has already been executed. */
+  if (!derived->get_unit()->executed &&
+      mysql_handle_single_derived(join->thd->lex,
+                                    derived, DT_CREATE | DT_FILL))
+      return TRUE;
+  preread_init_done= TRUE;
+  return FALSE;
+}
+
+
+
+/**
+  Build a TABLE_REF structure for index lookup in the temporary table
+
+  @param thd             Thread handle
+  @param tmp_key         The temporary table key
+  @param it              The iterator of items for lookup in the key
+  @param value           Forwarded unchanged to every store_key_item created here
+  @param skip            Number of items at the beginning of 'it' to skip
+  @details
+  Build TABLE_REF object for lookup in the key 'tmp_key' using items
+  accessible via item iterator 'it'.
+
+  @retval TRUE  Error (one of the thd memory allocations failed)
+  @retval FALSE OK
+*/
+
+bool TABLE_REF::tmp_table_index_lookup_init(THD *thd,
+                                            KEY *tmp_key,
+                                            Item_iterator &it,
+                                            bool value,
+                                            uint skip)
+{
+  uint tmp_key_parts= tmp_key->key_parts;
+  uint i;
+  DBUG_ENTER("TABLE_REF::tmp_table_index_lookup_init");
+
+  key= 0; /* The only temp table index. */
+  key_length= tmp_key->key_length;
+  if (!(key_buff=
+        (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
+      !(key_copy=
+        (store_key**) thd->alloc((sizeof(store_key*) *
+                                  (tmp_key_parts + 1)))) ||
+      !(items=
+        (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts)))
+    DBUG_RETURN(TRUE);
+
+  key_buff2= key_buff + ALIGN_SIZE(tmp_key->key_length);  // second half of the allocation
+
+  KEY_PART_INFO *cur_key_part= tmp_key->key_part;
+  store_key **ref_key= key_copy;
+  uchar *cur_ref_buff= key_buff;
+
+  it.open();
+  for (i= 0; i < skip; i++) it.next();  // discard the first 'skip' items
+  for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
+  {
+    Item *item= it.next();
+    DBUG_ASSERT(item);
+    items[i]= item;
+    int null_count= test(cur_key_part->field->real_maybe_null());
+    *ref_key= new store_key_item(thd, cur_key_part->field,  // NOTE(review): result is not checked for NULL
+                                 /* TIMOUR:
+                                    the NULL byte is taken into account in
+                                    cur_key_part->store_length, so instead of
+                                    cur_ref_buff + test(maybe_null), we could
+                                    use that information instead.
+                                 */
+                                 cur_ref_buff + null_count,
+                                 null_count ? cur_ref_buff : 0,
+                                 cur_key_part->length, items[i], value);
+    cur_ref_buff+= cur_key_part->store_length;
+  }
+  *ref_key= NULL; /* End marker. */
+  key_err= 1;
+  key_parts= tmp_key_parts;
+  DBUG_RETURN(FALSE);
 }
 
 
@@ -7078,7 +9870,7 @@ void JOIN::join_free()
     Optimization: if not EXPLAIN and we are done with the JOIN,
     free all tables.
   */
-  bool full= (!select_lex->uncacheable && !thd->lex->describe);
+  bool full= !(select_lex->uncacheable);
   bool can_unlock= full;
   DBUG_ENTER("JOIN::join_free");
 
@@ -7142,31 +9934,42 @@ void JOIN::join_free()
 void JOIN::cleanup(bool full)
 {
   DBUG_ENTER("JOIN::cleanup");
+  DBUG_PRINT("enter", ("full %u", (uint) full));
 
-  if (all_tables)
+  if (table)
   {
-    JOIN_TAB *tab,*end;
+    JOIN_TAB *tab;
     /*
       Only a sorted table may be cached.  This sorted table is always the
-      first non const table in join->all_tables
+      first non const table in join->table
     */
-    if (tables > const_tables) // Test for not-const tables
+    if (table_count > const_tables) // Test for not-const tables
     {
-      free_io_cache(all_tables[const_tables]);
-      filesort_free_buffers(all_tables[const_tables],full);
+      JOIN_TAB *first_tab= first_top_level_tab(this, WITHOUT_CONST_TABLES);
+      if (first_tab->table)
+      {
+        free_io_cache(first_tab->table);
+        filesort_free_buffers(first_tab->table, full);
+      }
     }
 
     if (full)
     {
-      for (tab= join_tab, end= tab+tables; tab != end; tab++)
+      for (tab= first_linear_tab(this, WITH_CONST_TABLES); tab; 
+           tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
 	tab->cleanup();
     }
     else
     {
-      for (tab= join_tab, end= tab+tables; tab != end; tab++)
+      for (tab= first_linear_tab(this, WITH_CONST_TABLES); tab; 
+           tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
       {
 	if (tab->table)
+        {
+          DBUG_PRINT("info", ("close index: %s.%s", tab->table->s->db.str,
+                              tab->table->s->table_name.str));
           tab->table->file->ha_index_or_rnd_end();
+        }
       }
     }
   }
@@ -7228,6 +10031,8 @@ void JOIN::cleanup(bool full)
   SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
   @endcode
+
+  TODO: this function checks ORDER::used, which can only have a value of 0.
 */
 
 static bool
@@ -7287,8 +10092,6 @@ eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab)
 static bool
 only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables)
 {
-  if (specialflag &  SPECIAL_SAFE_MODE)
-    return 0;			// skip this optimize /* purecov: inspected */
   tables&= ~PSEUDO_TABLE_BITS;
   for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
   {
@@ -7303,9 +10106,8 @@ only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables)
 
 static void update_depend_map(JOIN *join)
 {
-  JOIN_TAB *join_tab=join->join_tab, *end=join_tab+join->tables;
-
-  for (; join_tab != end ; join_tab++)
+  for (JOIN_TAB *join_tab= first_linear_tab(join, WITH_CONST_TABLES); join_tab;
+       join_tab= next_linear_tab(join, join_tab, WITH_BUSH_ROOTS))
   {
     TABLE_REF *ref= &join_tab->ref;
     table_map depend_map=0;
@@ -7316,11 +10118,11 @@ static void update_depend_map(JOIN *join)
     ref->depend_map=depend_map & ~OUTER_REF_TABLE_BIT;
     depend_map&= ~OUTER_REF_TABLE_BIT;
     for (JOIN_TAB **tab=join->map2table;
-	 depend_map ;
-	 tab++,depend_map>>=1 )
+         depend_map ;
+         tab++,depend_map>>=1 )
     {
       if (depend_map & 1)
-	ref->depend_map|=(*tab)->ref.depend_map;
+        ref->depend_map|=(*tab)->ref.depend_map;
     }
   }
 }
@@ -7328,7 +10130,7 @@ static void update_depend_map(JOIN *join)
 
 /** Update the dependency map for the sort order. */
 
-static void update_depend_map(JOIN *join, ORDER *order)
+static void update_depend_map_for_order(JOIN *join, ORDER *order)
 {
   for (; order ; order=order->next)
   {
@@ -7375,21 +10177,30 @@ static ORDER *
 remove_const(JOIN *join,ORDER *first_order, COND *cond,
              bool change_list, bool *simple_order)
 {
-  if (join->tables == join->const_tables)
+  if (join->table_count == join->const_tables)
     return change_list ? 0 : first_order;		// No need to sort
 
   ORDER *order,**prev_ptr;
-  table_map first_table= join->join_tab[join->const_tables].table->map;
+  table_map first_table;
   table_map not_const_tables= ~join->const_table_map;
   table_map ref;
+  bool first_is_base_table= FALSE;
   DBUG_ENTER("remove_const");
+  
+  LINT_INIT(first_table); /* protected by first_is_base_table */
+  if (join->join_tab[join->const_tables].table)
+  {
+    first_table= join->join_tab[join->const_tables].table->map;
+    first_is_base_table= TRUE;
+  }
+  
 
   prev_ptr= &first_order;
   *simple_order= *join->join_tab[join->const_tables].on_expr_ref ? 0 : 1;
 
   /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */
 
-  update_depend_map(join, first_order);
+  update_depend_map_for_order(join, first_order);
   for (order=first_order; order ; order=order->next)
   {
     table_map order_tables=order->item[0]->used_tables();
@@ -7404,16 +10215,21 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
           table for all queries containing more than one table, ROLLUP, and an
           outer join.
          */
-        (join->tables > 1 && join->rollup.state == ROLLUP::STATE_INITED &&
+        (join->table_count > 1 && join->rollup.state == ROLLUP::STATE_INITED &&
         join->outer_join))
       *simple_order=0;				// Must do a temp table to sort
     else if (!(order_tables & not_const_tables))
     {
-      if (order->item[0]->with_subselect && 
-          !(join->select_lex->options & SELECT_DESCRIBE))
-        order->item[0]->val_str(&order->item[0]->str_value);
+      if (order->item[0]->with_subselect)
+      {
+        /*
+          Delay the evaluation of constant ORDER and/or GROUP expressions that
+          contain subqueries until the execution phase.
+        */
+        join->exec_const_order_group_cond.push_back(order->item[0]);
+      }
       DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
-      continue;					// skip const item
+      continue;
     }
     else
     {
@@ -7426,7 +10242,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
 	  DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
 	  continue;
 	}
-	if ((ref=order_tables & (not_const_tables ^ first_table)))
+	if (first_is_base_table && (ref=order_tables & (not_const_tables ^ first_table)))
 	{
 	  if (!(order_tables & first_table) &&
               only_eq_ref_tables(join,first_order, ref))
@@ -7492,7 +10308,7 @@ ORDER *simple_remove_const(ORDER *order, COND *where)
 
 
 static int
-return_zero_rows(JOIN *join, select_result *result,TABLE_LIST *tables,
+return_zero_rows(JOIN *join, select_result *result, List<TABLE_LIST> &tables,
 		 List<Item> &fields, bool send_row, ulonglong select_options,
 		 const char *info, Item *having)
 {
@@ -7508,9 +10324,13 @@ return_zero_rows(JOIN *join, select_result *result,TABLE_LIST *tables,
 
   if (send_row)
   {
-    for (TABLE_LIST *table= tables; table; table= table->next_leaf)
+    List_iterator<TABLE_LIST> ti(tables);
+    TABLE_LIST *table;
+    while ((table= ti++))
       mark_as_null_row(table->table);		// All fields are NULL
-    if (having && having->val_int() == 0)
+    if (having &&
+        !having->walk(&Item::clear_sum_processor, FALSE, NULL) &&
+        having->val_int() == 0)
       send_row=0;
   }
   if (!(result->send_result_set_metadata(fields,
@@ -7523,7 +10343,7 @@ return_zero_rows(JOIN *join, select_result *result,TABLE_LIST *tables,
       Item *item;
       while ((item= it++))
 	item->no_rows_in_result();
-      send_error= result->send_data(fields);
+      send_error= result->send_data(fields) > 0;
     }
     if (!send_error)
       result->send_eof();				// Should be safe
@@ -7542,8 +10362,11 @@ static void clear_tables(JOIN *join)
     must clear only the non-const tables, as const tables
     are not re-calculated.
   */
-  for (uint i=join->const_tables ; i < join->tables ; i++)
-    mark_as_null_row(join->all_tables[i]);		// All fields are NULL
+  for (uint i= 0 ; i < join->table_count ; i++)
+  {
+    if (!(join->table[i]->map & join->const_table_map))
+      mark_as_null_row(join->table[i]);		// All fields are NULL
+  }
 }
 
 /*****************************************************************************
@@ -7705,24 +10528,26 @@ finish:
 static bool check_simple_equality(Item *left_item, Item *right_item,
                                   Item *item, COND_EQUAL *cond_equal)
 {
+  Item *orig_left_item= left_item;
+  Item *orig_right_item= right_item;
   if (left_item->type() == Item::REF_ITEM &&
       ((Item_ref*)left_item)->ref_type() == Item_ref::VIEW_REF)
   {
-    if (((Item_ref*)left_item)->depended_from)
+    if (((Item_ref*)left_item)->get_depended_from())
       return FALSE;
     left_item= left_item->real_item();
   }
   if (right_item->type() == Item::REF_ITEM &&
       ((Item_ref*)right_item)->ref_type() == Item_ref::VIEW_REF)
   {
-    if (((Item_ref*)right_item)->depended_from)
+    if (((Item_ref*)right_item)->get_depended_from())
       return FALSE;
     right_item= right_item->real_item();
   }
   if (left_item->type() == Item::FIELD_ITEM &&
       right_item->type() == Item::FIELD_ITEM &&
-      !((Item_field*)left_item)->depended_from &&
-      !((Item_field*)right_item)->depended_from)
+      !((Item_field*)left_item)->get_depended_from() &&
+      !((Item_field*)right_item)->get_depended_from())
   {
     /* The predicate the form field1=field2 is processed */
 
@@ -7771,7 +10596,7 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
     { 
       /* left item was found in the current or one of the upper levels */
       if (! right_item_equal)
-        left_item_equal->add((Item_field *) right_item);
+        left_item_equal->add(orig_right_item);
       else
       {
         /* Merge two multiple equalities forming a new one */
@@ -7786,12 +10611,13 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
     { 
       /* left item was not found neither the current nor in upper levels  */
       if (right_item_equal)
-        right_item_equal->add((Item_field *) left_item);
+        right_item_equal->add(orig_left_item);
       else 
       {
         /* None of the fields was found in multiple equalities */
-        Item_equal *item_equal= new Item_equal((Item_field *) left_item,
-                                               (Item_field *) right_item);
+        Item_equal *item_equal= new Item_equal(orig_left_item,
+                                               orig_right_item,
+                                               FALSE);
         cond_equal->current_level.push_back(item_equal);
       }
     }
@@ -7802,18 +10628,21 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
     /* The predicate of the form field=const/const=field is processed */
     Item *const_item= 0;
     Item_field *field_item= 0;
+    Item *orig_field_item= 0;
     if (left_item->type() == Item::FIELD_ITEM &&
-        !((Item_field*)left_item)->depended_from &&
-        right_item->const_item())
+        !((Item_field*)left_item)->get_depended_from() &&
+        right_item->const_item() && !right_item->is_expensive())
     {
-      field_item= (Item_field*) left_item;
+      orig_field_item= orig_left_item;
+      field_item= (Item_field *) left_item;
       const_item= right_item;
     }
     else if (right_item->type() == Item::FIELD_ITEM &&
-             !((Item_field*)right_item)->depended_from &&
-             left_item->const_item())
+             !((Item_field*)right_item)->get_depended_from() &&
+             left_item->const_item() && !left_item->is_expensive())
     {
-      field_item= (Item_field*) right_item;
+      orig_field_item= orig_right_item;
+      field_item= (Item_field *) right_item;
       const_item= left_item;
     }
 
@@ -7822,13 +10651,13 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
     {
       bool copyfl;
 
-      if (field_item->result_type() == STRING_RESULT)
+      if (field_item->cmp_type() == STRING_RESULT)
       {
         CHARSET_INFO *cs= ((Field_str*) field_item->field)->charset();
         if (!item)
         {
           Item_func_eq *eq_item;
-          if ((eq_item= new Item_func_eq(left_item, right_item)))
+          if (!(eq_item= new Item_func_eq(orig_left_item, orig_right_item)))  // give up only when allocation failed
             return FALSE;
           eq_item->set_cmp_func();
           eq_item->quick_fix_field();
@@ -7853,11 +10682,11 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
           already contains a constant and its value is  not equal to
           the value of const_item.
         */
-        item_equal->add(const_item, field_item);
+        item_equal->add_const(const_item, orig_field_item);
       }
       else
       {
-        item_equal= new Item_equal(const_item, field_item);
+        item_equal= new Item_equal(const_item, orig_field_item, TRUE);
         cond_equal->current_level.push_back(item_equal);
       }
       return TRUE;
@@ -8099,10 +10928,10 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
       List_iterator_fast<Item_equal> it(cond_equal.current_level);
       while ((item_equal= it++))
       {
-        item_equal->fix_length_and_dec();
+        item_equal->fix_fields(thd, NULL);
         item_equal->update_used_tables();
         set_if_bigger(thd->lex->current_select->max_equal_elems,
-                      item_equal->members());  
+                      item_equal->n_field_items());  
       }
 
       ((Item_cond_and*)cond)->cond_equal= cond_equal;
@@ -8133,7 +10962,8 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
       args->concat((List<Item> *)&cond_equal.current_level);
     }
   }
-  else if (cond->type() == Item::FUNC_ITEM)
+  else if (cond->type() == Item::FUNC_ITEM ||
+           cond->real_item()->type() == Item::FIELD_ITEM)
   {
     List<Item> eq_list;
     /*
@@ -8155,10 +10985,10 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
       {
         if ((item_equal= cond_equal.current_level.pop()))
         {
-          item_equal->fix_length_and_dec();
+          item_equal->fix_fields(thd, NULL);
           item_equal->update_used_tables();
           set_if_bigger(thd->lex->current_select->max_equal_elems,
-                        item_equal->members());  
+                        item_equal->n_field_items());  
           return item_equal;
 	}
 
@@ -8179,7 +11009,7 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
           item_equal->fix_length_and_dec();
           item_equal->update_used_tables();
           set_if_bigger(thd->lex->current_select->max_equal_elems,
-                        item_equal->members());  
+                        item_equal->n_field_items());  
         }
         and_cond->cond_equal= cond_equal;
         args->concat((List<Item> *)&cond_equal.current_level);
@@ -8193,7 +11023,7 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
       as soon the field is not of a string type or the field reference is
       an argument of a comparison predicate. 
     */ 
-    uchar *is_subst_valid= (uchar *) 1;
+    uchar* is_subst_valid= (uchar *) Item::ANY_SUBST;
     cond= cond->compile(&Item::subst_argument_checker,
                         &is_subst_valid, 
                         &Item::equal_fields_propagator,
@@ -8270,8 +11100,7 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
     pointer to the transformed condition containing multiple equalities
 */
    
-static COND *build_equal_items(THD *thd, COND *cond,
-                               COND_EQUAL *inherited,
+static COND *build_equal_items(THD *thd, COND *cond, COND_EQUAL *inherited,
                                List<TABLE_LIST> *join_list,
                                COND_EQUAL **cond_equal_ref)
 {
@@ -8284,6 +11113,7 @@ static COND *build_equal_items(THD *thd, COND *cond,
     if (cond->type() == Item::COND_ITEM &&
         ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
       cond_equal= &((Item_cond_and*) cond)->cond_equal;
+
     else if (cond->type() == Item::FUNC_ITEM &&
              ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
     {
@@ -8327,10 +11157,14 @@ static COND *build_equal_items(THD *thd, COND *cond,
 /**
   Compare field items by table order in the execution plan.
 
+    If field1 and field2 belong to different tables then
     field1 considered as better than field2 if the table containing
     field1 is accessed earlier than the table containing field2.   
     The function finds out what of two fields is better according
     this criteria.
+    If field1 and field2 belong to the same table then the result
+    of comparison depends on whether the fields are parts of
+    the key that is used to access this table.
 
   @param field1          first field item to compare
   @param field2          second field item to compare
@@ -8344,18 +11178,24 @@ static COND *build_equal_items(THD *thd, COND *cond,
     0  otherwise
 */
 
-static int compare_fields_by_table_order(Item_field *field1,
-                                  Item_field *field2,
-                                  void *table_join_idx)
+static int compare_fields_by_table_order(Item *field1,
+                                         Item *field2,
+                                         void *table_join_idx)
 {
   int cmp= 0;
   bool outer_ref= 0;
-  if (field2->used_tables() & OUTER_REF_TABLE_BIT)
+  Item_field *f1= (Item_field *) (field1->real_item());
+  Item_field *f2= (Item_field *) (field2->real_item());
+  if (f1->const_item())
+    return 1;
+  if (f2->const_item())
+    return -1;
+  if (f2->used_tables() & OUTER_REF_TABLE_BIT)
   {  
     outer_ref= 1;
     cmp= -1;
   }
-  if (field2->used_tables() & OUTER_REF_TABLE_BIT)
+  if (f1->used_tables() & OUTER_REF_TABLE_BIT)
   {
     outer_ref= 1;
     cmp++;
@@ -8363,11 +11203,75 @@ static int compare_fields_by_table_order(Item_field *field1,
   if (outer_ref)
     return cmp;
   JOIN_TAB **idx= (JOIN_TAB **) table_join_idx;
-  cmp= idx[field2->field->table->tablenr]-idx[field1->field->table->tablenr];
+  
+  JOIN_TAB *tab1= idx[f1->field->table->tablenr];
+  JOIN_TAB *tab2= idx[f2->field->table->tablenr];
+  
+  /* 
+    If one of the tables is inside a merged SJM nest and the other one isn't,
+    compare SJM bush roots of the tables.
+  */
+  if (tab1->bush_root_tab != tab2->bush_root_tab)
+  {
+    if (tab1->bush_root_tab)
+      tab1= tab1->bush_root_tab;
+
+    if (tab2->bush_root_tab)
+      tab2= tab2->bush_root_tab;
+  }
+  
+  cmp= tab2 - tab1;
+
+  if (!cmp)
+  {
+    JOIN_TAB *tab= idx[f1->field->table->tablenr];
+    uint keyno= MAX_KEY;
+    if (tab->ref.key_parts)
+      keyno= tab->ref.key;
+    else if (tab->select && tab->select->quick)
+       keyno = tab->select->quick->index;
+    if (keyno != MAX_KEY)
+    {
+      if (f2->field->part_of_key.is_set(keyno))
+        cmp= -1;
+      if (f1->field->part_of_key.is_set(keyno))
+        cmp++;
+      if (!cmp)
+      {
+        KEY *key_info= tab->table->key_info + keyno;
+        for (uint i= 0; i < key_info->key_parts; i++)
+	{
+          Field *fld= key_info->key_part[i].field;
+          if (fld->eq(f2->field))
+	  {
+	    cmp= -1;
+            break;
+          }
+          if (fld->eq(f1->field))
+	  {
+	    cmp= 1;
+            break;
+          }
+        }
+      }              
+    }              
+    else   
+      cmp= f2->field->field_index-f1->field->field_index;
+  }
   return cmp < 0 ? -1 : (cmp ? 1 : 0);
 }
 
 
+static TABLE_LIST* embedding_sjm(Item *item)  /* used SJM nest embedding item's table, or NULL */
+{
+  Item_field *item_field= (Item_field *) (item->real_item());  // assumes real_item() is a field - TODO confirm
+  TABLE_LIST *nest= item_field->field->table->pos_in_table_list->embedding;
+  if (nest && nest->sj_mat_info && nest->sj_mat_info->is_used)  // nest has materialization info marked as used
+    return nest;
+  else
+    return NULL;
+}
+
 /**
   Generate minimal set of simple equalities equivalent to a multiple equality.
 
@@ -8401,6 +11305,23 @@ static int compare_fields_by_table_order(Item_field *field1,
     So only t1.a=t3.c should be left in the lower level.
     If cond is equal to 0, then not more then one equality is generated
     and a pointer to it is returned as the result of the function.
+    
+    Equality substitution and semi-join materialization nests:
+
+       In case join order looks like this:
+
+          outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3 
+
+        We must not construct equalities like 
+
+           outer_tbl1.col = inner_tbl1.col 
+
+        because they would get attached to inner_tbl1 and will get evaluated
+        during materialization phase, when we don't have current value of
+        outer_tbl1.col.
+
+        Item_equal::get_first() also takes similar measures for dealing with
+        equality substitution in presence of SJM nests.
 
   @return
     - The condition with generated simple equalities or
@@ -8408,79 +11329,141 @@ static int compare_fields_by_table_order(Item_field *field1,
     - 0, otherwise.
 */
 
-static Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
-                                  Item_equal *item_equal)
+Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
+                           Item_equal *item_equal)
 {
   List<Item> eq_list;
   Item_func_eq *eq_item= 0;
   if (((Item *) item_equal)->const_item() && !item_equal->val_int())
     return new Item_int((longlong) 0,1); 
   Item *item_const= item_equal->get_const();
-  Item_equal_iterator it(*item_equal);
+  Item_equal_fields_iterator it(*item_equal);
   Item *head;
+  DBUG_ASSERT(!cond || cond->type() == Item::COND_ITEM);
+
+  TABLE_LIST *current_sjm= NULL;
+  Item *current_sjm_head= NULL;
+
+  /* 
+    Pick the "head" item: the constant one or the first in the join order
+    (if the first in the join order happens to be inside an SJM nest, that's
+    ok, because this is where the value will be unpacked after
+    materialization).
+  */
   if (item_const)
     head= item_const;
   else
   {
-    head= item_equal->get_first();
+    TABLE_LIST *emb_nest;
+    head= item_equal->get_first(NULL);
     it++;
+    if ((emb_nest= embedding_sjm(head)))
+    {
+      current_sjm= emb_nest;
+      current_sjm_head= head;
+    }
   }
-  Item_field *item_field;
-  while ((item_field= it++))
+
+  Item *field_item;
+  /*
+    For each other item, generate "item=head" equality (except the tables that 
+    are within SJ-Materialization nests, for those "head" is defined
+    differently)
+  */
+  while ((field_item= it++))
   {
-    Item_equal *upper= item_field->find_item_equal(upper_levels);
-    Item_field *item= item_field;
+    Item_equal *upper= field_item->find_item_equal(upper_levels);
+    Item *item= field_item;
+    TABLE_LIST *field_sjm= embedding_sjm(field_item);
+    if (!field_sjm)
+    { 
+      current_sjm= NULL;
+      current_sjm_head= NULL;
+    }      
+
+    /* 
+      Check if "field_item=head" equality is already guaranteed to be true
+      on upper AND-levels.
+    */
     if (upper)
     { 
       if (item_const && upper->get_const())
         item= 0;
       else
       {
-        Item_equal_iterator li(*item_equal);
-        while ((item= li++) != item_field)
+        Item_equal_fields_iterator li(*item_equal);
+        while ((item= li++) != field_item)
         {
           if (item->find_item_equal(upper_levels) == upper)
             break;
         }
       }
     }
-    if (item == item_field)
+    
+    bool produce_equality= test(item == field_item);
+    if (!item_const && field_sjm && field_sjm != current_sjm)
+    {
+      /* Entering an SJM nest */
+      current_sjm_head= field_item;
+      if (!field_sjm->sj_mat_info->is_sj_scan)
+        produce_equality= FALSE;
+    }
+
+    if (produce_equality)
     {
       if (eq_item)
         eq_list.push_back(eq_item);
-      eq_item= new Item_func_eq(item_field, head);
+      
+      /*
+        If we're inside an SJM-nest (current_sjm!=NULL), and the multi-equality
+        doesn't include a constant, we should produce equality with the first
+        of the equals in this SJM.
+
+        In other cases, get the "head" item, which is either first of the
+        equals on top level, or the constant.
+      */
+      Item *head_item= (!item_const && current_sjm)? current_sjm_head: head;
+      Item *head_real_item=  head_item->real_item();
+      if (head_real_item->type() == Item::FIELD_ITEM)
+        head_item= head_real_item;
+      
+      eq_item= new Item_func_eq(field_item->real_item(), head_item);
+
       if (!eq_item)
         return 0;
       eq_item->set_cmp_func();
       eq_item->quick_fix_field();
-   }
+    }
+    current_sjm= field_sjm;
   }
 
-  if (!cond && !eq_list.head())
+  if (!cond)
   {
-    if (!eq_item)
-      return new Item_int((longlong) 1,1);
-    return eq_item;
-  }
-
-  if (eq_item)
+    if (eq_list.is_empty())
+    {
+      if (eq_item)
+        return eq_item;
+      return new Item_int((longlong) 1, 1);
+    }
+    /* eq_item is always set if list is not empty */
+    DBUG_ASSERT(eq_item);
     eq_list.push_back(eq_item);
-  if (!cond)
-    cond= new Item_cond_and(eq_list);
+    if (!(cond= new Item_cond_and(eq_list)))
+      return 0;                                 // Error
+  }
   else
   {
-    DBUG_ASSERT(cond->type() == Item::COND_ITEM);
-    if (eq_list.elements)
+    if (eq_item)
+      eq_list.push_back(eq_item);
+    if (!eq_list.is_empty())
       ((Item_cond *) cond)->add_at_head(&eq_list);
   }
-
   cond->quick_fix_field();
   cond->update_used_tables();
    
   return cond;
 }
 
-
 /**
   Substitute every field reference in a condition by the best equal field
   and eliminate all multiple equality predicates.
@@ -8514,6 +11497,7 @@ static COND* substitute_for_best_equal_field(COND *cond,
                                              void *table_join_idx)
 {
   Item_equal *item_equal;
+  COND *org_cond= cond;                 // Return this in case of fatal error
 
   if (cond->type() == Item::COND_ITEM)
   {
@@ -8537,7 +11521,7 @@ static COND* substitute_for_best_equal_field(COND *cond,
     Item *item;
     while ((item= li++))
     {
-      Item *new_item =substitute_for_best_equal_field(item, cond_equal,
+      Item *new_item= substitute_for_best_equal_field(item, cond_equal,
                                                       table_join_idx);
       /*
         This works OK with PS/SP re-execution as changes are made to
@@ -8556,6 +11540,8 @@ static COND* substitute_for_best_equal_field(COND *cond,
         // This occurs when eliminate_item_equal() founds that cond is
         // always false and substitutes it with Item_int 0.
         // Due to this, value of item_equal will be 0, so just return it.
+        if (!cond)
+          return org_cond;                      // Error
         if (cond->type() != Item::COND_ITEM)
           break;
       }
@@ -8572,10 +11558,21 @@ static COND* substitute_for_best_equal_field(COND *cond,
     item_equal->sort(&compare_fields_by_table_order, table_join_idx);
     if (cond_equal && cond_equal->current_level.head() == item_equal)
       cond_equal= 0;
-    return eliminate_item_equal(0, cond_equal, item_equal);
+    cond= eliminate_item_equal(0, cond_equal, item_equal);
+    return cond ? cond : org_cond;
+  }
+  else 
+  {
+    while (cond_equal)
+    {
+      List_iterator_fast<Item_equal> it(cond_equal->current_level);
+      while((item_equal= it++))
+      {
+        cond= cond->transform(&Item::replace_equal_field, (uchar *) item_equal);
+      }
+      cond_equal= cond_equal->upper_levels;
+    }
   }
-  else
-    cond->transform(&Item::replace_equal_field, 0);
   return cond;
 }
 
@@ -8615,11 +11612,10 @@ static void update_const_equal_items(COND *cond, JOIN_TAB *tab)
     if (!contained_const && item_equal->get_const())
     {
       /* Update keys for range analysis */
-      Item_equal_iterator it(*item_equal);
-      Item_field *item_field;
-      while ((item_field= it++))
+      Item_equal_fields_iterator it(*item_equal);
+      while (it++)
       {
-        Field *field= item_field->field;
+        Field *field= it.get_curr_field();
         JOIN_TAB *stat= field->table->reginfo.join_tab;
         key_map possible_keys= field->key_start;
         possible_keys.intersect(field->table->keys_in_use_for_query);
@@ -8635,7 +11631,7 @@ static void update_const_equal_items(COND *cond, JOIN_TAB *tab)
           TABLE *tab= field->table;
           KEYUSE *use;
           for (use= stat->keyuse; use && use->table == tab; use++)
-            if (possible_keys.is_set(use->key) && 
+            if (!use->is_for_hash_join() && possible_keys.is_set(use->key) && 
                 tab->key_info[use->key].key_part[use->keypart].field ==
                 field)
               tab->const_key_parts[use->key]|= use->keypart_map;
@@ -8727,37 +11723,6 @@ change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
   }
 }
 
-/**
-  Remove additional condition inserted by IN/ALL/ANY transformation.
-
-  @param conds   condition for processing
-
-  @return
-    new conditions
-*/
-
-static Item *remove_additional_cond(Item* conds)
-{
-  if (conds->name == in_additional_cond)
-    return 0;
-  if (conds->type() == Item::COND_ITEM)
-  {
-    Item_cond *cnd= (Item_cond*) conds;
-    List_iterator<Item> li(*(cnd->argument_list()));
-    Item *item;
-    while ((item= li++))
-    {
-      if (item->name == in_additional_cond)
-      {
-	li.remove();
-	if (cnd->argument_list()->elements == 1)
-	  return cnd->argument_list()->head();
-	return conds;
-      }
-    }
-  }
-  return conds;
-}
 
 static void
 propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
@@ -8795,10 +11760,10 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
     {
       Item_func_eq *func=(Item_func_eq*) cond;
       Item **args= func->arguments();
-      bool left_const= args[0]->const_item();
-      bool right_const= args[1]->const_item();
+      bool left_const= args[0]->const_item() && !args[0]->is_expensive();
+      bool right_const= args[1]->const_item() && !args[1]->is_expensive();
       if (!(left_const && right_const) &&
-          args[0]->result_type() == args[1]->result_type())
+          args[0]->cmp_type() == args[1]->cmp_type())
       {
 	if (right_const)
 	{
@@ -8819,7 +11784,6 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
   }
 }
 
-
 /**
   Simplify joins replacing outer joins by inner joins whenever it's
   possible.
@@ -8914,13 +11878,18 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
     consider any plan where one of the inner tables is before some of outer
     tables.
 
-
+  IMPLEMENTATION
     The function is implemented by a recursive procedure.  On the recursive
     ascent all attributes are calculated, all outer joins that can be
     converted are replaced and then all unnecessary braces are removed.
     As join list contains join tables in the reverse order sequential
     elimination of outer joins does not require extra recursive calls.
 
+  SEMI-JOIN NOTES
+    Remove all semi-joins that are within another semi-join (i.e. have
+    an "ancestor" semi-join nest)
+
+  EXAMPLES
     Here is an example of a join query with invalid cross references:
     @code
       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b 
@@ -8930,14 +11899,15 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
   @param join_list   list representation of the join to be converted
   @param conds       conditions to add on expressions for converted joins
   @param top         true <=> conds is the where condition
-
+  @param in_sj       TRUE <=> processing semi-join nest's children
   @return
     - The new condition, if success
     - 0, otherwise
 */
 
 static COND *
-simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
+simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top,
+               bool in_sj)
 {
   TABLE_LIST *table;
   NESTED_JOIN *nested_join;
@@ -8973,7 +11943,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
            the corresponding on expression is added to E. 
 	*/ 
         expr= simplify_joins(join, &nested_join->join_list,
-                             expr, FALSE);
+                             expr, FALSE, in_sj || table->sj_on_expr);
 
         if (!table->prep_on_expr || expr != table->on_expr)
         {
@@ -8985,9 +11955,12 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
       }
       nested_join->used_tables= (table_map) 0;
       nested_join->not_null_tables=(table_map) 0;
-      conds= simplify_joins(join, &nested_join->join_list, conds, top);
+      conds= simplify_joins(join, &nested_join->join_list, conds, top, 
+                            in_sj || table->sj_on_expr);
       used_tables= nested_join->used_tables;
       not_null_tables= nested_join->not_null_tables;  
+      /* The following two might become unequal after table elimination: */
+      nested_join->n_tables= nested_join->join_list.elements;
     }
     else
     {
@@ -9010,10 +11983,12 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
         For some of the inner tables there are conjunctive predicates
         that reject nulls => the outer join can be replaced by an inner join.
       */
+      if (table->outer_join && !table->embedding && table->table)
+        table->table->maybe_null= FALSE;
       table->outer_join= 0;
       if (table->on_expr)
       {
-        /* Add on expression to the where condition. */
+        /* Add ON expression to the WHERE or upper-level ON condition. */
         if (conds)
         {
           conds= and_conds(conds, table->on_expr);
@@ -9073,7 +12048,8 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
           For example it might happen if RAND() function
           is used in JOIN ON clause.
 	*/  
-        if (!((prev_table->on_expr->used_tables() & ~RAND_TABLE_BIT) &
+        if (!((prev_table->on_expr->used_tables() &
+               ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) &
               ~prev_used_tables))
           prev_table->dep_tables|= used_tables;
       }
@@ -9081,23 +12057,50 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
     prev_table= table;
   }
     
-  /* Flatten nested joins that can be flattened. */
+  /* 
+    Flatten nested joins that can be flattened.
+    no ON expression and not a semi-join => can be flattened.
+  */
   TABLE_LIST *right_neighbor= NULL;
   li.rewind();
   while ((table= li++))
   {
     bool fix_name_res= FALSE;
     nested_join= table->nested_join;
-    if (nested_join && !table->on_expr)
+    if (table->sj_on_expr && !in_sj)
     {
+       /*
+         If this is a semi-join that is not contained within another semi-join, 
+         leave it intact (otherwise it is flattened)
+       */
+      join->select_lex->sj_nests.push_back(table);
+
+      /* 
+        Also, walk through semi-join children and mark those that are now
+        top-level
+      */
       TABLE_LIST *tbl;
       List_iterator<TABLE_LIST> it(nested_join->join_list);
       while ((tbl= it++))
       {
+        if (!tbl->on_expr && tbl->table)
+          tbl->table->maybe_null= FALSE;
+      }
+    }
+    else if (nested_join && !table->on_expr)
+    {
+      TABLE_LIST *tbl;
+      List_iterator<TABLE_LIST> it(nested_join->join_list);
+      List<TABLE_LIST> repl_list;  
+      while ((tbl= it++))
+      {
         tbl->embedding= table->embedding;
+        if (!tbl->embedding && !tbl->on_expr && tbl->table)
+          tbl->table->maybe_null= FALSE;
         tbl->join_list= table->join_list;
+        repl_list.push_back(tbl);
       }
-      li.replace(nested_join->join_list);
+      li.replace(repl_list);
       /* Need to update the name resolution table chain when flattening joins */
       fix_name_res= TRUE;
       table= *li.ref();
@@ -9115,8 +12118,8 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
 /**
   Assign each nested join structure a bit in nested_join_map.
 
-    Assign each nested join structure (except "confluent" ones - those that
-    embed only one element) a bit in nested_join_map.
+    Assign each nested join structure (except ones that embed only one element
+    and so are redundant) a bit in nested_join_map.
 
   @param join          Join being processed
   @param join_list     List of tables
@@ -9125,7 +12128,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
 
   @note
     This function is called after simplify_joins(), when there are no
-    redundant nested joins, #non_confluent_nested_joins <= #tables_in_join so
+    redundant nested joins, #non_redundant_nested_joins <= #tables_in_join so
     we will not run out of bits in nested_join_map.
 
   @return
@@ -9152,9 +12155,11 @@ static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
             with anything)
         2. we could run out bits in nested_join_map otherwise.
       */
-      if (nested_join->join_list.elements != 1)
+      if (nested_join->n_tables != 1)
       {
-        nested_join->nj_map= (nested_join_map) 1 << first_unused++;
+        /* Don't assign bits to sj-nests */
+        if (table->on_expr)
+          nested_join->nj_map= (nested_join_map) 1 << first_unused++;
         first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
                                                     first_unused);
       }
@@ -9174,21 +12179,28 @@ static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
                     tables which will be ignored.
 */
 
-static void reset_nj_counters(List<TABLE_LIST> *join_list)
+static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list)
 {
   List_iterator<TABLE_LIST> li(*join_list);
   TABLE_LIST *table;
   DBUG_ENTER("reset_nj_counters");
+  uint n=0;
   while ((table= li++))
   {
     NESTED_JOIN *nested_join;
+    bool is_eliminated_nest= FALSE;
     if ((nested_join= table->nested_join))
     {
       nested_join->counter= 0;
-      reset_nj_counters(&nested_join->join_list);
+      nested_join->n_tables= reset_nj_counters(join, &nested_join->join_list);
+      if (!nested_join->n_tables)
+        is_eliminated_nest= TRUE;
     }
+    if ((table->nested_join && !is_eliminated_nest) || 
+        (!table->nested_join && (table->table->map & ~join->eliminated_tables)))
+      n++;
   }
-  DBUG_VOID_RETURN;
+  DBUG_RETURN(n);
 }
 
 
@@ -9300,28 +12312,31 @@ static bool check_interleaving_with_nj(JOIN_TAB *next_tab)
     Do update counters for "pairs of brackets" that we've left (marked as
     X,Y,Z in the above picture)
   */
-  for (;next_emb; next_emb= next_emb->embedding)
+  for (;next_emb && next_emb != join->emb_sjm_nest; next_emb= next_emb->embedding)
   {
-    next_emb->nested_join->counter++;
-    if (next_emb->nested_join->counter == 1)
+    if (!next_emb->sj_on_expr)
     {
-      /* 
-        next_emb is the first table inside a nested join we've "entered". In
-        the picture above, we're looking at the 'X' bracket. Don't exit yet as
-        X bracket might have Y pair bracket.
+      next_emb->nested_join->counter++;
+      if (next_emb->nested_join->counter == 1)
+      {
+        /* 
+          next_emb is the first table inside a nested join we've "entered". In
+          the picture above, we're looking at the 'X' bracket. Don't exit yet as
+          X bracket might have Y pair bracket.
+        */
+        join->cur_embedding_map |= next_emb->nested_join->nj_map;
+      }
+      
+      if (next_emb->nested_join->n_tables !=
+          next_emb->nested_join->counter)
+        break;
+
+      /*
+        We're currently at Y or Z-bracket as depicted in the above picture.
+        Mark that we've left it and continue walking up the brackets hierarchy.
       */
-      join->cur_embedding_map |= next_emb->nested_join->nj_map;
+      join->cur_embedding_map &= ~next_emb->nested_join->nj_map;
     }
-    
-    if (next_emb->nested_join->join_list.elements !=
-        next_emb->nested_join->counter)
-      break;
-
-    /*
-      We're currently at Y or Z-bracket as depicted in the above picture.
-      Mark that we've left it and continue walking up the brackets hierarchy.
-    */
-    join->cur_embedding_map &= ~next_emb->nested_join->nj_map;
   }
   return FALSE;
 }
@@ -9383,33 +12398,138 @@ static void restore_prev_nj_state(JOIN_TAB *last)
 {
   TABLE_LIST *last_emb= last->table->pos_in_table_list->embedding;
   JOIN *join= last->join;
-  for (;last_emb != NULL; last_emb= last_emb->embedding)
+  for (;last_emb != NULL && last_emb != join->emb_sjm_nest; 
+       last_emb= last_emb->embedding)
   {
-    NESTED_JOIN *nest= last_emb->nested_join;
-    DBUG_ASSERT(nest->counter > 0);
-    
-    bool was_fully_covered= nest->is_fully_covered();
-    
-    if (--nest->counter == 0)
-      join->cur_embedding_map&= ~nest->nj_map;
-    
-    if (!was_fully_covered)
-      break;
+    if (!last_emb->sj_on_expr)
+    {
+      NESTED_JOIN *nest= last_emb->nested_join;
+      DBUG_ASSERT(nest->counter > 0);
+      
+      bool was_fully_covered= nest->is_fully_covered();
+      
+      if (--nest->counter == 0)
+        join->cur_embedding_map&= ~nest->nj_map;
+      
+      if (!was_fully_covered)
+        break;
+      
+      join->cur_embedding_map|= nest->nj_map;
+    }
+  }
+}
+
+
+
+/*
+  Change access methods not to use join buffering and adjust costs accordingly
+
+  SYNOPSIS
+    optimize_wo_join_buffering()
+      join
+      first_tab               The first tab to do re-optimization for
+      last_tab                The last tab to do re-optimization for
+      last_remaining_tables   Bitmap of tables that are not in the
+                              [0...last_tab] join prefix
+      first_alt               TRUE <=> Use the LooseScan plan for the first_tab
+      no_jbuf_before          Don't allow to use join buffering before this
+                              table
+      outer_rec_count     OUT New output record count
+      reopt_cost          OUT New join prefix cost
+
+  DESCRIPTION
+    Given a join prefix [0; ... first_tab], change the access to the tables
+    in the [first_tab; last_tab] not to use join buffering. This is needed
+    because some semi-join strategies cannot be used together with the join
+    buffering.
+    In general case the best table order in [first_tab; last_tab] range with
+    join buffering is different from the best order without join buffering but
+    we don't try finding a better join order. (TODO: ask Igor why we
+    chose not to do this in the end; that is actually the difference from the
+    forking approach.)
+*/
+
+void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, 
+                                table_map last_remaining_tables, 
+                                bool first_alt, uint no_jbuf_before,
+                                double *outer_rec_count, double *reopt_cost)
+{
+  double cost, rec_count;
+  table_map reopt_remaining_tables= last_remaining_tables;
+  uint i;
+
+  if (first_tab > join->const_tables)
+  {
+    cost=      join->positions[first_tab - 1].prefix_cost.total_cost();
+    rec_count= join->positions[first_tab - 1].prefix_record_count;
+  }
+  else
+  {
+    cost= 0.0;
+    rec_count= 1;
+  }
+
+  *outer_rec_count= rec_count;
+  for (i= first_tab; i <= last_tab; i++)
+    reopt_remaining_tables |= join->positions[i].table->table->map;
+  
+  /*
+    best_access_path() optimization depends on the value of 
+    join->cur_sj_inner_tables. Our goal in this function is to do a
+    re-optimization with disabled join buffering, but no other changes.
+    In order to achieve this, cur_sj_inner_tables needs to have the same
+    value it had during the original invocations of best_access_path. 
+
+    We know that this function, optimize_wo_join_buffering() is called to
+    re-optimize semi-join join order range, which allows to conclude that 
+    the "original" value of cur_sj_inner_tables was 0.
+  */
+  table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables;
+  join->cur_sj_inner_tables= 0;
+
+  for (i= first_tab; i <= last_tab; i++)
+  {
+    JOIN_TAB *rs= join->positions[i].table;
+    POSITION pos, loose_scan_pos;
     
-    join->cur_embedding_map|= nest->nj_map;
+    if ((i == first_tab && first_alt) || join->positions[i].use_join_buffer)
+    {
+      /* Find the best access method that would not use join buffering */
+      best_access_path(join, rs, reopt_remaining_tables, i, 
+                       TRUE, rec_count,
+                       &pos, &loose_scan_pos);
+    }
+    else 
+      pos= join->positions[i];
+
+    if ((i == first_tab && first_alt))
+      pos= loose_scan_pos;
+
+    reopt_remaining_tables &= ~rs->table->map;
+    rec_count *= pos.records_read;
+    cost += pos.read_time;
+
+    if (!rs->emb_sj_nest)
+      *outer_rec_count *= pos.records_read;
   }
+  join->cur_sj_inner_tables= save_cur_sj_inner_tables;
+
+  *reopt_cost= cost;
 }
 
 
 static COND *
 optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list,
-              Item::cond_result *cond_value)
+              Item::cond_result *cond_value, COND_EQUAL **cond_equal)
 {
   THD *thd= join->thd;
   DBUG_ENTER("optimize_cond");
 
   if (!conds)
+  {
     *cond_value= Item::COND_TRUE;
+    build_equal_items(join->thd, NULL, NULL, join_list, cond_equal);
+  }  
   else
   {
     /* 
@@ -9440,7 +12560,7 @@ optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list,
 
 
 /**
-  Handles the reqursive job for remove_eq_conds()
+  Handles the recursive job for remove_eq_conds()
 
   Remove const and eq items. Return new item, or NULL if no condition
   cond_value is set to according:
@@ -9448,8 +12568,8 @@ optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list,
   COND_TRUE  always true	( 1 = 1 )
   COND_FALSE always false	( 1 = 2 )
 
-  SYNPOSIS
-    remove_eq_conds()
+  SYNOPSIS
+    internal_remove_eq_conds()
     thd 			THD environment
     cond                        the condition to handle
     cond_value                  the resulting value of the condition
@@ -9566,13 +12686,13 @@ internal_remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value)
         cond->fix_fields(thd, &cond);
       }
     }
-    if (cond->const_item())
+    if (cond->const_item() && !cond->is_expensive())
     {
       *cond_value= eval_const_cond(cond) ? Item::COND_TRUE : Item::COND_FALSE;
       return (COND*) 0;
     }
   }
-  else if (cond->const_item())
+  else if (cond->const_item() && !cond->is_expensive())
   {
     *cond_value= eval_const_cond(cond) ? Item::COND_TRUE : Item::COND_FALSE;
     return (COND*) 0;
@@ -9592,7 +12712,6 @@ internal_remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value)
   return cond;					// Point at next and level
 }
 
-
 /**
   Remove const and eq items. Return new item, or NULL if no condition
   cond_value is set to according:
@@ -9674,37 +12793,33 @@ remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value)
 /* 
   Check if equality can be used in removing components of GROUP BY/DISTINCT
   
-  SYNOPSIS
-    test_if_equality_guarantees_uniqueness()
-      l          the left comparison argument (a field if any)
-      r          the right comparison argument (a const of any)
-  
-  DESCRIPTION    
-    Checks if an equality predicate can be used to take away 
-    DISTINCT/GROUP BY because it is known to be true for exactly one 
-    distinct value (e.g. <expr> == <const>).
-    Arguments must be of the same type because e.g. 
-    <string_field> = <int_const> may match more than 1 distinct value from 
-    the column. 
-    We must take into consideration and the optimization done for various 
-    string constants when compared to dates etc (see Item_int_with_ref) as
-    well as the collation of the arguments.
+  @param    l          the left comparison argument (a field if any)
+  @param    r          the right comparison argument (a const of any)
   
-  RETURN VALUE  
-    TRUE    can be used
-    FALSE   cannot be used
+  @details
+  Checks if an equality predicate can be used to take away 
+  DISTINCT/GROUP BY because it is known to be true for exactly one 
+  distinct value (e.g. <expr> == <const>).
+  Arguments must be compared in the native type of the left argument
+  and (for strings) in the native collation of the left argument.
+  Otherwise, for example,
+  <string_field> = <int_const> may match more than 1 distinct value of
+  the <string_field>.
+
+  @note We don't need to aggregate l and r collations here, because r -
+  the constant item - has already been converted to a proper collation
+  for comparison. We only need to compare this collation with field's collation.
+
+  @retval true    can be used
+  @retval false   cannot be used
 */
 static bool
 test_if_equality_guarantees_uniqueness(Item *l, Item *r)
 {
   return r->const_item() &&
-    /* elements must be compared as dates */
-     (Arg_comparator::can_compare_as_dates(l, r, 0) ||
-      /* or of the same result type */
-      (r->result_type() == l->result_type() &&
-       /* and must have the same collation if compared as strings */
-       (l->result_type() != STRING_RESULT ||
-        l->collation.collation == r->collation.collation)));
+    item_cmp_type(l->cmp_type(), r->cmp_type()) == l->cmp_type() &&
+    (l->cmp_type() != STRING_RESULT ||
+     l->collation.collation == r->collation.collation);
 }
 
 
@@ -9865,6 +12980,8 @@ Field *create_tmp_field_from_field(THD *thd, Field *org_field,
       table->s->db_create_options|= HA_OPTION_PACK_RECORD;
     else if (org_field->type() == FIELD_TYPE_DOUBLE)
       ((Field_double *) new_field)->not_fixed= TRUE;
+    new_field->vcol_info= 0;
+    new_field->stored_in_db= TRUE;
   }
   return new_field;
 }
@@ -9923,15 +13040,12 @@ static Field *create_tmp_field_from_item(THD *thd, Item *item, TABLE *table,
   case STRING_RESULT:
     DBUG_ASSERT(item->collation.collation);
   
-    enum enum_field_types type;
     /*
       DATE/TIME and GEOMETRY fields have STRING_RESULT result type. 
       To preserve type they needed to be handled separately.
     */
-    if ((type= item->field_type()) == MYSQL_TYPE_DATETIME ||
-        type == MYSQL_TYPE_TIME || type == MYSQL_TYPE_DATE ||
-        type == MYSQL_TYPE_NEWDATE ||
-        type == MYSQL_TYPE_TIMESTAMP || type == MYSQL_TYPE_GEOMETRY)
+    if (item->cmp_type() == TIME_RESULT ||
+        item->field_type() == MYSQL_TYPE_GEOMETRY)
       new_field= item->tmp_table_field_from_field_type(table, 1);
     /* 
       Make sure that the blob fits into a Field_varstring which has 
@@ -10072,13 +13186,30 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
       If item have to be able to store NULLs but underlaid field can't do it,
       create_tmp_field_from_field() can't be used for tmp field creation.
     */
-    if (field->maybe_null && !field->field->maybe_null())
+    if (((field->maybe_null && field->in_rollup) ||      
+	(thd->create_tmp_table_for_derived  &&    /* for mat. view/dt */
+	 orig_item && orig_item->maybe_null)) &&         
+        !field->field->maybe_null())
     {
+      bool save_maybe_null= FALSE;
+      /*
+        The item the ref points to may have maybe_null flag set while
+        the ref doesn't have it. This may happen for outer fields
+        when the outer query decided at some point after name resolution phase
+        that this field might be null. Take this into account here.
+      */
+      if (orig_item)
+      {
+        save_maybe_null= item->maybe_null;
+        item->maybe_null= orig_item->maybe_null;
+      }
       result= create_tmp_field_from_item(thd, item, table, NULL,
                                          modify_item, convert_blob_length);
       *from_field= field->field;
       if (result && modify_item)
         field->result_field= result;
+      if (orig_item)
+        item->maybe_null= save_maybe_null;
     } 
     else if (table_cant_handle_bit_fields && field->field->type() ==
              MYSQL_TYPE_BIT)
@@ -10101,7 +13232,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
       ((Item_ref*)orig_item)->set_result_field(result);
     /*
       Fields that are used as arguments to the DEFAULT() function already have
-      their data pointers set to the default value during name resulotion. See
+      their data pointers set to the default value during name resolution. See
       Item_default_value::fix_fields.
     */
     if (orig_type != Item::DEFAULT_VALUE_ITEM && field->field->eq_def(result))
@@ -10153,6 +13284,8 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
   case Item::REF_ITEM:
   case Item::NULL_ITEM:
   case Item::VARBIN_ITEM:
+  case Item::CACHE_ITEM:
+  case Item::EXPR_CACHE_ITEM:
     if (make_copy_field)
     {
       DBUG_ASSERT(((Item_result_field*)item)->result_field);
@@ -10183,9 +13316,15 @@ void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps)
   uint field_count= table->s->fields;
   bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count,
               FALSE);
-  bitmap_init(&table->tmp_set,
+  bitmap_init(&table->def_vcol_set,
               (my_bitmap_map*) (bitmaps+ bitmap_buffer_size(field_count)),
               field_count, FALSE);
+  bitmap_init(&table->tmp_set,
+              (my_bitmap_map*) (bitmaps+ 2*bitmap_buffer_size(field_count)),
+              field_count, FALSE);
+  bitmap_init(&table->eq_join_set,
+              (my_bitmap_map*) (bitmaps+ 3*bitmap_buffer_size(field_count)),
+              field_count, FALSE);
   /* write_set and all_set are copies of read_set */
   table->def_write_set= table->def_read_set;
   table->s->all_set= table->def_read_set;
@@ -10220,16 +13359,11 @@ void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps)
                               be used for name resolving; can be "".
 */
 
-#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128
-#define AVG_STRING_LENGTH_TO_PACK_ROWS   64
-#define RATIO_TO_PACK_ROWS	       2
-#define MIN_STRING_LENGTH_TO_PACK_ROWS   10
-
 TABLE *
-create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
+create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
 		 ORDER *group, bool distinct, bool save_sum_fields,
 		 ulonglong select_options, ha_rows rows_limit,
-		 const char *table_alias)
+                 const char *table_alias, bool do_not_open)
 {
   MEM_ROOT *mem_root_save, own_root;
   TABLE *table;
@@ -10253,7 +13387,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   KEY *keyinfo;
   KEY_PART_INFO *key_part_info;
   Item **copy_func;
-  MI_COLUMNDEF *recinfo;
+  ENGINE_COLUMNDEF *recinfo;
   /*
     total_uneven_bit_length is uneven bit length for visible fields
     hidden_uneven_bit_length is uneven bit length for hidden fields
@@ -10269,6 +13403,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
               (ulong) rows_limit,test(group)));
 
   status_var_increment(thd->status_var.created_tmp_tables);
+  thd->query_plan_flags|= QPLAN_TMP_TABLE;
 
   if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
     temp_pool_slot = bitmap_lock_set_next(&temp_pool);
@@ -10279,16 +13414,16 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   else
   {
     /* if we run out of slots or we are not using tempool */
-    sprintf(path,"%s%lx_%lx_%x", tmp_file_prefix,current_pid,
+    sprintf(path, "%s%lx_%lx_%x", tmp_file_prefix,current_pid,
             thd->thread_id, thd->tmp_table++);
   }
 
   /*
     No need to change table name to lower case as we are only creating
-    MyISAM or HEAP tables here
+    MyISAM, Aria or HEAP tables here
   */
-  fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME);
-
+  fn_format(path, path, mysql_tmpdir, "",
+            MY_REPLACE_EXT|MY_UNPACK_FILENAME);
 
   if (group)
   {
@@ -10296,6 +13431,12 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
       group=0;					// Can't use group key
     else for (ORDER *tmp=group ; tmp ; tmp=tmp->next)
     {
+      /*
+        marker == 4 means two things:
+        - store NULLs in the key, and
+        - convert BIT fields to 64-bit long, needed because MEMORY tables
+          can't index BIT fields.
+      */
       (*tmp->item)->marker=4;			// Store null in key
       if ((*tmp->item)->max_length >= CONVERT_IF_BIGGER_TO_BLOB)
 	using_unique_constraint=1;
@@ -10337,7 +13478,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
                         &tmpname, (uint) strlen(path)+1,
                         &group_buff, (group && ! using_unique_constraint ?
                                       param->group_length : 0),
-                        &bitmaps, bitmap_buffer_size(field_count)*2,
+                        &bitmaps, bitmap_buffer_size(field_count)*4,
                         NullS))
   {
     if (temp_pool_slot != MY_BIT_NONE)
@@ -10353,7 +13494,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
     DBUG_RETURN(NULL);				/* purecov: inspected */
   }
   param->items_to_copy= copy_func;
-  strmov(tmpname,path);
+  strmov(tmpname, path);
   /* make table according to fields */
 
   bzero((char*) table,sizeof(*table));
@@ -10366,9 +13507,9 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   thd->mem_root= &table->mem_root;
 
   table->field=reg_field;
-  table->alias= table_alias;
+  table->alias.set(table_alias, strlen(table_alias), table_alias_charset);
+
   table->reginfo.lock_type=TL_WRITE;	/* Will be updated */
-  table->db_stat=HA_OPEN_KEYFILE+HA_OPEN_RNDFILE;
   table->map=1;
   table->temp_pool_slot = temp_pool_slot;
   table->copy_blobs= 1;
@@ -10376,13 +13517,13 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   table->quick_keys.init();
   table->covering_keys.init();
   table->merge_keys.init();
+  table->intersect_keys.init();
   table->keys_in_use_for_query.init();
 
   table->s= share;
   init_tmp_table_share(thd, share, "", 0, tmpname, tmpname);
   share->blob_field= blob_field;
   share->blob_ptr_size= portable_sizeof_char_ptr;
-  share->db_low_byte_first=1;                // True for HEAP and MyISAM
   share->table_charset= param->table_charset;
   share->primary_key= MAX_KEY;               // Indicate no primary key
   share->keys_for_keyread.init();
@@ -10408,7 +13549,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
       {
         if (item->used_tables() & OUTER_REF_TABLE_BIT)
           item->update_used_tables();
-        if (type == Item::SUBSELECT_ITEM ||
+        if ((item->real_type() == Item::SUBSELECT_ITEM) ||
             (item->used_tables() & ~OUTER_REF_TABLE_BIT))
         {
 	  /*
@@ -10494,17 +13635,45 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
                          group != 0,
                          !force_copy_fields &&
                            (not_all_columns || group !=0),
-                         item->marker == 4, force_copy_fields,
+                         /*
+                           If item->marker == 4 then we force create_tmp_field
+                           to create 64-bit longs for BIT fields because HEAP
+                           tables can't index BIT fields directly. We do the same
+                           for distinct, as we want the distinct index to be
+                           usable in this case too.
+                         */
+                         item->marker == 4  || param->bit_fields_as_long,
+                         force_copy_fields,
                          param->convert_blob_length);
 
       if (!new_field)
       {
 	if (thd->is_fatal_error)
 	  goto err;				// Got OOM
-	continue;				// Some kindf of const item
+	continue;				// Some kind of const item
       }
       if (type == Item::SUM_FUNC_ITEM)
-	((Item_sum *) item)->result_field= new_field;
+      {
+        Item_sum *agg_item= (Item_sum *) item;
+        /*
+          Update the result field only if it has never been set, or if the
+          created temporary table is not to be used for subquery
+          materialization.
+
+          The reason is that for subqueries that require materialization as part
+          of their plan, we create the 'external' temporary table needed for IN
+          execution, after the 'internal' temporary table needed for grouping.
+          Since both the external and the internal temporary tables are created
+          for the same list of SELECT fields of the subquery, setting
+          'result_field' for each invocation of create_tmp_table overrides the
+          previous value of 'result_field'.
+
+          The condition below prevents the creation of the external temp table
+          from overriding the 'result_field' that was set for the internal temp table.
+        */
+        if (!agg_item->result_field || !param->materialized_subquery)
+          agg_item->result_field= new_field;
+      }
       tmp_from_field++;
       reclength+=new_field->pack_length();
       if (!(new_field->flags & NOT_NULL_FLAG))
@@ -10516,6 +13685,12 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
         *blob_field++= fieldnr;
 	blob_count++;
       }
+      if (new_field->real_type() == MYSQL_TYPE_STRING ||
+          new_field->real_type() == MYSQL_TYPE_VARCHAR)
+      {
+        string_count++;
+        string_total_length+= new_field->pack_length();
+      }
       if (item->marker == 4 && item->maybe_null)
       {
 	group_null_items++;
@@ -10553,14 +13728,16 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   *reg_field= 0;
   *blob_field= 0;				// End marker
   share->fields= field_count;
+  share->column_bitmap_size= bitmap_buffer_size(share->fields);
 
   /* If result table is small; use a heap */
   /* future: storage engine selection can be made dynamic? */
   if (blob_count || using_unique_constraint
       || (thd->variables.big_tables && !(select_options & SELECT_SMALL_RESULT))
-      || (select_options & TMP_TABLE_FORCE_MYISAM))
+      || (select_options & TMP_TABLE_FORCE_MYISAM)
+      || thd->variables.tmp_table_size == 0)
   {
-    share->db_plugin= ha_lock_engine(0, myisam_hton);
+    share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON);
     table->file= get_new_handler(share, &table->mem_root,
                                  share->db_type());
     if (group &&
@@ -10577,7 +13754,6 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   if (!table->file)
     goto err;
 
-
   if (!using_unique_constraint)
     reclength+= group_null_items;	// null flag is stored separately
 
@@ -10600,8 +13776,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   /* Use packed rows if there is blobs or a lot of space to gain */
   if (blob_count ||
       (string_total_length >= STRING_TOTAL_LENGTH_TO_PACK_ROWS &&
-      (reclength / string_total_length <= RATIO_TO_PACK_ROWS ||
-       string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
+       (reclength / string_total_length <= RATIO_TO_PACK_ROWS ||
+        string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
     use_packed_rows= 1;
 
   share->reclength= reclength;
@@ -10632,7 +13808,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
 
     table->null_flags= (uchar*) table->record[0];
     share->null_fields= null_count+ hidden_null_count;
-    share->null_bytes= null_pack_length;
+    share->null_bytes= share->null_bytes_for_compare= null_pack_length;
   }
   null_count= (blob_count == 0) ? 1 : 0;
   hidden_field_count=param->hidden_field_count;
@@ -10714,22 +13890,25 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
     /* Make entry for create table */
     recinfo->length=length;
     if (field->flags & BLOB_FLAG)
-      recinfo->type= (int) FIELD_BLOB;
+      recinfo->type= FIELD_BLOB;
     else if (use_packed_rows &&
              field->real_type() == MYSQL_TYPE_STRING &&
 	     length >= MIN_STRING_LENGTH_TO_PACK_ROWS)
-      recinfo->type=FIELD_SKIP_ENDSPACE;
+      recinfo->type= FIELD_SKIP_ENDSPACE;
+    else if (field->real_type() == MYSQL_TYPE_VARCHAR)
+      recinfo->type= FIELD_VARCHAR;
     else
-      recinfo->type=FIELD_NORMAL;
+      recinfo->type= FIELD_NORMAL;
+
     if (!--hidden_field_count)
       null_count=(null_count+7) & ~7;		// move to next byte
 
     // fix table name in field entry
-    field->table_name= &table->alias;
+    field->set_table_name(&table->alias);
   }
 
   param->copy_field_end=copy;
-  param->recinfo=recinfo;
+  param->recinfo= recinfo;              	// Pointer to after last field
   store_record(table,s->default_values);        // Make empty default record
 
   if (thd->variables.tmp_table_size == ~ (ulonglong) 0)		// No limit
@@ -10757,9 +13936,11 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
     param->group_buff=group_buff;
     share->keys=1;
     share->uniques= test(using_unique_constraint);
-    table->key_info=keyinfo;
+    table->key_info= table->s->key_info= keyinfo;
+    table->keys_in_use_for_query.set_bit(0);
+    share->keys_in_use.set_bit(0);
     keyinfo->key_part=key_part_info;
-    keyinfo->flags=HA_NOSAME;
+    keyinfo->flags=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY;
     keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts;
     keyinfo->key_length=0;
     keyinfo->rec_per_key=0;
@@ -10773,6 +13954,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
       bool maybe_null=(*cur_group->item)->maybe_null;
       key_part_info->null_bit=0;
       key_part_info->field=  field;
+      if (cur_group == group)
+        field->key_start.set_bit(0);
       key_part_info->offset= field->offset(table->record[0]);
       key_part_info->length= (uint16) field->key_length();
       key_part_info->type=   (uint8) field->key_type();
@@ -10785,12 +13968,26 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
       if (!using_unique_constraint)
       {
 	cur_group->buff=(char*) group_buff;
+
+        if (maybe_null && !field->null_bit)
+        {
+          /*
+            This can only happen in the unusual case where an outer join
+            table was found to be not-nullable by the optimizer, so
+            the item can't really be null.
+            We solve this by marking the item as !maybe_null to ensure
+            that the key, field and item definitions match.
+          */
+          (*cur_group->item)->maybe_null= maybe_null= 0;
+        }
+
 	if (!(cur_group->field= field->new_key_field(thd->mem_root,table,
                                                      group_buff +
                                                      test(maybe_null),
                                                      field->null_ptr,
                                                      field->null_bit)))
 	  goto err; /* purecov: inspected */
+
 	if (maybe_null)
 	{
 	  /*
@@ -10812,6 +14009,12 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
       }
       keyinfo->key_length+=  key_part_info->length;
     }
+    /*
+      Ensure we didn't overrun the group buffer. The < is only true when
+      some maybe_null fields were changed to be not-null fields.
+    */
+    DBUG_ASSERT(using_unique_constraint ||
+                group_buff <= param->group_buff + param->group_length);
   }
 
   if (distinct && field_count != param->hidden_field_count)
@@ -10824,29 +14027,41 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
     */
     DBUG_PRINT("info",("hidden_field_count: %d", param->hidden_field_count));
 
-    null_pack_length-=hidden_null_pack_length;
-    keyinfo->key_parts= ((field_count-param->hidden_field_count)+
-			 test(null_pack_length));
-    table->distinct= 1;
-    share->keys= 1;
     if (blob_count)
     {
-      using_unique_constraint=1;
+      /*
+        Special mode for index creation in MyISAM used to support unique
+        indexes on blobs with arbitrary length. Such indexes cannot be
+        used for lookups.
+      */
       share->uniques= 1;
     }
+    null_pack_length-=hidden_null_pack_length;
+    keyinfo->key_parts= ((field_count-param->hidden_field_count)+
+			 (share->uniques ? test(null_pack_length) : 0));
+    table->distinct= 1;
+    share->keys= 1;
     if (!(key_part_info= (KEY_PART_INFO*)
           alloc_root(&table->mem_root,
                      keyinfo->key_parts * sizeof(KEY_PART_INFO))))
       goto err;
     bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO));
-    table->key_info=keyinfo;
+    table->keys_in_use_for_query.set_bit(0);
+    share->keys_in_use.set_bit(0);
+    table->key_info= table->s->key_info= keyinfo;
     keyinfo->key_part=key_part_info;
-    keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL;
-    keyinfo->key_length=(uint16) reclength;
+    keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL | HA_BINARY_PACK_KEY | HA_PACK_KEY;
+    keyinfo->key_length= 0;  // Will compute the sum of the parts below.
     keyinfo->name= (char*) "distinct_key";
     keyinfo->algorithm= HA_KEY_ALG_UNDEF;
     keyinfo->rec_per_key=0;
-    if (null_pack_length)
+
+    /*
+      Create an extra field to hold NULL bits so that unique indexes on
+      blobs can distinguish NULL from 0. This extra field is not needed
+      when we do not use UNIQUE indexes for blobs.
+    */
+    if (null_pack_length && share->uniques)
     {
       key_part_info->null_bit=0;
       key_part_info->offset=hidden_null_pack_length;
@@ -10869,10 +14084,34 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
 	 i < field_count;
 	 i++, reg_field++, key_part_info++)
     {
-      key_part_info->null_bit=0;
       key_part_info->field=    *reg_field;
+      (*reg_field)->flags |= PART_KEY_FLAG;
+      if (key_part_info == keyinfo->key_part)
+        (*reg_field)->key_start.set_bit(0);
+      key_part_info->null_bit= (*reg_field)->null_bit;
+      key_part_info->null_offset= (uint) ((*reg_field)->null_ptr -
+                                          (uchar*) table->record[0]);
+
       key_part_info->offset=   (*reg_field)->offset(table->record[0]);
       key_part_info->length=   (uint16) (*reg_field)->pack_length();
+      /* TODO:
+        The below method of computing the key format length of the
+        key part is a copy/paste from opt_range.cc, and table.cc.
+        This should be factored out, e.g. as a method of Field.
+        In addition it is not clear if any of the Field::*_length
+        methods is supposed to compute the same length. If so, it
+        might be reused.
+      */
+      key_part_info->store_length= key_part_info->length;
+
+      if ((*reg_field)->real_maybe_null())
+        key_part_info->store_length+= HA_KEY_NULL_LENGTH;
+      if ((*reg_field)->type() == MYSQL_TYPE_BLOB || 
+          (*reg_field)->real_type() == MYSQL_TYPE_VARCHAR)
+        key_part_info->store_length+= HA_KEY_BLOB_LENGTH;
+
+      keyinfo->key_length+= key_part_info->store_length;
+
       key_part_info->type=     (uint8) (*reg_field)->key_type();
       key_part_info->key_type =
 	((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT ||
@@ -10885,14 +14124,21 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   if (thd->is_fatal_error)				// If end of memory
     goto err;					 /* purecov: inspected */
   share->db_record_offset= 1;
-  if (share->db_type() == myisam_hton)
+  table->used_for_duplicate_elimination= (param->sum_func_count == 0 &&
+                                          (table->group || table->distinct));
+
+  if (!do_not_open)
   {
-    if (create_myisam_tmp_table(table, param, select_options,
-                                thd->variables.big_tables))
+    if (share->db_type() == TMP_ENGINE_HTON)
+    {
+      if (create_internal_tmp_table(table, param->keyinfo, param->start_recinfo,
+                                    &param->recinfo, select_options,
+                                    thd->variables.big_tables))
+        goto err;
+    }
+    if (open_tmp_table(table))
       goto err;
   }
-  if (open_tmp_table(table))
-    goto err;
 
   thd->mem_root= mem_root_save;
 
@@ -10907,6 +14153,7 @@ err:
 }
 
 
+
 /****************************************************************************/
 
 /**
@@ -10947,7 +14194,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list)
                         &share, sizeof(*share),
                         &field, (field_count + 1) * sizeof(Field*),
                         &blob_field, (field_count+1) *sizeof(uint),
-                        &bitmaps, bitmap_buffer_size(field_count)*2,
+                        &bitmaps, bitmap_buffer_size(field_count)*4,
                         NullS))
     return 0;
 
@@ -10959,7 +14206,6 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list)
   share->blob_field= blob_field;
   share->fields= field_count;
   share->blob_ptr_size= portable_sizeof_char_ptr;
-  share->db_low_byte_first=1;                // True for HEAP and MyISAM
   setup_tmp_table_column_bitmaps(table, bitmaps);
 
   /* Create all fields and calculate the total length of record */
@@ -10999,7 +14245,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list)
   {
     table->null_flags= (uchar*) table->record[0];
     share->null_fields= null_count;
-    share->null_bytes= null_pack_length;
+    share->null_bytes= share->null_bytes_for_compare= null_pack_length;
   }
 
   table->in_use= thd;           /* field->reset() may access table->in_use */
@@ -11049,30 +14295,243 @@ error:
 }
 
 
-static bool open_tmp_table(TABLE *table)
+bool open_tmp_table(TABLE *table)
 {
   int error;
-  if ((error=table->file->ha_open(table, table->s->table_name.str,O_RDWR,
-                                  HA_OPEN_TMP_TABLE | HA_OPEN_INTERNAL_TABLE)))
+  if ((error= table->file->ha_open(table, table->s->table_name.str, O_RDWR,
+                                   HA_OPEN_TMP_TABLE |
+                                   HA_OPEN_INTERNAL_TABLE)))
   {
     table->file->print_error(error,MYF(0)); /* purecov: inspected */
     table->db_stat=0;
     return(1);
   }
+  table->db_stat= HA_OPEN_KEYFILE+HA_OPEN_RNDFILE;
   (void) table->file->extra(HA_EXTRA_QUICK);		/* Faster */
+  table->created= TRUE;
   return(0);
 }
 
 
-static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
-				    ulonglong options, my_bool big_tables)
+#if defined(WITH_ARIA_STORAGE_ENGINE) && defined(USE_ARIA_FOR_TMP_TABLES)
+
+/*
+  Create internal (MyISAM or Maria) temporary table
+
+  SYNOPSIS
+    create_internal_tmp_table()
+      table           Table object that describes the table to be created
+      keyinfo         Description of the index (there is always one index)
+      start_recinfo   engine's column descriptions
+      recinfo INOUT   End of engine's column descriptions
+      options         Option bits
+   
+  DESCRIPTION
+    Create an internal temporary table according to the passed description.
+    The table is assumed to have one unique index or constraint.
+
+    The passed array of ENGINE_COLUMNDEF structures must have this form:
+
+      1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
+         when there are many nullable columns)
+      2. Table columns
+      3. One free ENGINE_COLUMNDEF element (*recinfo points here)
+   
+    This function may use the free element to create hash column for unique
+    constraint.
+
+   RETURN
+     FALSE - OK
+     TRUE  - Error
+*/
+
+
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, 
+                               ENGINE_COLUMNDEF *start_recinfo,
+                               ENGINE_COLUMNDEF **recinfo, 
+                               ulonglong options, my_bool big_tables)
+{
+  int error;
+  MARIA_KEYDEF keydef;
+  MARIA_UNIQUEDEF uniquedef;
+  TABLE_SHARE *share= table->s;
+  MARIA_CREATE_INFO create_info;
+  DBUG_ENTER("create_internal_tmp_table");
+
+  if (share->keys)
+  {						// Get keys for maria_create
+    bool using_unique_constraint=0;
+    HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root,
+                                            sizeof(*seg) * keyinfo->key_parts);
+    if (!seg)
+      goto err;
+
+    bzero(seg, sizeof(*seg) * keyinfo->key_parts);
+    if (keyinfo->key_length >= table->file->max_key_length() ||
+	keyinfo->key_parts > table->file->max_key_parts() ||
+	share->uniques)
+    {
+      /* Can't create a key; Make a unique constraint instead of a key */
+      share->keys=    0;
+      share->uniques= 1;
+      using_unique_constraint=1;
+      bzero((char*) &uniquedef,sizeof(uniquedef));
+      uniquedef.keysegs=keyinfo->key_parts;
+      uniquedef.seg=seg;
+      uniquedef.null_are_equal=1;
+
+      /* Create extra column for hash value */
+      bzero((uchar*) *recinfo,sizeof(**recinfo));
+      (*recinfo)->type=   FIELD_CHECK;
+      (*recinfo)->length= MARIA_UNIQUE_HASH_LENGTH;
+      (*recinfo)++;
+      share->reclength+=      MARIA_UNIQUE_HASH_LENGTH;
+    }
+    else
+    {
+      /* Create an unique key */
+      bzero((char*) &keydef,sizeof(keydef));
+      keydef.flag=HA_NOSAME;
+      keydef.keysegs=  keyinfo->key_parts;
+      keydef.seg= seg;
+    }
+    for (uint i=0; i < keyinfo->key_parts ; i++,seg++)
+    {
+      Field *field=keyinfo->key_part[i].field;
+      seg->flag=     0;
+      seg->language= field->charset()->number;
+      seg->length=   keyinfo->key_part[i].length;
+      seg->start=    keyinfo->key_part[i].offset;
+      if (field->flags & BLOB_FLAG)
+      {
+	seg->type=
+	((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ?
+	 HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2);
+	seg->bit_start= (uint8)(field->pack_length() - share->blob_ptr_size);
+	seg->flag= HA_BLOB_PART;
+	seg->length=0;			// Whole blob in unique constraint
+      }
+      else
+      {
+	seg->type= keyinfo->key_part[i].type;
+        /* Tell handler if it can do suffix space compression */
+	if (field->real_type() == MYSQL_TYPE_STRING &&
+	    keyinfo->key_part[i].length > 32)
+	  seg->flag|= HA_SPACE_PACK;
+      }
+      if (!(field->flags & NOT_NULL_FLAG))
+      {
+	seg->null_bit= field->null_bit;
+	seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]);
+	/*
+	  We are using a GROUP BY on something that contains NULL
+	  In this case we have to tell Aria that two NULLs should
+	  on INSERT be regarded as the same value
+	*/
+	if (!using_unique_constraint)
+	  keydef.flag|= HA_NULL_ARE_EQUAL;
+      }
+    }
+  }
+  bzero((char*) &create_info,sizeof(create_info));
+
+  if (big_tables && !(options & SELECT_SMALL_RESULT))
+    create_info.data_file_length= ~(ulonglong) 0;
+
+  /*
+    The logic for choosing the record format:
+    The STATIC_RECORD format is the fastest one, because it's so simple,
+    so we use this by default for short rows.
+    BLOCK_RECORD caches both row and data, so this is generally faster than
+    DYNAMIC_RECORD. The one exception is when we write to a tmp table and
+    want to use keys for duplicate elimination, because with BLOCK_RECORD
+    we first write the row, then check for key conflicts and then have to
+    delete the row.  This can happen when there is a GROUP BY and no
+    sum functions, or when DISTINCT is used.
+  */
+  if ((error= maria_create(share->table_name.str,
+                           table->no_rows ? NO_RECORD :
+                           (share->reclength < 64 &&
+                            !share->blob_fields ? STATIC_RECORD :
+                            table->used_for_duplicate_elimination ?
+                            DYNAMIC_RECORD : BLOCK_RECORD),
+                           share->keys, &keydef,
+                           (uint) (*recinfo-start_recinfo),
+                           start_recinfo,
+                           share->uniques, &uniquedef,
+                           &create_info,
+                           HA_CREATE_TMP_TABLE)))
+  {
+    table->file->print_error(error,MYF(0));	/* purecov: inspected */
+    table->db_stat=0;
+    goto err;
+  }
+  status_var_increment(table->in_use->status_var.created_tmp_disk_tables);
+  table->in_use->query_plan_flags|= QPLAN_TMP_DISK;
+  share->db_record_offset= 1;
+  DBUG_RETURN(0);
+ err:
+  DBUG_RETURN(1);
+}
+
+
+bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
+                                         ENGINE_COLUMNDEF *start_recinfo,
+                                         ENGINE_COLUMNDEF **recinfo, 
+                                         int error,
+                                         bool ignore_last_dupp_key_error)
+{
+  return create_internal_tmp_table_from_heap2(thd, table, 
+                                              start_recinfo, recinfo, error,
+                                              ignore_last_dupp_key_error,
+                                              maria_hton,
+                                              "converting HEAP to Aria");
+}
+
+#else
+
+/*
+  Create internal (MyISAM or Maria) temporary table
+
+  SYNOPSIS
+    create_internal_tmp_table()
+      table           Table object that descrimes the table to be created
+      keyinfo         Description of the index (there is always one index)
+      start_recinfo   engine's column descriptions
+      recinfo INOUT   End of engine's column descriptions
+      options         Option bits
+   
+  DESCRIPTION
+    Create an internal temporary table according to the passed description.
+    The table is assumed to have one unique index or constraint.
+
+    The passed array of ENGINE_COLUMNDEF structures must have this form:
+
+      1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
+         when there are many nullable columns)
+      2. Table columns
+      3. One free ENGINE_COLUMNDEF element (*recinfo points here)
+   
+    This function may use the free element to create hash column for unique
+    constraint.
+
+   RETURN
+     FALSE - OK
+     TRUE  - Error
+*/
+
+/* Create internal MyISAM temporary table */
+
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, 
+                               ENGINE_COLUMNDEF *start_recinfo,
+                               ENGINE_COLUMNDEF **recinfo,
+                               ulonglong options, my_bool big_tables)
 {
   int error;
   MI_KEYDEF keydef;
   MI_UNIQUEDEF uniquedef;
-  KEY *keyinfo=param->keyinfo;
   TABLE_SHARE *share= table->s;
-  DBUG_ENTER("create_myisam_tmp_table");
+  DBUG_ENTER("create_internal_tmp_table");
 
   if (share->keys)
   {						// Get keys for ni_create
@@ -11097,10 +14556,10 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
       uniquedef.null_are_equal=1;
 
       /* Create extra column for hash value */
-      bzero((uchar*) param->recinfo,sizeof(*param->recinfo));
-      param->recinfo->type= FIELD_CHECK;
-      param->recinfo->length=MI_UNIQUE_HASH_LENGTH;
-      param->recinfo++;
+      bzero((uchar*) *recinfo,sizeof(**recinfo));
+      (*recinfo)->type= FIELD_CHECK;
+      (*recinfo)->length=MI_UNIQUE_HASH_LENGTH;
+      (*recinfo)++;
       share->reclength+=MI_UNIQUE_HASH_LENGTH;
     }
     else
@@ -11156,8 +14615,8 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
     create_info.data_file_length= ~(ulonglong) 0;
 
   if ((error=mi_create(share->table_name.str, share->keys, &keydef,
-		       (uint) (param->recinfo-param->start_recinfo),
-		       param->start_recinfo,
+		       (uint) (*recinfo-start_recinfo),
+		       start_recinfo,
 		       share->uniques, &uniquedef,
 		       &create_info,
 		       HA_CREATE_TMP_TABLE)))
@@ -11168,64 +14627,53 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
   }
   status_var_increment(table->in_use->status_var.created_tmp_disk_tables);
   share->db_record_offset= 1;
+  table->created= TRUE;
   DBUG_RETURN(0);
  err:
   DBUG_RETURN(1);
 }
 
 
-void
-free_tmp_table(THD *thd, TABLE *entry)
-{
-  MEM_ROOT own_root= entry->mem_root;
-  const char *save_proc_info;
-  DBUG_ENTER("free_tmp_table");
-  DBUG_PRINT("enter",("table: %s",entry->alias));
-
-  save_proc_info=thd->proc_info;
-  thd_proc_info(thd, "removing tmp table");
-
-  // Release latches since this can take a long time
-  ha_release_temporary_latches(thd);
-
-  if (entry->file)
-  {
-    if (entry->db_stat)
-      entry->file->ha_drop_table(entry->s->table_name.str);
-    else
-      entry->file->ha_delete_table(entry->s->table_name.str);
-    delete entry->file;
-  }
-
-  /* free blobs */
-  for (Field **ptr=entry->field ; *ptr ; ptr++)
-    (*ptr)->free();
-  free_io_cache(entry);
-
-  if (entry->temp_pool_slot != MY_BIT_NONE)
-    bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot);
+/**
+  If a HEAP table gets full, create a MyISAM table and copy all rows to this
+*/
 
-  plugin_unlock(0, entry->s->db_plugin);
+bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
+                                         ENGINE_COLUMNDEF *start_recinfo,
+                                         ENGINE_COLUMNDEF **recinfo, 
+                                         int error,
+                                         bool ignore_last_dupp_key_error)
+{
+  return create_internal_tmp_table_from_heap2(thd, table, 
+                                              start_recinfo, recinfo, error,
+                                              ignore_last_dupp_key_error,
+                                              myisam_hton,
+                                              "converting HEAP to MyISAM");
+}
 
-  free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
-  thd_proc_info(thd, save_proc_info);
+#endif /* WITH_ARIA_STORAGE_ENGINE && USE_ARIA_FOR_TMP_TABLES */
 
-  DBUG_VOID_RETURN;
-}
 
-/**
-  If a HEAP table gets full, create a MyISAM table and copy all rows
-  to this.
+/*
+  If a HEAP table gets full, create an internal table in MyISAM or Aria
+  and copy all rows to it
 */
 
-bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
-			     int error, bool ignore_last_dupp_key_error)
+
+static bool
+create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
+                                     ENGINE_COLUMNDEF *start_recinfo,
+                                     ENGINE_COLUMNDEF **recinfo, 
+                                     int error,
+                                     bool ignore_last_dupp_key_error,
+                                     handlerton *hton,
+                                     const char *proc_info)
 {
   TABLE new_table;
   TABLE_SHARE share;
   const char *save_proc_info;
-  int write_err;
-  DBUG_ENTER("create_myisam_from_heap");
+  int write_err= 0;
+  DBUG_ENTER("create_internal_tmp_table_from_heap2");
 
   if (table->s->db_type() != heap_hton || 
       error != HA_ERR_RECORD_FILE_FULL)
@@ -11237,64 +14685,62 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
     table->file->print_error(error, MYF(ME_FATALERROR));
     DBUG_RETURN(1);
   }
-
-  // Release latches since this can take a long time
-  ha_release_temporary_latches(thd);
-
   new_table= *table;
   share= *table->s;
   new_table.s= &share;
-  new_table.s->db_plugin= ha_lock_engine(thd, myisam_hton);
+  new_table.s->db_plugin= ha_lock_engine(thd, hton);
   if (!(new_table.file= get_new_handler(&share, &new_table.mem_root,
                                         new_table.s->db_type())))
     DBUG_RETURN(1);				// End of memory
 
   save_proc_info=thd->proc_info;
-  thd_proc_info(thd, "converting HEAP to MyISAM");
+  thd_proc_info(thd, proc_info);
 
-  if (create_myisam_tmp_table(&new_table, param,
-			      thd->lex->select_lex.options | thd->variables.option_bits,
-                              thd->variables.big_tables))
+  new_table.no_rows= table->no_rows;
+  if (create_internal_tmp_table(&new_table, table->key_info, start_recinfo,
+                                recinfo,
+                                thd->lex->select_lex.options | 
+			        thd->variables.option_bits,
+                                thd->variables.big_tables))
     goto err2;
   if (open_tmp_table(&new_table))
     goto err1;
   if (table->file->indexes_are_disabled())
     new_table.file->ha_disable_indexes(HA_KEY_SWITCH_ALL);
   table->file->ha_index_or_rnd_end();
-  table->file->ha_rnd_init(1);
-  if (table->no_rows)
-  {
+  if (table->file->ha_rnd_init_with_error(1))
+    DBUG_RETURN(1);
+  if (new_table.no_rows)
     new_table.file->extra(HA_EXTRA_NO_ROWS);
-    new_table.no_rows=1;
+  else
+  {
+    /* update table->file->stats.records */
+    table->file->info(HA_STATUS_VARIABLE);
+    new_table.file->ha_start_bulk_insert(table->file->stats.records);
   }
 
-#ifdef TO_BE_DONE_LATER_IN_4_1
-  /*
-    To use start_bulk_insert() (which is new in 4.1) we need to find
-    all places where a corresponding end_bulk_insert() should be put.
-  */
-  table->file->info(HA_STATUS_VARIABLE); /* update table->file->stats.records */
-  new_table.file->ha_start_bulk_insert(table->file->stats.records);
-#else
-  /* HA_EXTRA_WRITE_CACHE can stay until close, no need to disable it */
-  new_table.file->extra(HA_EXTRA_WRITE_CACHE);
-#endif
-
   /*
     copy all old rows from heap table to MyISAM table
     This is the only code that uses record[1] to read/write but this
     is safe as this is a temporary MyISAM table without timestamp/autoincrement
     or partitioning.
   */
-  while (!table->file->rnd_next(new_table.record[1]))
+  while (!table->file->ha_rnd_next(new_table.record[1]))
   {
-    write_err= new_table.file->ha_write_row(new_table.record[1]);
+    write_err= new_table.file->ha_write_tmp_row(new_table.record[1]);
     DBUG_EXECUTE_IF("raise_error", write_err= HA_ERR_FOUND_DUPP_KEY ;);
     if (write_err)
       goto err;
+    if (thd->killed)
+    {
+      thd->send_kill_message();
+      goto err_killed;
+    }
   }
+  if (!new_table.no_rows && new_table.file->ha_end_bulk_insert())
+    goto err;
   /* copy row that filled HEAP table */
-  if ((write_err=new_table.file->ha_write_row(table->record[0])))
+  if ((write_err=new_table.file->ha_write_tmp_row(table->record[0])))
   {
     if (new_table.file->is_fatal_error(write_err, HA_CHECK_DUP) ||
 	!ignore_last_dupp_key_error)
@@ -11303,11 +14749,11 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
 
   /* remove heap table and change to use myisam table */
   (void) table->file->ha_rnd_end();
-  (void) table->file->close();                  // This deletes the table !
+  (void) table->file->ha_close();          // This deletes the table !
   delete table->file;
   table->file=0;
   plugin_unlock(0, table->s->db_plugin);
-  share.db_plugin= my_plugin_lock(0, &share.db_plugin);
+  share.db_plugin= my_plugin_lock(0, share.db_plugin);
   new_table.s= table->s;                       // Keep old share
   *table= new_table;
   *table->s= share;
@@ -11315,15 +14761,16 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
   table->file->change_table_ptr(table, table->s);
   table->use_all_columns();
   if (save_proc_info)
-    thd_proc_info(thd, (!strcmp(save_proc_info,"Copying to tmp table") ?
-                  "Copying to tmp table on disk" : save_proc_info));
+    thd_proc_info(thd, save_proc_info == copy_to_tmp_table ?
+                  "Copying to tmp table on disk" : save_proc_info);
   DBUG_RETURN(0);
 
  err:
   DBUG_PRINT("error",("Got error: %d",write_err));
   table->file->print_error(write_err, MYF(0));
+err_killed:
   (void) table->file->ha_rnd_end();
-  (void) new_table.file->close();
+  (void) new_table.file->ha_close();
  err1:
   new_table.file->ha_delete_table(new_table.s->table_name.str);
  err2:
@@ -11334,6 +14781,45 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
 }
 
 
+void
+free_tmp_table(THD *thd, TABLE *entry)
+{
+  MEM_ROOT own_root= entry->mem_root;
+  const char *save_proc_info;
+  DBUG_ENTER("free_tmp_table");
+  DBUG_PRINT("enter",("table: %s",entry->alias.c_ptr()));
+
+  save_proc_info=thd->proc_info;
+  thd_proc_info(thd, "removing tmp table");
+
+  if (entry->file && entry->created)
+  {
+    entry->file->ha_index_or_rnd_end();
+    if (entry->db_stat)
+      entry->file->ha_drop_table(entry->s->table_name.str);
+    else
+      entry->file->ha_delete_table(entry->s->table_name.str);
+    delete entry->file;
+  }
+
+  /* free blobs */
+  for (Field **ptr=entry->field ; *ptr ; ptr++)
+    (*ptr)->free();
+  free_io_cache(entry);
+
+  if (entry->temp_pool_slot != MY_BIT_NONE)
+    bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot);
+
+  plugin_unlock(0, entry->s->db_plugin);
+  entry->alias.free();
+
+  free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
+  thd_proc_info(thd, save_proc_info);
+
+  DBUG_VOID_RETURN;
+}
+
+
 /**
   @details
   Rows produced by a join sweep may end up in a temporary table or be sent
@@ -11423,14 +14909,14 @@ Next_select_func setup_end_select_func(JOIN *join)
   @retval
     -1  if error should be sent
 */
-
 static int
 do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
 {
   int rc= 0;
   enum_nested_loop_state error= NESTED_LOOP_OK;
-  JOIN_TAB *join_tab= NULL;
+  JOIN_TAB *join_tab;
   DBUG_ENTER("do_select");
+  LINT_INIT(join_tab);
   
   join->procedure=procedure;
   join->tmp_table= table;			/* Save for easy recursion */
@@ -11442,18 +14928,24 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
     empty_record(table);
     if (table->group && join->tmp_table_param.sum_func_count &&
         table->s->keys && !table->file->inited)
-      table->file->ha_index_init(0, 0);
+    {
+      int tmp_error;
+      if ((tmp_error= table->file->ha_index_init(0, 0)))
+      {
+        table->file->print_error(tmp_error, MYF(0)); /* purecov: inspected */
+        DBUG_RETURN(-1);                             /* purecov: inspected */
+      }
+    }
   }
   /* Set up select_end */
   Next_select_func end_select= setup_end_select_func(join);
-  if (join->tables)
+  if (join->table_count)
   {
-    join->join_tab[join->tables-1].next_select= end_select;
-
+    join->join_tab[join->top_join_tab_count - 1].next_select= end_select;
     join_tab=join->join_tab+join->const_tables;
   }
   join->send_records=0;
-  if (join->tables == join->const_tables)
+  if (join->table_count == join->const_tables)
   {
     /*
       HAVING will be checked after processing aggregate functions,
@@ -11475,9 +14967,12 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
     }
     else if (join->send_row_on_empty_set())
     {
-      List<Item> *columns_list= (procedure ? &join->procedure_fields_list :
-                                 fields);
-      rc= join->result->send_data(*columns_list);
+      if (!join->having || join->having->val_int())
+      {
+        List<Item> *columns_list= (procedure ? &join->procedure_fields_list :
+                                   fields);
+        rc= join->result->send_data(*columns_list) > 0;
+      }
     }
     /*
       An error can happen when evaluating the conds 
@@ -11489,8 +14984,11 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
   }
   else
   {
-    DBUG_ASSERT(join->tables);
-    error= sub_select(join,join_tab,0);
+    DBUG_ASSERT(join->table_count);
+    if (join->outer_ref_cond && !join->outer_ref_cond->val_int())
+      error= NESTED_LOOP_NO_MORE_ROWS;
+    else
+      error= sub_select(join,join_tab,0);
     if (error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS)
       error= sub_select(join,join_tab,1);
     if (error == NESTED_LOOP_QUERY_LIMIT)
@@ -11499,7 +14997,6 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
   if (error == NESTED_LOOP_NO_MORE_ROWS)
     error= NESTED_LOOP_OK;
 
-
   if (table)
   {
     int tmp, new_errno= 0;
@@ -11549,33 +15046,98 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
 }
 
 
+int rr_sequential_and_unpack(READ_RECORD *info)
+{
+  int error;
+  if ((error= rr_sequential(info)))
+    return error;
+  
+  for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
+    (*cp->do_copy)(cp);
+
+  return error;
+}
+
+
+/*
+  Fill the join buffer with partial records, retrieve all full matches for them
+
+  SYNOPSIS
+    sub_select_cache()
+      join     pointer to the structure providing all context info for the query
+      join_tab the first next table of the execution plan to be retrieved
+      end_records  true when we need to perform final steps of the retrieval
+
+  DESCRIPTION
+    For a given table Ti= join_tab from the sequence of tables of the chosen
+    execution plan T1,...,Ti,...,Tn the function just puts the partial record
+    t1,...,t[i-1] into the join buffer associated with table Ti unless this
+    is the last record added into the buffer. In that case, the function
+    additionally finds all matching full records for all partial
+    records accumulated in the buffer, after which it cleans the buffer up.
+    If a partial join record t1,...,ti is extended utilizing a dynamic
+    range scan then it is not put into the join buffer. Rather all matching
+    records are found for it at once by the function sub_select.
+
+  NOTES
+    The function implements the algorithmic schema for both Blocked Nested
+    Loop Join and Batched Key Access Join. The difference can be seen only at
+    the level of the implementation of the put_record and join_records
+    virtual methods for the cache object associated with the join_tab.
+    The put_record method accumulates records in the cache, while the
+    join_records method builds all matching join records and sends them into
+    the output stream.  
+      
+  RETURN
+    return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
+*/ 
+
 enum_nested_loop_state
-sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
+sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
 {
   enum_nested_loop_state rc;
+  JOIN_CACHE *cache= join_tab->cache;
+
+  DBUG_ENTER("sub_select_cache");
+
+  /* This function cannot be called if join_tab has no associated join buffer */
+  DBUG_ASSERT(cache != NULL);
+
+  join_tab->cache->reset_join(join);
 
   if (end_of_records)
   {
-    rc= flush_cached_records(join,join_tab,FALSE);
+    rc= cache->join_records(FALSE);
     if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS)
-      rc= sub_select(join,join_tab,end_of_records);
-    return rc;
+      rc= sub_select(join, join_tab, end_of_records);
+    DBUG_RETURN(rc);
   }
-  if (join->thd->killed)		// If aborted by user
+  if (join->thd->killed)
   {
+    /* The user has aborted the execution of the query */
     join->thd->send_kill_message();
-    return NESTED_LOOP_KILLED;                   /* purecov: inspected */
+    DBUG_RETURN(NESTED_LOOP_KILLED);
   }
-  if (join_tab->use_quick != 2 || test_if_quick_select(join_tab) <= 0)
+  if (!test_if_use_dynamic_range_scan(join_tab))
   {
-    if (!store_record_in_cache(&join_tab->cache))
-      return NESTED_LOOP_OK;                     // There is more room in cache
-    return flush_cached_records(join,join_tab,FALSE);
+    if (!cache->put_record())
+      DBUG_RETURN(NESTED_LOOP_OK); 
+    /* 
+      We have decided that, after the record we've just put into the buffer,
+      we won't add any more records. Now try to find all the matching
+      extensions for all records in the buffer.
+    */ 
+    rc= cache->join_records(FALSE);
+    DBUG_RETURN(rc);
   }
-  rc= flush_cached_records(join, join_tab, TRUE);
+  /*
+     TODO: Check whether we really need the call below and we can't do
+           without it. If it's not the case remove it.
+  */ 
+  rc= cache->join_records(TRUE);
   if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS)
     rc= sub_select(join, join_tab, end_of_records);
-  return rc;
+  DBUG_RETURN(rc);
 }
 
 /**
@@ -11701,14 +15263,27 @@ sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
 enum_nested_loop_state
 sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
 {
+  DBUG_ENTER("sub_select");
+
   join_tab->table->null_row=0;
   if (end_of_records)
-    return (*join_tab->next_select)(join,join_tab+1,end_of_records);
-
+  {
+    enum_nested_loop_state nls=
+      (*join_tab->next_select)(join,join_tab+1,end_of_records);
+    DBUG_RETURN(nls);
+  }
   int error;
-  enum_nested_loop_state rc;
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
   READ_RECORD *info= &join_tab->read_record;
 
+  if (join_tab->flush_weedout_table)
+  {
+    do_sj_reset(join_tab->flush_weedout_table);
+  }
+
+  if (!join_tab->preread_init_done && join_tab->preread_init())
+    DBUG_RETURN(NESTED_LOOP_ERROR);
+
   join->return_tab= join_tab;
 
   if (join_tab->last_inner)
@@ -11721,15 +15296,62 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
 
     /* Set first_unmatched for the last inner table of this group */
     join_tab->last_inner->first_unmatched= join_tab;
+    if (join_tab->on_precond && !join_tab->on_precond->val_int())
+      rc= NESTED_LOOP_NO_MORE_ROWS;
   }
   join->thd->warning_info->reset_current_row_for_warning();
 
-  error= (*join_tab->read_first_record)(join_tab);
-  rc= evaluate_join_record(join, join_tab, error);
+  if (rc != NESTED_LOOP_NO_MORE_ROWS && 
+      (rc= join_tab_execution_startup(join_tab)) < 0)
+    DBUG_RETURN(rc);
+  
+  if (join_tab->loosescan_match_tab)
+    join_tab->loosescan_match_tab->found_match= FALSE;
+
+  if (rc != NESTED_LOOP_NO_MORE_ROWS)
+  {
+    error= (*join_tab->read_first_record)(join_tab);
+    if (join_tab->keep_current_rowid)
+      join_tab->table->file->position(join_tab->table->record[0]);    
+    rc= evaluate_join_record(join, join_tab, error);
+  }
 
-  while (rc == NESTED_LOOP_OK)
+  /* 
+    Note: psergey has added the 2nd part of the following condition; the 
+    change should probably be made in 5.1, too.
+  */
+  bool skip_over= FALSE;
+  while (rc == NESTED_LOOP_OK && join->return_tab >= join_tab)
   {
+    if (join_tab->loosescan_match_tab && 
+        join_tab->loosescan_match_tab->found_match)
+    {
+      KEY *key= join_tab->table->key_info + join_tab->index;
+      key_copy(join_tab->loosescan_buf, info->record, key, 
+               join_tab->loosescan_key_len);
+      skip_over= TRUE;
+    }
+
     error= info->read_record(info);
+
+    if (skip_over && !error) 
+    {
+      if(!key_cmp(join_tab->table->key_info[join_tab->index].key_part,
+                  join_tab->loosescan_buf, join_tab->loosescan_key_len))
+      {
+        /* 
+          This is the LooseScan action: skip over records with the same key
+          value if we already had a match for them.
+        */
+        continue;
+      }
+      join_tab->loosescan_match_tab->found_match= FALSE;
+      skip_over= FALSE;
+    }
+
+    if (join_tab->keep_current_rowid)
+      join_tab->table->file->position(join_tab->table->record[0]);
+    
     rc= evaluate_join_record(join, join_tab, error);
   }
 
@@ -11739,16 +15361,23 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
 
   if (rc == NESTED_LOOP_NO_MORE_ROWS)
     rc= NESTED_LOOP_OK;
-  return rc;
+  DBUG_RETURN(rc);
 }
 
 
 /**
-  Process one record of the nested loop join.
-
-    This function will evaluate parts of WHERE/ON clauses that are
-    applicable to the partial record on hand and in case of success
-    submit this record to the next level of the nested loop.
+  @brief Process one row of the nested loop join.
+
+  This function will evaluate parts of WHERE/ON clauses that are
+  applicable to the partial row on hand and in case of success
+  submit this row to the next level of the nested loop.
+
+  @param  join     - The join object
+  @param  join_tab - The innermost join_tab being processed
+  @param  error > 0: Error, terminate processing
+                = 0: (Partial) row is available
+                < 0: No more rows available at this level
+  @return Nested loop state (Ok, No_more_rows, Error, Killed)
 */
 
 static enum_nested_loop_state
@@ -11760,16 +15389,21 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
   COND *select_cond= join_tab->select_cond;
   bool select_cond_result= TRUE;
 
+  DBUG_ENTER("evaluate_join_record");
+  DBUG_PRINT("enter",
+             ("evaluate_join_record join: %p join_tab: %p"
+              " cond: %p error: %d", join, join_tab, select_cond, error));
   if (error > 0 || (join->thd->is_error()))     // Fatal error
-    return NESTED_LOOP_ERROR;
+    DBUG_RETURN(NESTED_LOOP_ERROR);
   if (error < 0)
-    return NESTED_LOOP_NO_MORE_ROWS;
+    DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
   if (join->thd->killed)			// Aborted by user
   {
     join->thd->send_kill_message();
-    return NESTED_LOOP_KILLED;               /* purecov: inspected */
+    DBUG_RETURN(NESTED_LOOP_KILLED);            /* purecov: inspected */
   }
-  DBUG_PRINT("info", ("select cond 0x%lx", (ulong)select_cond));
+
+  update_virtual_fields(join->thd, join_tab->table);
 
   if (select_cond)
   {
@@ -11777,7 +15411,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
 
     /* check for errors evaluating the condition */
     if (join->thd->is_error())
-      return NESTED_LOOP_ERROR;
+      DBUG_RETURN(NESTED_LOOP_ERROR);
   }
 
   if (!select_cond || select_cond_result)
@@ -11825,6 +15459,19 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
         {
           /* The condition attached to table tab is false */
 
+          if (tab == join_tab)
+          {
+            found= 0;
+          }            
+          else
+          {
+            /*
+              Set a return point if rejected predicate is attached
+              not to the last table of the current nest level.
+            */
+            join->return_tab= tab;
+          }
+
           if (tab->table->reginfo.not_exists_optimize)
           {
             /*
@@ -11833,19 +15480,11 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
               Any found 'tab' matches are known to evaluate to 'false'.
               Returning .._NO_MORE_ROWS will skip rem. 'tab' rows.
             */
-            return NESTED_LOOP_NO_MORE_ROWS;
+            DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
           }
-
-          if (tab == join_tab)
-            found= 0;
-          else
+          else if (tab != join_tab)
           {
-            /*
-              Set a return point if rejected predicate is attached
-              not to the last table of the current nest level.
-            */
-            join->return_tab= tab;
-            return NESTED_LOOP_OK;
+            DBUG_RETURN(NESTED_LOOP_OK);
           }
         }
       }
@@ -11859,6 +15498,26 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
       join_tab->first_unmatched= first_unmatched;
     }
 
+    JOIN_TAB *return_tab= join->return_tab;
+    join_tab->found_match= TRUE;
+
+    if (join_tab->check_weed_out_table && found)
+    {
+      int res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table);
+      if (res == -1)
+        DBUG_RETURN(NESTED_LOOP_ERROR);
+      else if (res == 1)
+        found= FALSE;
+    }
+    else if (join_tab->do_firstmatch)
+    {
+      /* 
+        We should return to the join_tab->do_firstmatch after we have 
+        enumerated all the suffixes for current prefix row combination
+      */
+      return_tab= join_tab->do_firstmatch;
+    }
+
     /*
       It was not just a return to lower loop level when one
       of the newly activated predicates is evaluated as false
@@ -11875,16 +15534,19 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
       rc= (*join_tab->next_select)(join, join_tab+1, 0);
       join->thd->warning_info->inc_current_row_for_warning();
       if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
-        return rc;
+        DBUG_RETURN(rc);
+      if (return_tab < join->return_tab)
+        join->return_tab= return_tab;
+
       if (join->return_tab < join_tab)
-        return NESTED_LOOP_OK;
+        DBUG_RETURN(NESTED_LOOP_OK);
       /*
         Test if this was a SELECT DISTINCT query on a table that
         was not in the field list;  In this case we can abort if
         we found a row, as no new rows can be added to the result.
       */
       if (not_used_in_distinct && found_records != join->found_records)
-        return NESTED_LOOP_NO_MORE_ROWS;
+        DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
     }
     else
     {
@@ -11902,10 +15564,9 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
     join->thd->warning_info->inc_current_row_for_warning();
     join_tab->read_record.unlock_row(join_tab);
   }
-  return NESTED_LOOP_OK;
+  DBUG_RETURN(NESTED_LOOP_OK);
 }
 
-
 /**
 
   @details
@@ -11966,103 +15627,32 @@ evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab)
   /*
     The row complemented by nulls satisfies all conditions
     attached to inner tables.
-    Send the row complemented by nulls to be joined with the
-    remaining tables.
   */
-  return (*join_tab->next_select)(join, join_tab+1, 0);
-}
-
-
-static enum_nested_loop_state
-flush_cached_records(JOIN *join,JOIN_TAB *join_tab,bool skip_last)
-{
-  enum_nested_loop_state rc= NESTED_LOOP_OK;
-  int error;
-  READ_RECORD *info;
-
-  join_tab->table->null_row= 0;
-  if (!join_tab->cache.records)
-    return NESTED_LOOP_OK;                      /* Nothing to do */
-  if (skip_last)
-    (void) store_record_in_cache(&join_tab->cache); // Must save this for later
-  if (join_tab->use_quick == 2)
-  {
-    if (join_tab->select->quick)
-    {					/* Used quick select last. reset it */
-      delete join_tab->select->quick;
-      join_tab->select->quick=0;
-    }
-  }
- /* read through all records */
-  if ((error=join_init_read_record(join_tab)))
+  if (join_tab->check_weed_out_table)
   {
-    reset_cache_write(&join_tab->cache);
-    return error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
+    int res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table);
+    if (res == -1)
+      return NESTED_LOOP_ERROR;
+    else if (res == 1)
+      return NESTED_LOOP_OK;
   }
-
-  for (JOIN_TAB *tmp=join->join_tab; tmp != join_tab ; tmp++)
+  else if (join_tab->do_firstmatch)
   {
-    tmp->status=tmp->table->status;
-    tmp->table->status=0;
+    /* 
+      We should return to the join_tab->do_firstmatch after we have 
+      enumerated all the suffixes for current prefix row combination
+    */
+    if (join_tab->do_firstmatch < join->return_tab)
+      join->return_tab= join_tab->do_firstmatch;
   }
 
-  info= &join_tab->read_record;
-  do
-  {
-    if (join->thd->killed)
-    {
-      join->thd->send_kill_message();
-      return NESTED_LOOP_KILLED; // Aborted by user /* purecov: inspected */
-    }
-    SQL_SELECT *select=join_tab->select;
-    if (rc == NESTED_LOOP_OK)
-    {
-      bool skip_record= FALSE;
-      if (join_tab->cache.select &&
-          join_tab->cache.select->skip_record(join->thd, &skip_record))
-      {
-        reset_cache_write(&join_tab->cache);
-        return NESTED_LOOP_ERROR;
-      }
-
-      if (!skip_record)
-      {
-        uint i;
-        reset_cache_read(&join_tab->cache);
-        for (i=(join_tab->cache.records- (skip_last ? 1 : 0)) ; i-- > 0 ;)
-        {
-          read_cached_record(join_tab);
-          skip_record= FALSE;
-          if (select && select->skip_record(join->thd, &skip_record))
-          {
-            reset_cache_write(&join_tab->cache);
-            return NESTED_LOOP_ERROR;
-          }
-          if (!skip_record)
-          {
-            rc= (join_tab->next_select)(join,join_tab+1,0);
-            if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
-            {
-              reset_cache_write(&join_tab->cache);
-              return rc;
-            }
-          }
-        }
-      }
-    }
-  } while (!(error=info->read_record(info)));
-
-  if (skip_last)
-    read_cached_record(join_tab);		// Restore current record
-  reset_cache_write(&join_tab->cache);
-  if (error > 0)				// Fatal error
-    return NESTED_LOOP_ERROR;                   /* purecov: inspected */
-  for (JOIN_TAB *tmp2=join->join_tab; tmp2 != join_tab ; tmp2++)
-    tmp2->table->status=tmp2->status;
-  return NESTED_LOOP_OK;
+  /*
+    Send the row complemented by nulls to be joined with the
+    remaining tables.
+  */
+  return (*join_tab->next_select)(join, join_tab+1, 0);
 }
 
-
 /*****************************************************************************
   The different ways to read a record
   Returns -1 if row was not found, 0 if row was found and 1 on errors
@@ -12093,10 +15683,10 @@ int safe_index_read(JOIN_TAB *tab)
 {
   int error;
   TABLE *table= tab->table;
-  if ((error=table->file->index_read_map(table->record[0],
-                                         tab->ref.key_buff,
-                                         make_prev_keypart_map(tab->ref.key_parts),
-                                         HA_READ_KEY_EXACT)))
+  if ((error= table->file->ha_index_read_map(table->record[0],
+                                             tab->ref.key_buff,
+                                             make_prev_keypart_map(tab->ref.key_parts),
+                                             HA_READ_KEY_EXACT)))
     return report_error(table, error);
   return 0;
 }
@@ -12106,13 +15696,21 @@ static int
 join_read_const_table(JOIN_TAB *tab, POSITION *pos)
 {
   int error;
+  TABLE_LIST *tbl;
   DBUG_ENTER("join_read_const_table");
   TABLE *table=tab->table;
   table->const_table=1;
   table->null_row=0;
   table->status=STATUS_NO_RECORD;
   
-  if (tab->type == JT_SYSTEM)
+  if (tab->table->pos_in_table_list->is_materialized_derived() &&
+      !tab->table->pos_in_table_list->fill_me)
+  {
+    //TODO: don't get here at all
+    /* Skip materialized derived tables/views. */
+    DBUG_RETURN(0);
+  }
+  else if (tab->type == JT_SYSTEM)
   {
     if ((error=join_read_system(tab)))
     {						// Info for DESCRIBE
@@ -12123,6 +15721,11 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
       if (!table->pos_in_table_list->outer_join || error > 0)
 	DBUG_RETURN(error);
     }
+    /*
+      The optimizer trusts the engine: when stats.records is 0, no rows
+      were found
+    */
+    DBUG_ASSERT(table->file->stats.records > 0 || error);
   }
   else
   {
@@ -12130,11 +15733,11 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
 	!table->no_keyread &&
         (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY)
     {
-      table->set_keyread(TRUE);
+      table->enable_keyread();
       tab->index= tab->ref.key;
     }
     error=join_read_const(tab);
-    table->set_keyread(FALSE);
+    table->disable_keyread();
     if (error)
     {
       tab->info="unique row not found";
@@ -12145,34 +15748,53 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
 	DBUG_RETURN(error);
     }
   }
-  if (*tab->on_expr_ref && !table->null_row)
+  /* 
+     Evaluate an on-expression only if it is not considered expensive.
+     This mainly prevents executing subqueries in optimization phase.
+     This is necessary since proper setup for such execution has not been
+     done at this stage.
+  */
+  if (*tab->on_expr_ref && !table->null_row && 
+      !(*tab->on_expr_ref)->is_expensive())
   {
+#if !defined(DBUG_OFF) && defined(NOT_USING_ITEM_EQUAL)
+    /*
+      This test could be very useful to find bugs in the optimizer
+      where we would call this function with an expression that can't be
+      evaluated yet. We can't have this enabled by default as long as we
+      have items like Item_equal, which don't report that they are const
+      but can still be called even if they contain non-const items.
+    */
+    (*tab->on_expr_ref)->update_used_tables();
+    DBUG_ASSERT((*tab->on_expr_ref)->const_item());
+#endif
     if ((table->null_row= test((*tab->on_expr_ref)->val_int() == 0)))
       mark_as_null_row(table);  
   }
   if (!table->null_row)
     table->maybe_null=0;
 
-  /* Check appearance of new constant items in Item_equal objects */
-  JOIN *join= tab->join;
-  if (join->conds)
-    update_const_equal_items(join->conds, tab);
-  TABLE_LIST *tbl;
-  for (tbl= join->select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
   {
-    TABLE_LIST *embedded;
-    TABLE_LIST *embedding= tbl;
-    do
-    {
-      embedded= embedding;
-      if (embedded->on_expr)
-         update_const_equal_items(embedded->on_expr, tab);
-      embedding= embedded->embedding;
+    JOIN *join= tab->join;
+    List_iterator<TABLE_LIST> ti(join->select_lex->leaf_tables);
+    /* Check appearance of new constant items in Item_equal objects */
+    if (join->conds)
+      update_const_equal_items(join->conds, tab);
+    while ((tbl= ti++))
+    {
+      TABLE_LIST *embedded;
+      TABLE_LIST *embedding= tbl;
+      do
+      {
+        embedded= embedding;
+        if (embedded->on_expr)
+           update_const_equal_items(embedded->on_expr, tab);
+        embedding= embedded->embedding;
+      }
+      while (embedding &&
+             embedding->nested_join->join_list.head() == embedded);
     }
-    while (embedding &&
-           embedding->nested_join->join_list.head() == embedded);
   }
-
   DBUG_RETURN(0);
 }
 
@@ -12184,8 +15806,8 @@ join_read_system(JOIN_TAB *tab)
   int error;
   if (table->status & STATUS_GARBAGE)		// If first read
   {
-    if ((error=table->file->read_first_row(table->record[0],
-					   table->s->primary_key)))
+    if ((error= table->file->ha_read_first_row(table->record[0],
+                                               table->s->primary_key)))
     {
       if (error != HA_ERR_END_OF_FILE)
 	return report_error(table, error);
@@ -12193,6 +15815,7 @@ join_read_system(JOIN_TAB *tab)
       empty_record(table);			// Make empty record
       return -1;
     }
+    update_virtual_fields(tab->join->thd, table);
     store_record(table,record[1]);
   }
   else if (!table->status)			// Only happens with left join
@@ -12227,10 +15850,10 @@ join_read_const(JOIN_TAB *tab)
       error=HA_ERR_KEY_NOT_FOUND;
     else
     {
-      error=table->file->index_read_idx_map(table->record[0],tab->ref.key,
-                                            (uchar*) tab->ref.key_buff,
-                                            make_prev_keypart_map(tab->ref.key_parts),
-                                            HA_READ_KEY_EXACT);
+      error= table->file->ha_index_read_idx_map(table->record[0],tab->ref.key,
+                                                (uchar*) tab->ref.key_buff,
+                                                make_prev_keypart_map(tab->ref.key_parts),
+                                                HA_READ_KEY_EXACT);
     }
     if (error)
     {
@@ -12241,6 +15864,7 @@ join_read_const(JOIN_TAB *tab)
 	return report_error(table, error);
       return -1;
     }
+    update_virtual_fields(tab->join->thd, table);
     store_record(table,record[1]);
   }
   else if (!(table->status & ~STATUS_NULL_ROW))	// Only happens with left join
@@ -12252,21 +15876,49 @@ join_read_const(JOIN_TAB *tab)
   return table->status ? -1 : 0;
 }
 
+/*
+  eq_ref access method implementation: "read_first" function
+
+  SYNOPSIS
+    join_read_key()
+      tab  JOIN_TAB of the accessed table
+
+  DESCRIPTION
+    This is the "read_first" function for the eq_ref access method. The
+    difference from the ref access function is that it has a one-element
+    lookup cache (see cmp_buffer_with_ref)
+
+  RETURN
+    0  - Ok
+   -1  - Row not found 
+    1  - Error
+*/
+
 
 static int
 join_read_key(JOIN_TAB *tab)
 {
-  int error;
-  TABLE *table= tab->table;
+  return join_read_key2(tab->join->thd, tab, tab->table, &tab->ref);
+}
 
+
+/*
+  eq_ref access handler but generalized a bit to support TABLE and TABLE_REF
+  not from the join_tab. See join_read_key for detailed synopsis.
+*/
+int join_read_key2(THD *thd, JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref)
+{
+  int error;
   if (!table->file->inited)
   {
-    table->file->ha_index_init(tab->ref.key, tab->sorted);
+    table->file->ha_index_init(table_ref->key, (tab ? tab->sorted : TRUE));
   }
-  if (cmp_buffer_with_ref(tab) ||
+
+  /* TODO: Why don't we do "Late NULLs Filtering" here? */
+  if (cmp_buffer_with_ref(thd, table, table_ref) ||
       (table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW)))
   {
-    if (tab->ref.key_err)
+    if (table_ref->key_err)
     {
       table->status=STATUS_NOT_FOUND;
       return -1;
@@ -12275,28 +15927,28 @@ join_read_key(JOIN_TAB *tab)
       Moving away from the current record. Unlock the row
       in the handler if it did not match the partial WHERE.
     */
-    if (tab->ref.has_record && tab->ref.use_count == 0)
+    if (tab && tab->ref.has_record && tab->ref.use_count == 0)
     {
       tab->read_record.file->unlock_row();
-      tab->ref.has_record= FALSE;
+      table_ref->has_record= FALSE;
     }
-    error=table->file->index_read_map(table->record[0],
-                                      tab->ref.key_buff,
-                                      make_prev_keypart_map(tab->ref.key_parts),
-                                      HA_READ_KEY_EXACT);
+    error=table->file->ha_index_read_map(table->record[0],
+                                  table_ref->key_buff,
+                                  make_prev_keypart_map(table_ref->key_parts),
+                                  HA_READ_KEY_EXACT);
     if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       return report_error(table, error);
 
     if (! error)
     {
-      tab->ref.has_record= TRUE;
-      tab->ref.use_count= 1;
+      table_ref->has_record= TRUE;
+      table_ref->use_count= 1;
     }
   }
   else if (table->status == 0)
   {
-    DBUG_ASSERT(tab->ref.has_record);
-    tab->ref.use_count++;
+    DBUG_ASSERT(table_ref->has_record);
+    table_ref->use_count++;
   }
   table->null_row=0;
   return table->status ? -1 : 0;
@@ -12347,21 +15999,20 @@ join_read_always_key(JOIN_TAB *tab)
 
   /* Initialize the index first */
   if (!table->file->inited)
-    table->file->ha_index_init(tab->ref.key, tab->sorted);
- 
-  /* Perform "Late NULLs Filtering" (see internals manual for explanations) */
-  for (uint i= 0 ; i < tab->ref.key_parts ; i++)
   {
-    if ((tab->ref.null_rejecting & 1 << i) && tab->ref.items[i]->is_null())
-        return -1;
+    if ((error= table->file->ha_index_init(tab->ref.key, tab->sorted)))
+    {
+      table->file->print_error(error, MYF(0));/* purecov: inspected */
+      return(1);                              /* purecov: inspected */
+    }
   }
 
   if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref))
     return -1;
-  if ((error=table->file->index_read_map(table->record[0],
-                                         tab->ref.key_buff,
-                                         make_prev_keypart_map(tab->ref.key_parts),
-                                         HA_READ_KEY_EXACT)))
+  if ((error= table->file->ha_index_read_map(table->record[0],
+                                             tab->ref.key_buff,
+                                             make_prev_keypart_map(tab->ref.key_parts),
+                                             HA_READ_KEY_EXACT)))
   {
     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       return report_error(table, error);
@@ -12383,12 +16034,19 @@ join_read_last_key(JOIN_TAB *tab)
   TABLE *table= tab->table;
 
   if (!table->file->inited)
-    table->file->ha_index_init(tab->ref.key, tab->sorted);
+  {
+    if ((error= table->file->ha_index_init(tab->ref.key, tab->sorted)))
+    {
+      table->file->print_error(error, MYF(0));/* purecov: inspected */
+      return(1);                              /* purecov: inspected */
+    }
+  }
   if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref))
     return -1;
-  if ((error=table->file->index_read_last_map(table->record[0],
-                                              tab->ref.key_buff,
-                                              make_prev_keypart_map(tab->ref.key_parts))))
+  if ((error= table->file->ha_index_read_map(table->record[0],
+                                            tab->ref.key_buff,
+                                     make_prev_keypart_map(tab->ref.key_parts),
+                                            HA_READ_PREFIX_LAST)))
   {
     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       return report_error(table, error);
@@ -12413,9 +16071,9 @@ join_read_next_same(READ_RECORD *info)
   TABLE *table= info->table;
   JOIN_TAB *tab=table->reginfo.join_tab;
 
-  if ((error=table->file->index_next_same(table->record[0],
-					  tab->ref.key_buff,
-					  tab->ref.key_length)))
+  if ((error= table->file->ha_index_next_same(table->record[0],
+                                              tab->ref.key_buff,
+                                              tab->ref.key_length)))
   {
     if (error != HA_ERR_END_OF_FILE)
       return report_error(table, error);
@@ -12433,7 +16091,7 @@ join_read_prev_same(READ_RECORD *info)
   TABLE *table= info->table;
   JOIN_TAB *tab=table->reginfo.join_tab;
 
-  if ((error=table->file->index_prev(table->record[0])))
+  if ((error= table->file->ha_index_prev(table->record[0])))
     return report_error(table, error);
   if (key_cmp_if_same(table, tab->ref.key_buff, tab->ref.key,
                       tab->ref.key_length))
@@ -12456,7 +16114,7 @@ join_init_quick_read_record(JOIN_TAB *tab)
 
 int read_first_record_seq(JOIN_TAB *tab)
 {
-  if (tab->read_record.file->ha_rnd_init(1))
+  if (tab->read_record.file->ha_rnd_init_with_error(1))
     return 1;
   return (*tab->read_record.read_record)(&tab->read_record);
 }
@@ -12467,28 +16125,59 @@ test_if_quick_select(JOIN_TAB *tab)
   delete tab->select->quick;
   tab->select->quick=0;
   return tab->select->test_quick_select(tab->join->thd, tab->keys,
-					(table_map) 0, HA_POS_ERROR, 0);
+					(table_map) 0, HA_POS_ERROR, 0,
+                                        FALSE);
 }
 
 
-static int
-join_init_read_record(JOIN_TAB *tab)
+static 
+bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
+{
+    return (join_tab->use_quick == 2 && test_if_quick_select(join_tab) > 0);
+}
+
+int join_init_read_record(JOIN_TAB *tab)
 {
   if (tab->select && tab->select->quick && tab->select->quick->reset())
     return 1;
+  if (!tab->preread_init_done && tab->preread_init())
+    return 1;
+  if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
+                       tab->select,1,1, FALSE))
+    return 1;
+  return (*tab->read_record.read_record)(&tab->read_record);
+}
+
+int
+join_read_record_no_init(JOIN_TAB *tab)
+{
+  Copy_field *save_copy, *save_copy_end;
+  
+  /*
+    init_read_record resets all elements of tab->read_record.
+    Remember things that we don't want to have reset.
+  */
+  save_copy=     tab->read_record.copy_field;
+  save_copy_end= tab->read_record.copy_field_end;
+  
   init_read_record(&tab->read_record, tab->join->thd, tab->table,
 		   tab->select,1,1, FALSE);
+
+  tab->read_record.copy_field=     save_copy;
+  tab->read_record.copy_field_end= save_copy_end;
+  tab->read_record.read_record= rr_sequential_and_unpack;
+
   return (*tab->read_record.read_record)(&tab->read_record);
 }
 
-
 static int
 join_read_first(JOIN_TAB *tab)
 {
-  int error;
+  int error= 0;
   TABLE *table=tab->table;
-  if (table->covering_keys.is_set(tab->index) && !table->no_keyread)
-    table->set_keyread(TRUE);
+  if (table->covering_keys.is_set(tab->index) && !table->no_keyread &&
+      !table->key_read)
+    table->enable_keyread();
   tab->table->status=0;
   tab->read_record.read_record=join_read_next;
   tab->read_record.table=table;
@@ -12496,8 +16185,10 @@ join_read_first(JOIN_TAB *tab)
   tab->read_record.index=tab->index;
   tab->read_record.record=table->record[0];
   if (!table->file->inited)
-    table->file->ha_index_init(tab->index, tab->sorted);
-  if ((error=tab->table->file->index_first(tab->table->record[0])))
+    error= table->file->ha_index_init(tab->index, tab->sorted);
+  if (!error)
+    error= table->file->prepare_index_scan();
+  if (error || (error=tab->table->file->ha_index_first(tab->table->record[0])))
   {
     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       report_error(table, error);
@@ -12511,8 +16202,9 @@ static int
 join_read_next(READ_RECORD *info)
 {
   int error;
-  if ((error=info->file->index_next(info->record)))
+  if ((error= info->file->ha_index_next(info->record)))
     return report_error(info->table, error);
+
   return 0;
 }
 
@@ -12521,9 +16213,10 @@ static int
 join_read_last(JOIN_TAB *tab)
 {
   TABLE *table=tab->table;
-  int error;
-  if (table->covering_keys.is_set(tab->index) && !table->no_keyread)
-    table->set_keyread(TRUE);
+  int error= 0;
+  if (table->covering_keys.is_set(tab->index) && !table->no_keyread &&
+      !table->key_read)
+    table->enable_keyread();
   tab->table->status=0;
   tab->read_record.read_record=join_read_prev;
   tab->read_record.table=table;
@@ -12531,9 +16224,12 @@ join_read_last(JOIN_TAB *tab)
   tab->read_record.index=tab->index;
   tab->read_record.record=table->record[0];
   if (!table->file->inited)
-    table->file->ha_index_init(tab->index, 1);
-  if ((error= tab->table->file->index_last(tab->table->record[0])))
+    error= table->file->ha_index_init(tab->index, 1);
+  if (!error)
+    error= table->file->prepare_index_scan();
+  if (error || (error= tab->table->file->ha_index_last(tab->table->record[0])))
     return report_error(table, error);
+
   return 0;
 }
 
@@ -12542,7 +16238,7 @@ static int
 join_read_prev(READ_RECORD *info)
 {
   int error;
-  if ((error= info->file->index_prev(info->record)))
+  if ((error= info->file->ha_index_prev(info->record)))
     return report_error(info->table, error);
   return 0;
 }
@@ -12554,11 +16250,15 @@ join_ft_read_first(JOIN_TAB *tab)
   int error;
   TABLE *table= tab->table;
 
-  if (!table->file->inited)
-    table->file->ha_index_init(tab->ref.key, 1);
+  if (!table->file->inited &&
+      (error= table->file->ha_index_init(tab->ref.key, 1)))
+  {
+    table->file->print_error(error, MYF(0));  /* purecov: inspected */
+    return(1);                                /* purecov: inspected */
+  }
   table->file->ft_init();
 
-  if ((error= table->file->ft_read(table->record[0])))
+  if ((error= table->file->ha_ft_read(table->record[0])))
     return report_error(table, error);
   return 0;
 }
@@ -12567,7 +16267,7 @@ static int
 join_ft_read_next(READ_RECORD *info)
 {
   int error;
-  if ((error= info->file->ft_read(info->table->record[0])))
+  if ((error= info->file->ha_ft_read(info->table->record[0])))
     return report_error(info->table, error);
   return 0;
 }
@@ -12642,8 +16342,7 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
   DBUG_ENTER("end_send");
   if (!end_of_records)
   {
-    int error;
-    if (join->tables &&
+    if (join->table_count &&
         join->join_tab->is_using_loose_index_scan())
     {
       /* Copy non-aggregated fields when loose index scan is used. */
@@ -12657,18 +16356,20 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
         DBUG_RETURN(NESTED_LOOP_ERROR);
       DBUG_RETURN(NESTED_LOOP_OK);
     }
-    error=0;
     if (join->do_send_rows)
-      error=join->result->send_data(*join->fields);
-    if (error)
-      DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
+    {
+      int error;
+      /* result < 0 if row was not accepted and should not be counted */
+      if ((error= join->result->send_data(*join->fields)))
+        DBUG_RETURN(error < 0 ? NESTED_LOOP_OK : NESTED_LOOP_ERROR);
+    }
     if (++join->send_records >= join->unit->select_limit_cnt &&
 	join->do_send_rows)
     {
       if (join->select_options & OPTION_FOUND_ROWS)
       {
 	JOIN_TAB *jt=join->join_tab;
-	if ((join->tables == 1) && !join->tmp_table && !join->sort_and_group
+	if ((join->table_count == 1) && !join->tmp_table && !join->sort_and_group
 	    && !join->send_group_parts && !join->having && !jt->select_cond &&
 	    !(jt->select && jt->select->quick) &&
 	    (jt->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
@@ -12719,7 +16420,7 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 
 
 	/* ARGSUSED */
-static enum_nested_loop_state
+enum_nested_loop_state
 end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	       bool end_of_records)
 {
@@ -12757,8 +16458,11 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	  {
             List_iterator_fast<Item> it(*join->fields);
             Item *item;
+            DBUG_PRINT("info", ("no matching rows"));
+
 	    /* No matching rows for group function */
 	    join->clear();
+            join->no_rows_in_result_called= 1;
 
             while ((item= it++))
               item->no_rows_in_result();
@@ -12768,7 +16472,15 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	  else
 	  {
 	    if (join->do_send_rows)
-	      error=join->result->send_data(*join->fields) ? 1 : 0;
+            {
+	      error= join->result->send_data(*join->fields);
+              if (error < 0)
+              {
+                /* Duplicate row, don't count */
+                join->send_records--;
+                error= 0;
+              }
+            }
 	    join->send_records++;
 	  }
 	  if (join->rollup.state != ROLLUP::STATE_NONE && error <= 0)
@@ -12855,12 +16567,14 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
     {
       int error;
       join->found_records++;
-      if ((error=table->file->ha_write_row(table->record[0])))
+      if ((error= table->file->ha_write_tmp_row(table->record[0])))
       {
         if (!table->file->is_fatal_error(error, HA_CHECK_DUP))
 	  goto end;
-	if (create_myisam_from_heap(join->thd, table, &join->tmp_table_param,
-				    error,1))
+	if (create_internal_tmp_table_from_heap(join->thd, table, 
+                                                join->tmp_table_param.start_recinfo,
+                                                &join->tmp_table_param.recinfo,
+                                                error,1))
 	  DBUG_RETURN(NESTED_LOOP_ERROR);        // Not a table_is_full error
 	table->s->uniques=0;			// To ensure rows are the same
       }
@@ -12910,15 +16624,15 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
     if (item->maybe_null)
       group->buff[-1]= (char) group->field->is_null();
   }
-  if (!table->file->index_read_map(table->record[1],
-                                   join->tmp_table_param.group_buff,
-                                   HA_WHOLE_KEY,
-                                   HA_READ_KEY_EXACT))
+  if (!table->file->ha_index_read_map(table->record[1],
+                                      join->tmp_table_param.group_buff,
+                                      HA_WHOLE_KEY,
+                                      HA_READ_KEY_EXACT))
   {						/* Update old record */
     restore_record(table,record[1]);
     update_tmptable_sum_func(join->sum_funcs,table);
-    if ((error=table->file->ha_update_row(table->record[1],
-                                          table->record[0])))
+    if ((error= table->file->ha_update_tmp_row(table->record[1],
+                                               table->record[0])))
     {
       table->file->print_error(error,MYF(0));	/* purecov: inspected */
       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
@@ -12942,14 +16656,20 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
   init_tmptable_sum_functions(join->sum_funcs);
   if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd))
     DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
-  if ((error=table->file->ha_write_row(table->record[0])))
+  if ((error= table->file->ha_write_tmp_row(table->record[0])))
   {
-    if (create_myisam_from_heap(join->thd, table, &join->tmp_table_param,
-				error, 0))
+    if (create_internal_tmp_table_from_heap(join->thd, table,
+                                            join->tmp_table_param.start_recinfo,
+                                            &join->tmp_table_param.recinfo,
+                                            error, 0))
       DBUG_RETURN(NESTED_LOOP_ERROR);            // Not a table_is_full error
     /* Change method to update rows */
-    table->file->ha_index_init(0, 0);
-    join->join_tab[join->tables-1].next_select=end_unique_update;
+    if ((error= table->file->ha_index_init(0, 0)))
+    {
+      table->file->print_error(error, MYF(0));/* purecov: inspected */
+      DBUG_RETURN(NESTED_LOOP_ERROR);         /* purecov: inspected */
+    }
+    join->join_tab[join->top_join_tab_count-1].next_select=end_unique_update;
   }
   join->send_records++;
   DBUG_RETURN(NESTED_LOOP_OK);
@@ -12979,7 +16699,7 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
   if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd))
     DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
 
-  if (!(error=table->file->ha_write_row(table->record[0])))
+  if (!(error= table->file->ha_write_tmp_row(table->record[0])))
     join->send_records++;			// New group
   else
   {
@@ -12988,15 +16708,15 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
       table->file->print_error(error,MYF(0));	/* purecov: inspected */
       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
     }
-    if (table->file->rnd_pos(table->record[1],table->file->dup_ref))
+    if (table->file->ha_rnd_pos(table->record[1],table->file->dup_ref))
     {
       table->file->print_error(error,MYF(0));	/* purecov: inspected */
       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
     }
     restore_record(table,record[1]);
     update_tmptable_sum_func(join->sum_funcs,table);
-    if ((error=table->file->ha_update_row(table->record[1],
-                                          table->record[0])))
+    if ((error= table->file->ha_update_tmp_row(table->record[1],
+                                               table->record[0])))
     {
       table->file->print_error(error,MYF(0));	/* purecov: inspected */
       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
@@ -13007,7 +16727,7 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 
 
 	/* ARGSUSED */
-static enum_nested_loop_state
+enum_nested_loop_state
 end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 		bool end_of_records)
 {
@@ -13039,10 +16759,12 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
                        join->sum_funcs_end[send_group_parts]);
 	if (!join->having || join->having->val_int())
 	{
-          int error= table->file->ha_write_row(table->record[0]);
-          if (error && create_myisam_from_heap(join->thd, table,
-                                               &join->tmp_table_param,
-                                               error, 0))
+          int error= table->file->ha_write_tmp_row(table->record[0]);
+          if (error && 
+              create_internal_tmp_table_from_heap(join->thd, table,
+                                                  join->tmp_table_param.start_recinfo,
+                                                  &join->tmp_table_param.recinfo,
+                                                  error, 0))
 	    DBUG_RETURN(NESTED_LOOP_ERROR);
         }
         if (join->rollup.state != ROLLUP::STATE_NONE)
@@ -13090,18 +16812,54 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 *****************************************************************************/
 
 /**
-  @return
-    1 if right_item is used removable reference key on left_item
+  Check if "left_item=right_item" equality is guaranteed to be true by use of
+  [eq]ref access on left_item->field->table.
+
+  SYNOPSIS
+    test_if_ref()
+      root_cond
+      left_item
+      right_item
+
+  DESCRIPTION
+    Check if the given "left_item = right_item" equality is guaranteed to be
+    true by use of [eq_]ref access method.
+
+    We need root_cond as we can't remove ON expressions even if the employed
+    ref access guarantees that they are true. TODO: explain why this is so.
+
+  RETURN
+    TRUE   if right_item is used as a removable reference key on left_item
+    FALSE  Otherwise
+    
 */
 
-static bool test_if_ref(Item_field *left_item,Item *right_item)
+bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item)
 {
   Field *field=left_item->field;
-  // No need to change const test. We also have to keep tests on LEFT JOIN
-  if (!field->table->const_table && !field->table->maybe_null)
+  JOIN_TAB *join_tab= field->table->reginfo.join_tab;
+  // No need to change const test
+  if (!field->table->const_table && join_tab &&
+      !join_tab->is_ref_for_hash_join() &&
+      (!join_tab->first_inner ||
+       *join_tab->first_inner->on_expr_ref == root_cond))
   {
+    /*
+      If ref access uses "Full scan on NULL key" (i.e. it actually alternates
+      between ref access and full table scan), then no equality can be
+      guaranteed to be true.
+    */
+    for (uint i = 0; i < join_tab->ref.key_parts; i++)
+    {
+      if (join_tab->ref.cond_guards[i])
+      {
+        return FALSE;
+      }
+    }
+
     Item *ref_item=part_of_refkey(field->table,field);
-    if (ref_item && ref_item->eq(right_item,1))
+    if (ref_item && (ref_item->eq(right_item,1) || 
+		     ref_item->real_item()->eq(right_item,1)))
     {
       right_item= right_item->real_item();
       if (right_item->type() == Item::FIELD_ITEM)
@@ -13113,7 +16871,7 @@ static bool test_if_ref(Item_field *left_item,Item *right_item)
       {
 	/*
 	  We can remove binary fields and numerical fields except float,
-	  as float comparison isn't 100 % secure
+	  as float comparison isn't 100 % safe
 	  We have to keep normal strings to be able to check for end spaces
 	*/
 	if (field->binary() &&
@@ -13129,14 +16887,24 @@ static bool test_if_ref(Item_field *left_item,Item *right_item)
   return 0;					// keep test
 }
 
+
 /**
    Extract a condition that can be checked after reading given table
+   @fn make_cond_for_table()
 
    @param cond       Condition to analyze
    @param tables     Tables for which "current field values" are available
-   @param used_table Table that we're extracting the condition for (may
-                     also include PSEUDO_TABLE_BITS, and may be zero)
-   @param exclude_expensive_cond  Do not push expensive conditions
+   @param used_table Table that we're extracting the condition for
+                     (may also include PSEUDO_TABLE_BITS, and may be zero).
+                     Note that the set passed in 'tables' includes
+                     used_table.
+   @param join_tab_idx_arg
+		     The index of the JOIN_TAB this Item is being extracted
+                     for. MAX_TABLES if there is no corresponding JOIN_TAB.
+   @param exclude_expensive_cond
+		     Do not push expensive conditions
+   @param retain_ref_cond
+                     Retain ref conditions
 
    @retval <>NULL Generated condition
    @retval =NULL  Already checked, OR error
@@ -13166,11 +16934,161 @@ static bool test_if_ref(Item_field *left_item,Item *right_item)
      make_cond_for_info_schema() uses similar algorithm as well.
 */ 
 
-static COND *
-make_cond_for_table(COND *cond, table_map tables, table_map used_table)
+static Item *
+make_cond_for_table(THD *thd, Item *cond, table_map tables,
+                    table_map used_table,
+                    uint join_tab_idx_arg,
+                    bool exclude_expensive_cond __attribute__((unused)),
+		    bool retain_ref_cond)
+{
+  return make_cond_for_table_from_pred(thd, cond, cond, tables, used_table,
+                                       join_tab_idx_arg,
+                                       exclude_expensive_cond,
+                                       retain_ref_cond);
+}
+
+
+static Item *
+make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond,
+                              table_map tables, table_map used_table,
+                              uint join_tab_idx_arg,
+                              bool exclude_expensive_cond __attribute__
+                              ((unused)),
+                              bool retain_ref_cond)
+
 {
   if (used_table && !(cond->used_tables() & used_table))
     return (COND*) 0;				// Already checked
+
+  if (cond->type() == Item::COND_ITEM)
+  {
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      /* Create new top level AND item */
+      Item_cond_and *new_cond=new Item_cond_and;
+      if (!new_cond)
+	return (COND*) 0;			// OOM /* purecov: inspected */
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix=make_cond_for_table_from_pred(thd, root_cond, item, 
+                                                tables, used_table,
+						join_tab_idx_arg,
+                                                exclude_expensive_cond,
+                                                retain_ref_cond);
+	if (fix)
+	  new_cond->argument_list()->push_back(fix);
+      }
+      switch (new_cond->argument_list()->elements) {
+      case 0:
+	return (COND*) 0;			// Always true
+      case 1:
+	return new_cond->argument_list()->head();
+      default:
+	/*
+          Call fix_fields to propagate all properties of the children to
+          the new parent Item. This should not be expensive because all
+	  children of Item_cond_and should be fixed by now.
+	*/
+	new_cond->fix_fields(thd, 0);
+	new_cond->used_tables_cache=
+	  ((Item_cond_and*) cond)->used_tables_cache &
+	  tables;
+	return new_cond;
+      }
+    }
+    else
+    {						// Or list
+      Item_cond_or *new_cond=new Item_cond_or;
+      if (!new_cond)
+	return (COND*) 0;			// OOM /* purecov: inspected */
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix=make_cond_for_table_from_pred(thd, root_cond, item,
+                                                tables, 0L,
+                                                join_tab_idx_arg,
+                                                exclude_expensive_cond,
+                                                retain_ref_cond);
+	if (!fix)
+	  return (COND*) 0;			// Always true
+	new_cond->argument_list()->push_back(fix);
+      }
+      /*
+        Call fix_fields to propagate all properties of the children to
+        the new parent Item. This should not be expensive because all
+        children of Item_cond_or should be fixed by now.
+      */
+      new_cond->fix_fields(thd, 0);
+      new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
+      new_cond->top_level_item();
+      return new_cond;
+    }
+  }
+
+  /*
+    Because the following test takes a while and it can be done
+    table_count times, we mark each item that we have examined with the result
+    of the test
+  */
+  if ((cond->marker == 3 && !retain_ref_cond) ||
+      (cond->used_tables() & ~tables))
+    return (COND*) 0;				// Can't check this yet
+
+  if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK)
+  {
+    cond->set_join_tab_idx(join_tab_idx_arg);
+    return cond;				// Not boolean op
+  }
+
+  if (cond->type() == Item::FUNC_ITEM && 
+      ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
+  {
+    Item *left_item=	((Item_func*) cond)->arguments()[0]->real_item();
+    Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
+    if (left_item->type() == Item::FIELD_ITEM && !retain_ref_cond &&
+	test_if_ref(root_cond, (Item_field*) left_item,right_item))
+    {
+      cond->marker=3;			// Checked when read
+      return (COND*) 0;
+    }
+    if (right_item->type() == Item::FIELD_ITEM && !retain_ref_cond &&
+	test_if_ref(root_cond, (Item_field*) right_item,left_item))
+    {
+      cond->marker=3;			// Checked when read
+      return (COND*) 0;
+    }
+  }
+  cond->marker=2;
+  cond->set_join_tab_idx(join_tab_idx_arg);
+  return cond;
+}
+
+
+/*
+  The difference of this from make_cond_for_table() is that we're in the
+  following state:
+    1. conditions referring to 'tables' have been checked
+    2. conditions referring to sjm_tables have been checked, too
+    3. We need the conditions that couldn't be checked in #1 or #2 but
+       can be checked once we have both (tables | sjm_tables).
+
+*/
+static COND *
+make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables, 
+                    table_map sjm_tables)
+{
+  /*
+    We assume that conditions that refer to only join prefix tables or 
+    sjm_tables have already been checked.
+  */
+  if ((!(cond->used_tables() & ~tables) || 
+       !(cond->used_tables() & ~sjm_tables)))
+    return (COND*) 0;				// Already checked
+
+  /* AND/OR recursive descent */
   if (cond->type() == Item::COND_ITEM)
   {
     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
@@ -13183,7 +17101,7 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
       Item *item;
       while ((item=li++))
       {
-	Item *fix=make_cond_for_table(item,tables,used_table);
+	Item *fix=make_cond_after_sjm(root_cond, item, tables, sjm_tables);
 	if (fix)
 	  new_cond->argument_list()->push_back(fix);
       }
@@ -13213,7 +17131,7 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
       Item *item;
       while ((item=li++))
       {
-	Item *fix=make_cond_for_table(item,tables,0L);
+	Item *fix= make_cond_after_sjm(root_cond, item, tables, 0L);
 	if (!fix)
 	  return (COND*) 0;			// Always true
 	new_cond->argument_list()->push_back(fix);
@@ -13235,23 +17153,27 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
     of the test
   */
 
-  if (cond->marker == 3 || (cond->used_tables() & ~tables))
+  if (cond->marker == 3 || (cond->used_tables() & ~(tables | sjm_tables)))
     return (COND*) 0;				// Can't check this yet
   if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK)
     return cond;				// Not boolean op
 
+  /* 
+    Remove equalities that are guaranteed to be true by use of 'ref' access
+    method
+  */
   if (((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
   {
-    Item *left_item=	((Item_func*) cond)->arguments()[0];
-    Item *right_item= ((Item_func*) cond)->arguments()[1];
+    Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
+    Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
     if (left_item->type() == Item::FIELD_ITEM &&
-	test_if_ref((Item_field*) left_item,right_item))
+	test_if_ref(root_cond, (Item_field*) left_item,right_item))
     {
       cond->marker=3;			// Checked when read
       return (COND*) 0;
     }
     if (right_item->type() == Item::FIELD_ITEM &&
-	test_if_ref((Item_field*) right_item,left_item))
+	test_if_ref(root_cond, (Item_field*) right_item,left_item))
     {
       cond->marker=3;			// Checked when read
       return (COND*) 0;
@@ -13261,22 +17183,62 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
   return cond;
 }
 
+
+/*
+  @brief
+
+  Check if
+   - @table uses "ref"-like access 
+   - it is based on "@field=certain_item" equality
+   - the equality will be true for any record returned by the access method
+  and return the certain_item if yes.
+  
+  @detail
+  
+  Equality won't necessarily hold if:
+   - the used index covers only part of the @field. 
+     Suppose we have a CHAR(5) field and INDEX(field(3)). If you make a lookup
+     for 'abc', you will get both the records with 'abc' and with 'abcde'.
+   - The type of access is actually ref_or_null, and so @field can be either 
+     a value or NULL.
+
+  @return 
+    Item that the field will be equal to
+    NULL if no such item 
+*/
+
 static Item *
 part_of_refkey(TABLE *table,Field *field)
 {
-  if (!table->reginfo.join_tab)
+  JOIN_TAB *join_tab= table->reginfo.join_tab;
+  if (!join_tab)
     return (Item*) 0;             // field from outer non-select (UPDATE,...)
 
-  uint ref_parts=table->reginfo.join_tab->ref.key_parts;
-  if (ref_parts)
+  uint ref_parts= join_tab->ref.key_parts;
+  if (ref_parts) /* if it's ref/eq_ref/ref_or_null */
   {
-    KEY_PART_INFO *key_part=
-      table->key_info[table->reginfo.join_tab->ref.key].key_part;
+    uint key= join_tab->ref.key;
+    KEY *key_info= join_tab->get_keyinfo_by_key_no(key);
+    KEY_PART_INFO *key_part= key_info->key_part;
 
     for (uint part=0 ; part < ref_parts ; part++,key_part++)
-      if (field->eq(key_part->field) &&
-	  !(key_part->key_part_flag & (HA_PART_KEY_SEG | HA_NULL_PART)))
-	return table->reginfo.join_tab->ref.items[part];
+    {
+      if (field->eq(key_part->field))
+      {
+        /*
+          Found the field in the key. Check that 
+           1. ref_or_null doesn't alternate this component between a value and
+              a NULL
+           2. the key part covers the whole field (not just a prefix)
+        */
+        if (part != join_tab->ref.null_ref_part &&            // (1)
+            !(key_part->key_part_flag & HA_PART_KEY_SEG))     // (2)
+        {
+          return join_tab->ref.items[part];
+        }
+        break;
+      }
+    }
   }
   return (Item*) 0;
 }
@@ -13410,23 +17372,6 @@ ok:
   @param table                 Table to scan
   @param usable_keys           Allowed keys
 
-  @note
-     As far as 
-     1) clustered primary key entry data set is a set of all record
-        fields (key fields and not key fields) and
-     2) secondary index entry data is a union of its key fields and
-        primary key fields (at least InnoDB and its derivatives don't
-        duplicate primary key fields there, even if the primary and
-        the secondary keys have a common subset of key fields),
-     then secondary index entry data is always a subset of primary key entry.
-     Unfortunately, key_info[nr].key_length doesn't show the length
-     of key/pointer pair but a sum of key field lengths only, thus
-     we can't estimate index IO volume comparing only this key_length
-     value of secondary keys and clustered PK.
-     So, try secondary keys first, and choose PK only if there are no
-     usable secondary covering keys or found best secondary key include
-     all table fields (i.e. same as PK):
-
   @return
     MAX_KEY     no suitable key found
     key index   otherwise
@@ -13434,41 +17379,23 @@ ok:
 
 uint find_shortest_key(TABLE *table, const key_map *usable_keys)
 {
+  double min_cost= DBL_MAX;
   uint best= MAX_KEY;
-  uint usable_clustered_pk= (table->file->primary_key_is_clustered() &&
-                             table->s->primary_key != MAX_KEY &&
-                             usable_keys->is_set(table->s->primary_key)) ?
-                            table->s->primary_key : MAX_KEY;
   if (!usable_keys->is_clear_all())
   {
-    uint min_length= (uint) ~0;
     for (uint nr=0; nr < table->s->keys ; nr++)
     {
-      if (nr == usable_clustered_pk)
-        continue;
       if (usable_keys->is_set(nr))
       {
-        if (table->key_info[nr].key_length < min_length)
+        double cost= table->file->keyread_time(nr, 1, table->file->records());
+        if (cost < min_cost)
         {
-          min_length=table->key_info[nr].key_length;
+          min_cost= cost;
           best=nr;
         }
       }
     }
   }
-  if (usable_clustered_pk != MAX_KEY)
-  {
-    /*
-     If the primary key is clustered and found shorter key covers all table
-     fields then primary key scan normally would be faster because amount of
-     data to scan is the same but PK is clustered.
-     It's safe to compare key parts with table fields since duplicate key
-     parts aren't allowed.
-     */
-    if (best == MAX_KEY ||
-        table->key_info[best].key_parts >= table->s->fields)
-      best= usable_clustered_pk;
-  }
   return best;
 }
 
@@ -13573,8 +17500,6 @@ static bool
 list_contains_unique_index(TABLE *table,
                           bool (*find_func) (Field *, void *), void *data)
 {
-  if (table->pos_in_table_list->outer_join)
-    return 0;
   for (uint keynr= 0; keynr < table->s->keys; keynr++)
   {
     if (keynr == table->s->primary_key ||
@@ -13588,7 +17513,7 @@ list_contains_unique_index(TABLE *table,
            key_part < key_part_end;
            key_part++)
       {
-        if (key_part->field->real_maybe_null() || 
+        if (key_part->field->maybe_null() ||
             !find_func(key_part->field, data))
           break;
       }
@@ -13690,20 +17615,21 @@ find_field_in_item_list (Field *field, void *data)
 
 static bool
 test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
-			bool no_changes, key_map *map)
+			bool no_changes, const key_map *map)
 {
   int ref_key;
-  uint ref_key_parts;
+  uint UNINIT_VAR(ref_key_parts);
   int order_direction= 0;
   uint used_key_parts;
   TABLE *table=tab->table;
   SQL_SELECT *select=tab->select;
   key_map usable_keys;
-  QUICK_SELECT_I *save_quick= 0;
+  QUICK_SELECT_I *save_quick= select ? select->quick : 0;
+  Item *orig_cond= 0;
+  bool orig_cond_saved= false;
   int best_key= -1;
-
+  bool changed_key= false;
   DBUG_ENTER("test_if_skip_sort_order");
-  LINT_INIT(ref_key_parts);
 
   /*
     Keys disabled by ALTER TABLE ... DISABLE KEYS should have already
@@ -13721,7 +17647,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
     }
     usable_keys.intersect(((Item_field*) item)->field->part_of_sortkey);
     if (usable_keys.is_clear_all())
-      DBUG_RETURN(0);					// No usable keys
+      goto use_filesort;                        // No usable keys
   }
 
   ref_key= -1;
@@ -13731,22 +17657,22 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
     ref_key=	   tab->ref.key;
     ref_key_parts= tab->ref.key_parts;
     if (tab->type == JT_REF_OR_NULL || tab->type == JT_FT)
-      DBUG_RETURN(0);
+      goto use_filesort;
   }
   else if (select && select->quick)		// Range found by opt_range
   {
     int quick_type= select->quick->get_type();
-    save_quick= select->quick;
     /* 
       assume results are not ordered when index merge is used 
       TODO: sergeyp: Results of all index merge selects actually are ordered 
       by clustered PK values.
     */
   
-    if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || 
+    if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
+        quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT || 
         quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || 
         quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
-      DBUG_RETURN(0);
+      goto use_filesort;
     ref_key=	   select->quick->index;
     ref_key_parts= select->quick->used_key_parts;
   }
@@ -13768,10 +17694,15 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
       */
       if (table->covering_keys.is_set(ref_key))
 	usable_keys.intersect(table->covering_keys);
+      if (tab->pre_idx_push_select_cond)
+      {
+        orig_cond= tab->set_cond(tab->pre_idx_push_select_cond);
+        orig_cond_saved= true;
+      }
+
       if ((new_ref_key= test_if_subkey(order, table, ref_key, ref_key_parts,
 				       &usable_keys)) < MAX_KEY)
       {
-	/* Found key that can be used to retrieve data in sorted order */
 	if (tab->ref.key >= 0)
 	{
           /*
@@ -13785,9 +17716,10 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
           KEYUSE *keyuse= tab->keyuse;
           while (keyuse->key != new_ref_key && keyuse->table == tab->table)
             keyuse++;
+
           if (create_ref_for_key(tab->join, tab, keyuse, 
                                  tab->join->const_table_map))
-            DBUG_RETURN(0);
+            goto use_filesort;
 
           pick_table_access_method(tab);
 	}
@@ -13797,25 +17729,41 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
             The range optimizer constructed QUICK_RANGE for ref_key, and
             we want to use instead new_ref_key as the index. We can't
             just change the index of the quick select, because this may
-            result in an incosistent QUICK_SELECT object. Below we
+            result in an inconsistent QUICK_SELECT object. Below we
             create a new QUICK_SELECT from scratch so that all its
-            parameres are set correctly by the range optimizer.
+            parameters are set correctly by the range optimizer.
            */
           key_map new_ref_key_map;
+          COND *save_cond;
+          bool res;
           new_ref_key_map.clear_all();  // Force the creation of quick select
           new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key.
 
+          /* Reset quick;  This will be restored in 'use_filesort' if needed */
           select->quick= 0;
-          if (select->test_quick_select(tab->join->thd, new_ref_key_map, 0,
-                                        (tab->join->select_options &
-                                         OPTION_FOUND_ROWS) ?
-                                        HA_POS_ERROR :
-                                        tab->join->unit->select_limit_cnt,0) <=
-              0)
+          save_cond= select->cond;
+          if (select->pre_idx_push_select_cond)
+            select->cond= select->pre_idx_push_select_cond;
+          res= select->test_quick_select(tab->join->thd, new_ref_key_map, 0,
+                                         (tab->join->select_options &
+                                          OPTION_FOUND_ROWS) ?
+                                         HA_POS_ERROR :
+                                         tab->join->unit->select_limit_cnt,0,
+                                         TRUE) <= 0;
+          if (res)
+          {
+            select->cond= save_cond;
             goto use_filesort;
+          }
+          /*
+            We don't restore select->cond as we want to use the
+            original condition as index condition pushdown is not
+            active for the new index.
+          */
 	}
         ref_key= new_ref_key;
-      }
+        changed_key= true;
+     }
     }
     /* Check if we get the rows in requested sorted order by using the key */
     if (usable_keys.is_set(ref_key) &&
@@ -13824,7 +17772,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
       goto check_reverse_order;
   }
   {
-    uint best_key_parts= 0;
+    uint UNINIT_VAR(best_key_parts);
     uint saved_best_key_parts= 0;
     int best_key_direction= 0;
     JOIN *join= tab->join;
@@ -13839,48 +17787,45 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
     /*
       filesort() and join cache are usually faster than reading in 
       index order and not using join cache, except in case that chosen
-      index is clustered primary key.
+      index is clustered key.
     */
-    if ((select_limit >= table_records) &&
-        (tab->type == JT_ALL &&
-         tab->join->tables > tab->join->const_tables + 1) &&
-         ((unsigned) best_key != table->s->primary_key ||
-          !table->file->primary_key_is_clustered()))
+    if (best_key < 0 ||
+        ((select_limit >= table_records) &&
+         (tab->type == JT_ALL &&
+         tab->join->table_count > tab->join->const_tables + 1) &&
+         !(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX)))
       goto use_filesort;
 
-    if (best_key >= 0)
+    if (table->quick_keys.is_set(best_key) && best_key != ref_key)
     {
-      if (table->quick_keys.is_set(best_key) && best_key != ref_key)
-      {
-        key_map map;
-        map.clear_all();       // Force the creation of quick select
-        map.set_bit(best_key); // only best_key.
-        select->quick= 0;
-        select->test_quick_select(join->thd, map, 0,
-                                  join->select_options & OPTION_FOUND_ROWS ?
-                                  HA_POS_ERROR :
-                                  join->unit->select_limit_cnt,
-                                  0);
-      }
-      order_direction= best_key_direction;
-      /*
-        saved_best_key_parts is actual number of used keyparts found by the
-        test_if_order_by_key function. It could differ from keyinfo->key_parts,
-        thus we have to restore it in case of desc order as it affects
-        QUICK_SELECT_DESC behaviour.
-      */
-      used_key_parts= (order_direction == -1) ?
-        saved_best_key_parts :  best_key_parts;
+      key_map map;
+      map.clear_all();       // Force the creation of quick select
+      map.set_bit(best_key); // only best_key.
+      select->quick= 0;
+      select->test_quick_select(join->thd, map, 0,
+                                join->select_options & OPTION_FOUND_ROWS ?
+                                HA_POS_ERROR :
+                                join->unit->select_limit_cnt,
+                                TRUE, FALSE);
     }
-    else
-      goto use_filesort;
-  } 
+    order_direction= best_key_direction;
+    /*
+      saved_best_key_parts is actual number of used keyparts found by the
+      test_if_order_by_key function. It could differ from keyinfo->key_parts,
+      thus we have to restore it in case of desc order as it affects
+      QUICK_SELECT_DESC behaviour.
+    */
+    used_key_parts= (order_direction == -1) ?
+      saved_best_key_parts :  best_key_parts;
+    changed_key= true;
+  }
 
 check_reverse_order:                  
   DBUG_ASSERT(order_direction != 0);
 
   if (order_direction == -1)		// If ORDER BY ... DESC
   {
+    int quick_type;
     if (select && select->quick)
     {
       /*
@@ -13889,25 +17834,23 @@ check_reverse_order:
       */
       if (select->quick->reverse_sorted())
         goto skipped_filesort;
-      else
+
+      quick_type= select->quick->get_type();
+      if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
+          quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
+          quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
+          quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
+          quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
       {
-        int quick_type= select->quick->get_type();
-        if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
-            quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
-            quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
-            quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
-        {
-          tab->limit= 0;
-          goto use_filesort;               // Use filesort
-        }
+        tab->limit= 0;
+        goto use_filesort;               // Use filesort
       }
     }
   }
 
   /*
-    Update query plan with access pattern for doing 
-    ordered access according to what we have decided
-    above.
+    Update query plan with access pattern for doing ordered access
+    according to what we have decided above.
   */
   if (!no_changes) // We are allowed to update QEP
   {
@@ -13921,7 +17864,7 @@ check_reverse_order:
          and best_key doesn't, then revert the decision.
       */
       if (!table->covering_keys.is_set(best_key))
-        table->set_keyread(FALSE);
+        table->disable_keyread();
       if (!quick_created)
       {
         if (select)                  // Throw any existing quick select
@@ -13932,14 +17875,24 @@ check_reverse_order:
                                 join_read_first:join_read_last;
         tab->type=JT_NEXT;           // Read with index_first(), index_next()
 
-        if (table->covering_keys.is_set(best_key))
-          table->set_keyread(TRUE);
+        if (table->covering_keys.is_set(best_key) && ! table->key_read)
+          table->enable_keyread();
+        if (tab->pre_idx_push_select_cond)
+        {
+          tab->set_cond(tab->pre_idx_push_select_cond);
+          /*
+            orig_cond is a part of pre_idx_push_cond,
+            no need to restore it.
+          */
+          orig_cond= 0;
+          orig_cond_saved= false;
+        }
         table->file->ha_index_or_rnd_end();
         if (tab->join->select_options & SELECT_DESCRIBE)
         {
           tab->ref.key= -1;
           tab->ref.key_parts= 0;
-          if (select_limit < table->file->stats.records) 
+          if (select_limit < table->file->stats.records)
             tab->limit= select_limit;
         }
       }
@@ -13958,6 +17911,14 @@ check_reverse_order:
         tab->read_first_record= join_init_read_record;
         if (tab->is_using_loose_index_scan())
           tab->join->tmp_table_param.precomputed_group_by= TRUE;
+
+        /*
+          Restore the original condition as changes done by pushdown
+          condition are not relevant anymore
+        */
+        if (tab->select && tab->select->pre_idx_push_select_cond)
+          tab->set_cond(tab->select->pre_idx_push_select_cond);
+
         /*
           TODO: update the number of records in join->best_positions[tablenr]
         */
@@ -13993,7 +17954,7 @@ check_reverse_order:
       }
     }
     else if (select && select->quick)
-      select->quick->sorted= 1;
+      select->quick->need_sorted_output();
 
   } // QEP has been modified
 
@@ -14010,6 +17971,14 @@ skipped_filesort:
     delete save_quick;
     save_quick= NULL;
   }
+          /*
+            orig_cond is a part of pre_idx_push_cond,
+            no need to restore it.
+          */
+          orig_cond= 0;
+          orig_cond_saved= false;
+  if (orig_cond_saved && !changed_key)
+    tab->set_cond(orig_cond);
   DBUG_RETURN(1);
 
 use_filesort:
@@ -14019,6 +17988,8 @@ use_filesort:
     delete select->quick;
     select->quick= save_quick;
   }
+  if (orig_cond_saved)
+    tab->set_cond(orig_cond);
   DBUG_RETURN(0);
 }
 
@@ -14064,12 +18035,15 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
   JOIN_TAB *tab;
   DBUG_ENTER("create_sort_index");
 
-  if (join->tables == join->const_tables)
+  if (join->table_count == join->const_tables)
     DBUG_RETURN(0);				// One row, no need to sort
   tab=    join->join_tab + join->const_tables;
   table=  tab->table;
   select= tab->select;
 
+  /* Currently ORDER BY ... LIMIT is not supported in subqueries. */
+  DBUG_ASSERT(join->group_list || !join->is_in_subquery());
+
   /*
     When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
     and thus force sorting on disk unless a group min-max optimization
@@ -14106,7 +18080,7 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
         and in index_merge 'Only index' cannot be used
       */
       if (((uint) tab->ref.key != select->quick->index))
-        table->set_keyread(FALSE);
+        table->disable_keyread();
     }
     else
     {
@@ -14117,7 +18091,7 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
 	field, quick will contain an empty record set.
       */
       if (!(select->quick= (tab->type == JT_FT ?
-			    new FT_SELECT(thd, table, tab->ref.key) :
+			    get_ft_select(thd, table, tab->ref.key) :
 			    get_quick_select_for_ref(thd, table, &tab->ref, 
                                                      tab->found_records))))
 	goto err;
@@ -14129,6 +18103,8 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
       get_schema_tables_result(join, PROCESSED_BY_CREATE_SORT_INDEX))
     goto err;
 
+  if (!tab->preread_init_done && tab->preread_init())
+    goto err;
   if (table->s->tmp_table)
     table->file->info(HA_STATUS_VARIABLE);	// Get record count
   table->sort.found_records=filesort(thd, table,join->sortorder, length,
@@ -14154,21 +18130,21 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
     select->cleanup();				// filesort did select
     tab->select= 0;
     table->quick_keys.clear_all();  // as far as we cleanup select->quick
+    table->intersect_keys.clear_all();
     table->sort.io_cache= tablesort_result_cache;
   }
-  tab->select_cond=0;
+  tab->set_select_cond(NULL, __LINE__);
   tab->last_inner= 0;
   tab->first_unmatched= 0;
   tab->type=JT_ALL;				// Read with normal read_record
   tab->read_first_record= join_init_read_record;
   tab->join->examined_rows+=examined_rows;
-  table->set_keyread(FALSE); // Restore if we used indexes
+  table->disable_keyread(); // Restore if we used indexes
   DBUG_RETURN(table->sort.found_records == HA_POS_ERROR);
 err:
   DBUG_RETURN(-1);
 }
 
-
 /*****************************************************************************
   Remove duplicates from tmp table
   This should be recoded to add a unique index to the table and remove
@@ -14271,8 +18247,10 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
   org_record=(char*) (record=table->record[0])+offset;
   new_record=(char*) table->record[1]+offset;
 
-  file->ha_rnd_init(1);
-  error=file->rnd_next(record);
+  if (file->ha_rnd_init_with_error(1))
+    DBUG_RETURN(1);
+
+  error= file->ha_rnd_next(record);
   for (;;)
   {
     if (thd->killed)
@@ -14285,7 +18263,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
     {
       if (error == HA_ERR_RECORD_DELETED)
       {
-        error= file->rnd_next(record);
+        error= file->ha_rnd_next(record);
         continue;
       }
       if (error == HA_ERR_END_OF_FILE)
@@ -14294,9 +18272,9 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
     }
     if (having && !having->val_int())
     {
-      if ((error=file->ha_delete_row(record)))
+      if ((error= file->ha_delete_row(record)))
 	goto err;
-      error=file->rnd_next(record);
+      error= file->ha_rnd_next(record);
       continue;
     }
     if (copy_blobs(first_field))
@@ -14311,7 +18289,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
     bool found=0;
     for (;;)
     {
-      if ((error=file->rnd_next(record)))
+      if ((error= file->ha_rnd_next(record)))
       {
 	if (error == HA_ERR_RECORD_DELETED)
 	  continue;
@@ -14321,19 +18299,20 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
       }
       if (compare_record(table, first_field) == 0)
       {
-	if ((error=file->ha_delete_row(record)))
+	if ((error= file->ha_delete_row(record)))
 	  goto err;
       }
       else if (!found)
       {
 	found=1;
-	file->position(record);	// Remember position
+        if ((error= file->remember_rnd_pos()))
+          goto err;
       }
     }
     if (!found)
       break;					// End of file
-    /* Restart search on next row */
-    error=file->restart_rnd_next(record,file->ref);
+    /* Restart search on saved row */
+    error=file->restart_rnd_next(record);
   }
 
   file->extra(HA_EXTRA_NO_CACHE);
@@ -14399,7 +18378,9 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
     DBUG_RETURN(1);
   }
 
-  file->ha_rnd_init(1);
+  if ((error= file->ha_rnd_init(1)))
+    goto err;
+
   key_pos=key_buffer;
   for (;;)
   {
@@ -14410,7 +18391,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
       error=0;
       goto err;
     }
-    if ((error=file->rnd_next(record)))
+    if ((error= file->ha_rnd_next(record)))
     {
       if (error == HA_ERR_RECORD_DELETED)
 	continue;
@@ -14420,7 +18401,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
     }
     if (having && !having->val_int())
     {
-      if ((error=file->ha_delete_row(record)))
+      if ((error= file->ha_delete_row(record)))
 	goto err;
       continue;
     }
@@ -14437,7 +18418,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
     if (my_hash_search(&hash, org_key_pos, key_length))
     {
       /* Duplicated found ; Remove the row */
-      if ((error=file->ha_delete_row(record)))
+      if ((error= file->ha_delete_row(record)))
 	goto err;
     }
     else
@@ -14503,274 +18484,52 @@ SORT_FIELD *make_unireg_sortorder(ORDER *order, uint *length,
 }
 
 
-/*****************************************************************************
-  Fill join cache with packed records
-  Records are stored in tab->cache.buffer and last record in
-  last record is stored with pointers to blobs to support very big
-  records
-******************************************************************************/
-
-static int
-join_init_cache(THD *thd,JOIN_TAB *tables,uint table_count)
-{
-  reg1 uint i;
-  uint length, blobs;
-  size_t size;
-  CACHE_FIELD *copy,**blob_ptr;
-  JOIN_CACHE  *cache;
-  JOIN_TAB *join_tab;
-  DBUG_ENTER("join_init_cache");
+/*
+  eq_ref: Create the lookup key and check if it is the same as saved key
 
-  cache= &tables[table_count].cache;
-  cache->fields=blobs=0;
 
-  join_tab=tables;
-  for (i=0 ; i < table_count ; i++,join_tab++)
-  {
-    if (!join_tab->used_fieldlength)		/* Not calced yet */
-      calc_used_field_length(thd, join_tab);
-    cache->fields+=join_tab->used_fields;
-    blobs+=join_tab->used_blobs;
-  }
-  if (!(cache->field=(CACHE_FIELD*)
-	sql_alloc(sizeof(CACHE_FIELD)*(cache->fields+table_count*2)+(blobs+1)*
 
-		  sizeof(CACHE_FIELD*))))
-  {
-    my_free(cache->buff);		/* purecov: inspected */
-    cache->buff=0;				/* purecov: inspected */
-    DBUG_RETURN(1);				/* purecov: inspected */
-  }
-  copy=cache->field;
-  blob_ptr=cache->blob_ptr=(CACHE_FIELD**)
-    (cache->field+cache->fields+table_count*2);
 
-  length=0;
-  for (i=0 ; i < table_count ; i++)
-  {
-    bool have_bit_fields= FALSE;
-    uint null_fields=0,used_fields;
-    Field **f_ptr,*field;
-    MY_BITMAP *read_set= tables[i].table->read_set;
-    for (f_ptr=tables[i].table->field,used_fields=tables[i].used_fields ;
-	 used_fields ;
-	 f_ptr++)
-    {
-      field= *f_ptr;
-      if (bitmap_is_set(read_set, field->field_index))
-      {
-	used_fields--;
-	length+=field->fill_cache_field(copy);
-	if (copy->type == CACHE_BLOB)
-	  (*blob_ptr++)=copy;
-	if (field->real_maybe_null())
-	  null_fields++;
-        if (field->type() == MYSQL_TYPE_BIT &&
-            ((Field_bit*)field)->bit_len)
-          have_bit_fields= TRUE;    
-	copy++;
-      }
-    }
-    /* Copy null bits from table */
-    if (null_fields || have_bit_fields)
-    {						/* must copy null bits */
-      copy->str= tables[i].table->null_flags;
-      copy->length= tables[i].table->s->null_bytes;
-      copy->type=0;
-      copy->field=0;
-      length+=copy->length;
-      copy++;
-      cache->fields++;
-    }
-    /* If outer join table, copy null_row flag */
-    if (tables[i].table->maybe_null)
-    {
-      copy->str= (uchar*) &tables[i].table->null_row;
-      copy->length=sizeof(tables[i].table->null_row);
-      copy->type=0;
-      copy->field=0;
-      length+=copy->length;
-      copy++;
-      cache->fields++;
-    }
-  }
-
-  cache->length=length+blobs*sizeof(char*);
-  cache->blobs=blobs;
-  *blob_ptr=0;					/* End sequentel */
-  size=max(thd->variables.join_buff_size, cache->length);
-  if (!(cache->buff=(uchar*) my_malloc(size,MYF(0))))
-    DBUG_RETURN(1);				/* Don't use cache */ /* purecov: inspected */
-  cache->end=cache->buff+size;
-  reset_cache_write(cache);
-  DBUG_RETURN(0);
-}
-
-
-static ulong
-used_blob_length(CACHE_FIELD **ptr)
-{
-  uint length,blob_length;
-  for (length=0 ; *ptr ; ptr++)
-  {
-    Field_blob *field_blob= (Field_blob *) (*ptr)->field;
-    (*ptr)->blob_length=blob_length= field_blob->get_length();
-    length+=blob_length;
-    field_blob->get_ptr(&(*ptr)->str);
-  }
-  return length;
-}
+  SYNOPSIS
+    cmp_buffer_with_ref()
+      thd      Thread handle (passed to cp_buffer_from_ref())
+      table    The table to read.  This is usually the join tab's table, except
+               for semi-join when we might need to make a lookup in a temptable
+               instead.
+      tab_ref  The structure with methods to collect index lookup tuple.
+               This is usually the join tab's ref, except for the case when
+               we're doing lookup into semi-join materialization table.
+
+  DESCRIPTION 
+    Used by eq_ref access method: create the index lookup key and check if 
+    we've used this key at previous lookup (If yes, we don't need to repeat
+    the lookup - the record has been already fetched)
 
+  RETURN
+    TRUE   No cached record for the key (always so if tab_ref->disable_cache
+           is set), or failed to create the key (due to out-of-domain error)
+    FALSE  The created key is the same as the previous one (and the record
+           is already in table->record)
+*/
 
 static bool
-store_record_in_cache(JOIN_CACHE *cache)
+cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref)
 {
-  uint length;
-  uchar *pos;
-  CACHE_FIELD *copy,*end_field;
-  bool last_record;
-
-  pos=cache->pos;
-  end_field=cache->field+cache->fields;
-
-  length=cache->length;
-  if (cache->blobs)
-    length+=used_blob_length(cache->blob_ptr);
-  if ((last_record= (length + cache->length > (size_t) (cache->end - pos))))
-    cache->ptr_record=cache->records;
-
-  /*
-    There is room in cache. Put record there
-  */
-  cache->records++;
-  for (copy=cache->field ; copy < end_field; copy++)
+  bool no_prev_key;
+  if (!tab_ref->disable_cache)
   {
-    if (copy->type == CACHE_BLOB)
+    if (!(no_prev_key= tab_ref->key_err))
     {
-      Field_blob *blob_field= (Field_blob *) copy->field;
-      if (last_record)
-      {
-	blob_field->get_image(pos, copy->length+sizeof(char*), 
-                              blob_field->charset());
-	pos+=copy->length+sizeof(char*);
-      }
-      else
-      {
-	blob_field->get_image(pos, copy->length, // blob length
-                              blob_field->charset());
-	memcpy(pos+copy->length,copy->str,copy->blob_length);  // Blob data
-	pos+=copy->length+copy->blob_length;
-      }
-    }
-    else
-    {
-      if (copy->type == CACHE_STRIPPED)
-      {
-	uchar *str,*end;
-        Field *field= copy->field;
-        if (field && field->maybe_null() && field->is_null())
-          end= str= copy->str;
-        else
-          for (str=copy->str,end= str+copy->length;
-               end > str && end[-1] == ' ' ;
-               end--) ;
-	length=(uint) (end-str);
-	memcpy(pos+2, str, length);
-        int2store(pos, length);
-	pos+= length+2;
-      }
-      else
-      {
-	memcpy(pos,copy->str,copy->length);
-	pos+=copy->length;
-      }
+      /* Previous access found a row. Copy its key */
+      memcpy(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length);
     }
   }
-  cache->pos=pos;
-  return last_record || (size_t) (cache->end - pos) < cache->length;
-}
-
-
-static void
-reset_cache_read(JOIN_CACHE *cache)
-{
-  cache->record_nr=0;
-  cache->pos=cache->buff;
-}
-
-
-static void reset_cache_write(JOIN_CACHE *cache)
-{
-  reset_cache_read(cache);
-  cache->records= 0;
-  cache->ptr_record= (uint) ~0;
-}
-
-
-static void
-read_cached_record(JOIN_TAB *tab)
-{
-  uchar *pos;
-  uint length;
-  bool last_record;
-  CACHE_FIELD *copy,*end_field;
-
-  last_record=tab->cache.record_nr++ == tab->cache.ptr_record;
-  pos=tab->cache.pos;
-
-  for (copy=tab->cache.field,end_field=copy+tab->cache.fields ;
-       copy < end_field;
-       copy++)
-  {
-    if (copy->type == CACHE_BLOB)
-    {
-      Field_blob *blob_field= (Field_blob *) copy->field;
-      if (last_record)
-      {
-	blob_field->set_image(pos, copy->length+sizeof(char*),
-                              blob_field->charset());
-	pos+=copy->length+sizeof(char*);
-      }
-      else
-      {
-	blob_field->set_ptr(pos, pos+copy->length);
-	pos+=copy->length + blob_field->get_length();
-      }
-    }
-    else
-    {
-      if (copy->type == CACHE_STRIPPED)
-      {
-        length= uint2korr(pos);
-	memcpy(copy->str, pos+2, length);
-	memset(copy->str+length, ' ', copy->length-length);
-	pos+= 2 + length;
-      }
-      else
-      {
-	memcpy(copy->str,pos,copy->length);
-	pos+=copy->length;
-      }
-    }
-  }
-  tab->cache.pos=pos;
-  return;
-}
-
-
-static bool
-cmp_buffer_with_ref(JOIN_TAB *tab)
-{
-  bool diff;
-  if (!(diff=tab->ref.key_err))
-  {
-    memcpy(tab->ref.key_buff2, tab->ref.key_buff, tab->ref.key_length);
-  }
-  if ((tab->ref.key_err= cp_buffer_from_ref(tab->join->thd, tab->table,
-                                            &tab->ref)) ||
-      diff)
+  else 
+    no_prev_key= TRUE;
+  if ((tab_ref->key_err= cp_buffer_from_ref(thd, table, tab_ref)) ||
+      no_prev_key)
     return 1;
-  return memcmp(tab->ref.key_buff2, tab->ref.key_buff, tab->ref.key_length)
+  return memcmp(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length)
     != 0;
 }
 
@@ -14863,7 +18622,7 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
     order->in_field_list= 1;
     order->counter= count;
     order->counter_used= 1;
-    return FALSE;
+   return FALSE;
   }
   /* Lookup the current GROUP/ORDER field in the SELECT clause. */
   select_item= find_item_in_list(order_item, fields, &counter,
@@ -14888,8 +18647,7 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
     /* Lookup the current GROUP field in the FROM clause. */
     order_item_type= order_item->type();
     from_field= (Field*) not_found_field;
-    if ((is_group_field &&
-        order_item_type == Item::FIELD_ITEM) ||
+    if ((is_group_field && order_item_type == Item::FIELD_ITEM) ||
         order_item_type == Item::REF_ITEM)
     {
       from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables,
@@ -14949,30 +18707,12 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
     time.
 
     We check order_item->fixed because Item_func_group_concat can put
-    arguments for which fix_fields already was called.
-    
-    group_fix_field= TRUE is to resolve aliases from the SELECT list
-    without creating of Item_ref-s: JOIN::exec() wraps aliased items
-    in SELECT list with Item_copy items. To re-evaluate such a tree
-    that includes Item_copy items we have to refresh Item_copy caches,
-    but:
-      - filesort() never refresh Item_copy items,
-      - end_send_group() checks every record for group boundary by the
-        test_if_group_changed function that obtain data from these
-        Item_copy items, but the copy_fields function that
-        refreshes Item copy items is called after group boundaries only -
-        that is a vicious circle.
-    So we prevent inclusion of Item_copy items.
+    arguments for which fix_fields already was called.    
   */
-  bool save_group_fix_field= thd->lex->current_select->group_fix_field;
-  if (is_group_field)
-    thd->lex->current_select->group_fix_field= TRUE;
-  bool ret= (!order_item->fixed &&
+  if (!order_item->fixed &&
       (order_item->fix_fields(thd, order->item) ||
        (order_item= *order->item)->check_cols(1) ||
-       thd->is_fatal_error));
-  thd->lex->current_select->group_fix_field= save_group_fix_field;
-  if (ret)
+       thd->is_fatal_error))
     return TRUE; /* Wrong field. */
 
   uint el= all_fields.elements;
@@ -15044,6 +18784,8 @@ setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
   uint org_fields=all_fields.elements;
 
   thd->where="group statement";
+  enum_parsing_place save_place= thd->lex->current_select->parsing_place;
+  thd->lex->current_select->parsing_place= IN_GROUP_BY;
   for (ord= order; ord; ord= ord->next)
   {
     if (find_order_in_list(thd, ref_pointer_array, tables, ord, fields,
@@ -15056,6 +18798,8 @@ setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
       return 1;
     }
   }
+  thd->lex->current_select->parsing_place= save_place;
+
   if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY)
   {
     /*
@@ -15323,8 +19067,10 @@ test_if_subpart(ORDER *a,ORDER *b)
 */
 
 static TABLE *
-get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
+get_sort_by_table(ORDER *a,ORDER *b, List<TABLE_LIST> &tables)
 {
+  TABLE_LIST *table;
+  List_iterator<TABLE_LIST> ti(tables);
   table_map map= (table_map) 0;
   DBUG_ENTER("get_sort_by_table");
 
@@ -15342,11 +19088,11 @@ get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
   if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
     DBUG_RETURN(0);
 
-  for (; !(map & tables->table->map); tables= tables->next_leaf) ;
-  if (map != tables->table->map)
+  while ((table= ti++) && !(map & table->table->map)) ;
+  if (map != table->table->map)
     DBUG_RETURN(0);				// More than one table
-  DBUG_PRINT("exit",("sort by table: %d",tables->table->tablenr));
-  DBUG_RETURN(tables->table);
+  DBUG_PRINT("exit",("sort by table: %d",table->table->tablenr));
+  DBUG_RETURN(table->table);
 }
 
 
@@ -15482,7 +19228,7 @@ alloc_group_fields(JOIN *join,ORDER *group)
   {
     for (; group ; group=group->next)
     {
-      Cached_item *tmp=new_Cached_item(join->thd, *group->item);
+      Cached_item *tmp=new_Cached_item(join->thd, *group->item, TRUE);
       if (!tmp || join->group_fields.push_front(tmp))
 	return TRUE;
     }
@@ -15492,6 +19238,38 @@ alloc_group_fields(JOIN *join,ORDER *group)
 }
 
 
+
+/*
+  Test if a single-row cache of items changed, and update the cache.
+
+  @details Test if a list of items that typically represents a result
+  row has changed. If the value of some item changed, update the cached
+  value for this item.
+  
+  @param list list of <item, cached_value> pairs stored as Cached_item.
+
+  @return -1 if no item changed, otherwise the lowest index of a changed item
+  @note indexes count down from list.elements-1 for the first list element
+*/
+
+int test_if_item_cache_changed(List<Cached_item> &list)
+{
+  DBUG_ENTER("test_if_item_cache_changed");
+  List_iterator<Cached_item> li(list);
+  int idx= -1,i;        /* idx stays -1 unless a cmp() call below is non-zero */
+  Cached_item *buff;
+
+  for (i=(int) list.elements-1 ; (buff=li++) ; i--) /* i counts down as li advances */
+  {
+    if (buff->cmp())    /* called for every element: the loop has no early exit */
+      idx=i;            /* overwritten on each hit, so the lowest such i is returned */
+  }
+  DBUG_PRINT("info", ("idx: %d", idx));
+  DBUG_RETURN(idx);
+}
+
+
+
 static int
 test_if_group_changed(List<Cached_item> &list)
 {
@@ -15589,7 +19367,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
       pos= item;
       if (item->field->flags & BLOB_FLAG)
       {
-	if (!(pos= Item_copy::create(pos)))
+	if (!(pos= new Item_copy_string(pos)))
 	  goto err;
        /*
          Item_copy_string::copy for function can call 
@@ -15623,7 +19401,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
           DBUG_ASSERT (param->field_count > (uint) (copy - copy_start));
           copy->set(tmp, item->result_field);
           item->result_field->move_field(copy->to_ptr,copy->to_null_ptr,1);
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
           copy->to_ptr[copy->from_length]= 0;
 #endif
           copy++;
@@ -15631,7 +19409,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
       }
     }
     else if ((real_pos->type() == Item::FUNC_ITEM ||
-	      real_pos->type() == Item::SUBSELECT_ITEM ||
+	      real_pos->real_type() == Item::SUBSELECT_ITEM ||
 	      real_pos->type() == Item::CACHE_ITEM ||
 	      real_pos->type() == Item::COND_ITEM) &&
 	     !real_pos->with_sum_func)
@@ -15643,7 +19421,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
 	 on how the value is to be used: In some cases this may be an
 	 argument in a group function, like: IF(ISNULL(col),0,COUNT(*))
       */
-      if (!(pos= Item_copy::create(pos)))
+      if (!(pos=new Item_copy_string(pos)))
 	goto err;
       if (i < border)                           // HAVING, ORDER and GROUP BY
       {
@@ -15696,8 +19474,8 @@ copy_fields(TMP_TABLE_PARAM *param)
     (*ptr->do_copy)(ptr);
 
   List_iterator_fast<Item> it(param->copy_funcs);
-  Item_copy *item;
-  while ((item = (Item_copy*) it++))
+  Item_copy_string *item;
+  while ((item = (Item_copy_string*) it++))
     item->copy();
 }
 
@@ -15874,6 +19652,7 @@ change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array,
 	  char buff[256];
 	  String str(buff,sizeof(buff),&my_charset_bin);
 	  str.length(0);
+          str.extra_allocation(1024);
 	  item->print(&str, QT_ORDINARY);
 	  item_field->name= sql_strmake(str.ptr(),str.length());
 	}
@@ -16110,16 +19889,28 @@ static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
     DBUG_RETURN(TRUE);
 
   if (!cond->fixed)
-    cond->fix_fields(thd, (Item**)&cond);
+  {
+    Item *tmp_item= (Item*) cond;
+    cond->fix_fields(thd, &tmp_item);
+    DBUG_ASSERT(cond == tmp_item);
+  }
   if (join_tab->select)
   {
     if (join_tab->select->cond)
       error=(int) cond->add(join_tab->select->cond);
-    join_tab->select_cond=join_tab->select->cond=cond;
+    join_tab->select->cond= cond;
+    if (join_tab->select->pre_idx_push_select_cond)
+    {
+      Item *new_cond= and_conds(join_tab->select->pre_idx_push_select_cond, cond);
+      if (!new_cond->fixed && new_cond->fix_fields(thd, &new_cond))
+        error= 1;
+      join_tab->select->pre_idx_push_select_cond= new_cond;
+    }
+    join_tab->set_select_cond(cond, __LINE__);
   }
   else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond, 0,
                                           &error)))
-    join_tab->select_cond=cond;
+    join_tab->set_select_cond(cond, __LINE__);
 
   DBUG_RETURN(error ? TRUE : FALSE);
 }
@@ -16222,6 +20013,7 @@ static bool change_group_ref(THD *thd, Item_func *expr, ORDER *group_list,
     if (arg_changed)
     {
       expr->maybe_null= 1;
+      expr->in_rollup= 1;
       *changed= TRUE;
     }
   }
@@ -16285,6 +20077,7 @@ bool JOIN::rollup_init()
       if (*group_tmp->item == item)
       {
         item->maybe_null= 1;
+        item->in_rollup= 1;
         found_in_group= 1;
         break;
       }
@@ -16515,6 +20308,7 @@ int JOIN::rollup_send_data(uint idx)
   uint i;
   for (i= send_group_parts ; i-- > idx ; )
   {
+    int res= 0;
     /* Get reference pointers to sum functions in place */
     memcpy((char*) ref_pointer_array,
 	   (char*) rollup.ref_pointer_arrays[i],
@@ -16522,9 +20316,10 @@ int JOIN::rollup_send_data(uint idx)
     if ((!having || having->val_int()))
     {
       if (send_records < unit->select_limit_cnt && do_send_rows &&
-	  result->send_data(rollup.fields[i]))
+	  (res= result->send_data(rollup.fields[i])) > 0)
 	return 1;
-      send_records++;
+      if (!res)
+        send_records++;
     }
   }
   /* Restore ref_pointer_array */
@@ -16572,10 +20367,12 @@ int JOIN::rollup_write_data(uint idx, TABLE *table_arg)
           item->save_in_result_field(1);
       }
       copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]);
-      if ((write_error= table_arg->file->ha_write_row(table_arg->record[0])))
+      if ((write_error= table_arg->file->ha_write_tmp_row(table_arg->record[0])))
       {
-	if (create_myisam_from_heap(thd, table_arg, &tmp_table_param,
-                                    write_error, 0))
+	if (create_internal_tmp_table_from_heap(thd, table_arg, 
+                                                tmp_table_param.start_recinfo,
+                                                &tmp_table_param.recinfo,
+                                                write_error, 0))
 	  return 1;		     
       }
     }
@@ -16657,7 +20454,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       the UNION to provide precise EXPLAIN information will hardly be
       appreciated :)
     */
-    char table_name_buffer[NAME_LEN];
+    char table_name_buffer[SAFE_NAME_LEN];
     item_list.empty();
     /* id */
     item_list.push_back(new Item_null);
@@ -16718,46 +20515,76 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
     if (result->send_data(item_list))
       join->error= 1;
   }
-  else
+  else if (!join->select_lex->master_unit()->derived ||
+           join->select_lex->master_unit()->derived->is_materialized_derived())
   {
     table_map used_tables=0;
-    for (uint i=0 ; i < join->tables ; i++)
+
+    bool printing_materialize_nest= FALSE;
+    uint select_id= join->select_lex->select_number;
+
+    for (JOIN_TAB *tab= first_breadth_first_tab(join); tab;
+         tab= next_breadth_first_tab(join, tab))
     {
-      JOIN_TAB *tab=join->join_tab+i;
+      if (tab->bush_root_tab)
+      {
+        JOIN_TAB *first_sibling= tab->bush_root_tab->bush_children->start;
+        select_id= first_sibling->emb_sj_nest->sj_subq_pred->get_identifier();
+        printing_materialize_nest= TRUE;
+      }
+
       TABLE *table=tab->table;
       TABLE_LIST *table_list= tab->table->pos_in_table_list;
       char buff[512]; 
-      char buff1[512], buff2[512], buff3[512];
+      char buff1[512], buff2[512], buff3[512], buff4[512];
       char keylen_str_buf[64];
+      my_bool key_read;
       String extra(buff, sizeof(buff),cs);
-      char table_name_buffer[NAME_LEN];
+      char table_name_buffer[SAFE_NAME_LEN];
       String tmp1(buff1,sizeof(buff1),cs);
       String tmp2(buff2,sizeof(buff2),cs);
       String tmp3(buff3,sizeof(buff3),cs);
+      String tmp4(buff4,sizeof(buff4),cs);
+      char hash_key_prefix[]= "#hash#";
+      KEY *key_info= 0;
+      uint key_len= 0;
+      bool is_hj= tab->type == JT_HASH || tab->type ==JT_HASH_NEXT;
+
       extra.length(0);
       tmp1.length(0);
       tmp2.length(0);
       tmp3.length(0);
-
+      tmp4.length(0);
       quick_type= -1;
+
+      /* Don't show eliminated tables */
+      if (table->map & join->eliminated_tables)
+      {
+        used_tables|=table->map;
+        continue;
+      }
+
       item_list.empty();
       /* id */
-      item_list.push_back(new Item_uint((uint32)
-				       join->select_lex->select_number));
+      item_list.push_back(new Item_uint((uint32)select_id));
       /* select_type */
-      item_list.push_back(new Item_string(join->select_lex->type,
-					  strlen(join->select_lex->type),
-					  cs));
-      if (tab->type == JT_ALL && tab->select && tab->select->quick)
+      const char* stype= printing_materialize_nest? "SUBQUERY" : 
+                                                    join->select_lex->type;
+      item_list.push_back(new Item_string(stype, strlen(stype), cs));
+      
+      if ((tab->type == JT_ALL || tab->type == JT_HASH) &&
+           tab->select && tab->select->quick)
       {
         quick_type= tab->select->quick->get_type();
         if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) ||
+            (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT) ||
             (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) ||
             (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION))
-          tab->type = JT_INDEX_MERGE;
+          tab->type= tab->type == JT_ALL ? JT_INDEX_MERGE : JT_HASH_INDEX_MERGE;
         else
-	  tab->type = JT_RANGE;
+	  tab->type= tab->type == JT_ALL ? JT_RANGE : JT_HASH_RANGE;
       }
+
       /* table */
       if (table->derived_select_number)
       {
@@ -16767,6 +20594,16 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 			     table->derived_select_number);
 	item_list.push_back(new Item_string(table_name_buffer, len, cs));
       }
+      else if (tab->bush_children)
+      {
+        JOIN_TAB *ctab= tab->bush_children->start;
+        /* table */
+        int len= my_snprintf(table_name_buffer, 
+                             sizeof(table_name_buffer)-1,
+                             "<subquery%d>", 
+                             ctab->emb_sj_nest->sj_subq_pred->get_identifier());
+	item_list.push_back(new Item_string(table_name_buffer, len, cs));
+      }
       else
       {
         TABLE_LIST *real_table= table->pos_in_table_list; 
@@ -16819,49 +20656,72 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 	item_list.push_back(item_null);
 
       /* Build "key", "key_len", and "ref" values and add them to item_list */
-      if (tab->ref.key_parts)
+      if (tab->type == JT_NEXT)
+      {
+	key_info= table->key_info+tab->index;
+        key_len= key_info->key_length;
+      }
+      else if (tab->ref.key_parts)
+      {
+	key_info= tab->get_keyinfo_by_key_no(tab->ref.key);
+        key_len= tab->ref.key_length;
+      }
+      if (key_info)
       {
-	KEY *key_info=table->key_info+ tab->ref.key;
         register uint length;
-	item_list.push_back(new Item_string(key_info->name,
-					    strlen(key_info->name),
-					    system_charset_info));
-        length= longlong2str(tab->ref.key_length, keylen_str_buf, 10) - 
-                keylen_str_buf;
-        item_list.push_back(new Item_string(keylen_str_buf, length,
-                                            system_charset_info));
-	for (store_key **ref=tab->ref.key_copy ; *ref ; ref++)
+        if (is_hj)
+          tmp2.append(hash_key_prefix, strlen(hash_key_prefix), cs);
+        tmp2.append(key_info->name,  strlen(key_info->name), cs);
+        length= (longlong10_to_str(key_len, keylen_str_buf, 10) - 
+                 keylen_str_buf);
+        tmp3.append(keylen_str_buf, length, cs);
+        if (tab->ref.key_parts)
 	{
-	  if (tmp2.length())
-	    tmp2.append(',');
-	  tmp2.append((*ref)->name(), strlen((*ref)->name()),
-		      system_charset_info);
-	}
-	item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
+	  for (store_key **ref=tab->ref.key_copy ; *ref ; ref++)
+	  {
+	    if (tmp4.length())
+	      tmp4.append(',');
+	    tmp4.append((*ref)->name(), strlen((*ref)->name()), cs);
+          }
+        }
       }
-      else if (tab->type == JT_NEXT)
+      if (is_hj && tab->type != JT_HASH)
       {
-	KEY *key_info=table->key_info+ tab->index;
-        register uint length;
-	item_list.push_back(new Item_string(key_info->name,
-					    strlen(key_info->name),cs));
-        length= longlong2str(key_info->key_length, keylen_str_buf, 10) - 
-                keylen_str_buf;
-        item_list.push_back(new Item_string(keylen_str_buf, 
-                                            length,
-                                            system_charset_info));
-	item_list.push_back(item_null);
+        tmp2.append(':');
+        tmp3.append(':');
       }
-      else if (tab->select && tab->select->quick)
+      if (tab->type == JT_HASH_NEXT)
       {
+        register uint length;
+	key_info= table->key_info+tab->index;
+        key_len= key_info->key_length;
+        tmp2.append(key_info->name,  strlen(key_info->name), cs);
+        length= (longlong10_to_str(key_len, keylen_str_buf, 10) - 
+                 keylen_str_buf);
+        tmp3.append(keylen_str_buf, length, cs);
+      }
+      if ((is_hj || tab->type==JT_RANGE || tab->type == JT_INDEX_MERGE) &&
+          tab->select && tab->select->quick)
         tab->select->quick->add_keys_and_lengths(&tmp2, &tmp3);
-	item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
-	item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs));
-	item_list.push_back(item_null);
+      if (key_info || (tab->select && tab->select->quick))
+      {
+        if (tmp2.length())
+          item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
+        else
+          item_list.push_back(item_null);
+        if (tmp3.length())
+          item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs));
+        else
+          item_list.push_back(item_null);
+        if (key_info && tab->type != JT_NEXT)
+          item_list.push_back(new Item_string(tmp4.ptr(),tmp4.length(),cs));
+        else
+          item_list.push_back(item_null);
       }
       else
       {
-        if (table_list->schema_table &&
+        if (table_list && /* SJM bushes don't have table_list */
+            table_list->schema_table &&
             table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE)
         {
           const char *tmp_buff;
@@ -16892,7 +20752,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       }
       
       /* Add "rows" field to item_list. */
-      if (table_list->schema_table)
+      if (table_list /* SJM bushes don't have table_list */ &&
+          table_list->schema_table)
       {
         /* in_rows */
         if (join->thd->lex->describe & DESCRIBE_EXTENDED)
@@ -16905,18 +20766,28 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         ha_rows examined_rows;
         if (tab->select && tab->select->quick)
           examined_rows= tab->select->quick->records;
-        else if (tab->type == JT_NEXT || tab->type == JT_ALL)
+        else if (tab->type == JT_NEXT || tab->type == JT_ALL || is_hj)
         {
           if (tab->limit)
             examined_rows= tab->limit;
           else
           {
-            tab->table->file->info(HA_STATUS_VARIABLE);
-            examined_rows= tab->table->file->stats.records;
+            if (tab->table->is_filled_at_execution())
+            {
+              examined_rows= tab->records;
+            }
+            else
+            {
+              /*
+                handler->info(HA_STATUS_VARIABLE) has been called in
+                make_join_statistics()
+              */
+              examined_rows= tab->table->file->stats.records;
+            }
           }
         }
         else
-          examined_rows=(ha_rows)join->best_positions[i].records_read; 
+          examined_rows=(ha_rows)tab->records_read; 
  
         item_list.push_back(new Item_int((longlong) (ulonglong) examined_rows, 
                                          MY_INT64_NUM_DECIMAL_DIGITS));
@@ -16926,14 +20797,14 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         {
           float f= 0.0; 
           if (examined_rows)
-            f= (float) (100.0 * join->best_positions[i].records_read /
-                        examined_rows);
+            f= (float) (100.0 * tab->records_read / examined_rows);
+ 	  set_if_smaller(f, 100.0);
           item_list.push_back(new Item_float(f, 2));
         }
       }
 
       /* Build "Extra" field and add it to item_list. */
-      my_bool key_read=table->key_read;
+      key_read=table->key_read;
       if ((tab->type == JT_NEXT || tab->type == JT_CONST) &&
           table->covering_keys.is_set(tab->index))
 	key_read=1;
@@ -16963,8 +20834,21 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       }
       else
       {
+        uint keyno= MAX_KEY;
+        if (tab->ref.key_parts)
+          keyno= tab->ref.key;
+        else if (tab->select && tab->select->quick)
+          keyno = tab->select->quick->index;
+
+        if (keyno != MAX_KEY && keyno == table->file->pushed_idx_cond_keyno &&
+            table->file->pushed_idx_cond)
+          extra.append(STRING_WITH_LEN("; Using index condition"));
+        else if (tab->cache_idx_cond)
+          extra.append(STRING_WITH_LEN("; Using index condition(BKA)"));
+
         if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || 
             quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
+            quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
             quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
         {
           extra.append(STRING_WITH_LEN("; Using "));
@@ -17000,7 +20884,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
               extra.append(STRING_WITH_LEN("; Using where"));
           }
 	}
-        if (table_list->schema_table &&
+        if (table_list /* SJM bushes don't have table_list */ &&
+            table_list->schema_table &&
             table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE)
         {
           if (!table_list->table_open_method)
@@ -17032,6 +20917,32 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         }
 	if (table->reginfo.not_exists_optimize)
 	  extra.append(STRING_WITH_LEN("; Not exists"));
+
+        /*
+        if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE &&
+            !(((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_flags &
+             HA_MRR_USE_DEFAULT_IMPL))
+        {
+	  extra.append(STRING_WITH_LEN("; Using MRR"));
+        }
+        */
+        if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE)
+        {
+          char mrr_str_buf[128];
+          mrr_str_buf[0]=0;
+          int len;
+          uint mrr_flags= 
+            ((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_flags;
+          len= table->file->multi_range_read_explain_info(mrr_flags,
+                                                          mrr_str_buf,
+                                                          sizeof(mrr_str_buf));
+          if (len > 0)
+          {
+            extra.append(STRING_WITH_LEN("; "));
+            extra.append(mrr_str_buf, len);
+          }
+        }
+
 	if (need_tmp_table)
 	{
 	  need_tmp_table=0;
@@ -17042,8 +20953,40 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 	  need_order=0;
 	  extra.append(STRING_WITH_LEN("; Using filesort"));
 	}
-	if (distinct & test_all_bits(used_tables, thd->lex->used_tables))
+	if (distinct & test_all_bits(used_tables,
+                                     join->select_list_used_tables))
 	  extra.append(STRING_WITH_LEN("; Distinct"));
+        if (tab->loosescan_match_tab)
+        {
+          extra.append(STRING_WITH_LEN("; LooseScan"));
+        }
+
+        if (tab->flush_weedout_table)
+          extra.append(STRING_WITH_LEN("; Start temporary"));
+        if (tab->check_weed_out_table)
+          extra.append(STRING_WITH_LEN("; End temporary"));
+        else if (tab->do_firstmatch)
+        {
+          if (tab->do_firstmatch == join->join_tab - 1)
+            extra.append(STRING_WITH_LEN("; FirstMatch"));
+          else
+          {
+            extra.append(STRING_WITH_LEN("; FirstMatch("));
+            TABLE *prev_table=tab->do_firstmatch->table;
+            if (prev_table->derived_select_number)
+            {
+              char namebuf[NAME_LEN];
+              /* Derived table name generation */
+              int len= my_snprintf(namebuf, sizeof(namebuf)-1,
+                                   "<derived%u>",
+                                   prev_table->derived_select_number);
+              extra.append(namebuf, len);
+            }
+            else
+              extra.append(prev_table->pos_in_table_list->alias);
+            extra.append(STRING_WITH_LEN(")"));
+          }
+        }
 
         for (uint part= 0; part < tab->ref.key_parts; part++)
         {
@@ -17053,8 +20996,12 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
             break;
           }
         }
-        if (i > 0 && tab[-1].next_select == sub_select_cache)
+
+        if (tab->cache)
+	{
           extra.append(STRING_WITH_LEN("; Using join buffer"));
+          tab->cache->print_explain_comment(&extra);
+        }
         
         /* Skip initial "; "*/
         const char *str= extra.ptr();
@@ -17066,6 +21013,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         }
 	item_list.push_back(new Item_string(str, len, cs));
       }
+      
       // For next iteration
       used_tables|=table->map;
       if (result->send_data(item_list))
@@ -17076,8 +21024,31 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
        unit;
        unit= unit->next_unit())
   {
-    if (mysql_explain_union(thd, unit, result))
-      DBUG_VOID_RETURN;
+    /*
+      This fix_fields() call is to handle an edge case like this:
+       
+        SELECT ... UNION SELECT ... ORDER BY (SELECT ...)
+      
+      For such queries we get here before subquery_expr->fix_fields() has been
+      called (which would cause a failure), so a still-unfixed item is fixed here.
+    */
+    if (unit->item && !unit->item->fixed)
+    {
+      Item *ref= unit->item;
+      if (unit->item->fix_fields(thd, &ref))
+        DBUG_VOID_RETURN;
+      DBUG_ASSERT(ref == unit->item);
+    }
+
+    /* 
+      Display subqueries only if they are not parts of eliminated WHERE/ON
+      clauses.
+    */
+    if (!(unit->item && unit->item->eliminated))
+    {
+      if (mysql_explain_union(thd, unit, result))
+        DBUG_VOID_RETURN;
+    }
   }
   DBUG_VOID_RETURN;
 }
@@ -17089,28 +21060,12 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
   bool res= 0;
   SELECT_LEX *first= unit->first_select();
 
-  for (SELECT_LEX *sl= first;
-       sl;
-       sl= sl->next_select())
-  {
-    // drop UNCACHEABLE_EXPLAIN, because it is for internal usage only
-    uint8 uncacheable= (sl->uncacheable & ~UNCACHEABLE_EXPLAIN);
-    sl->type= (((&thd->lex->select_lex)==sl)?
-	       (sl->first_inner_unit() || sl->next_select() ? 
-		"PRIMARY" : "SIMPLE"):
-	       ((sl == first)?
-		((sl->linkage == DERIVED_TABLE_TYPE) ?
-		 "DERIVED":
-		 ((uncacheable & UNCACHEABLE_DEPENDENT) ?
-		  "DEPENDENT SUBQUERY":
-		  (uncacheable?"UNCACHEABLE SUBQUERY":
-		   "SUBQUERY"))):
-		((uncacheable & UNCACHEABLE_DEPENDENT) ?
-		 "DEPENDENT UNION":
-		 uncacheable?"UNCACHEABLE UNION":
-		 "UNION")));
+  for (SELECT_LEX *sl= first; sl; sl= sl->next_select())
+  {
+    sl->set_explain_type();
     sl->options|= SELECT_DESCRIBE;
   }
+
   if (unit->is_union())
   {
     unit->fake_select_lex->select_number= UINT_MAX; // jost for initialization
@@ -17118,7 +21073,6 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
     unit->fake_select_lex->options|= SELECT_DESCRIBE;
     if (!(res= unit->prepare(thd, result, SELECT_NO_UNLOCK | SELECT_DESCRIBE)))
       res= unit->exec();
-    res|= unit->cleanup();
   }
   else
   {
@@ -17141,6 +21095,53 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
 }
 
 
+static void print_table_array(THD *thd, 
+                              table_map eliminated_tables,
+                              String *str, TABLE_LIST **table, 
+                              TABLE_LIST **end,
+                              enum_query_type query_type)
+{ /* Append the tables in [table, end) to str as a chain of joins. */
+  (*table)->print(thd, eliminated_tables, str, query_type); /* first entry: no join keyword */
+
+  for (TABLE_LIST **tbl= table + 1; tbl < end; tbl++) /* every later entry gets a join keyword */
+  {
+    TABLE_LIST *curr= *tbl;
+    
+    /*
+      The "eliminated_tables &&" check guards against the case of
+      printing the query for CREATE VIEW. We do that without having run 
+      JOIN::optimize() and so will have nested_join->used_tables==0.
+    */
+    if (eliminated_tables &&
+        ((curr->table && (curr->table->map & eliminated_tables)) ||
+         (curr->nested_join && !(curr->nested_join->used_tables &
+                                ~eliminated_tables))))
+    {
+      continue; /* table is eliminated, or the nest uses only eliminated tables */
+    }
+
+    if (curr->outer_join)
+    {
+      /* MySQL converts right to left joins */
+      str->append(STRING_WITH_LEN(" left join "));
+    }
+    else if (curr->straight)
+      str->append(STRING_WITH_LEN(" straight_join "));
+    else if (curr->sj_inner_tables)
+      str->append(STRING_WITH_LEN(" semi join "));
+    else
+      str->append(STRING_WITH_LEN(" join "));
+    curr->print(thd, eliminated_tables, str, query_type);
+    if (curr->on_expr) /* append the ON condition when the entry has one */
+    {
+      str->append(STRING_WITH_LEN(" on("));
+      curr->on_expr->print(str, query_type);
+      str->append(')');
+    }
+  }
+}
+
+
 /**
   Print joins from the FROM clause.
 
@@ -17151,6 +21152,7 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
 */
 
 static void print_join(THD *thd,
+                       table_map eliminated_tables,
                        String *str,
                        List<TABLE_LIST> *tables,
                        enum_query_type query_type)
@@ -17183,32 +21185,36 @@ static void print_join(THD *thd,
   }
 
   DBUG_ASSERT(tables->elements >= 1);
-  (*table)->print(thd, str, query_type);
-
-  TABLE_LIST **end= table + non_const_tables;
-  for (TABLE_LIST **tbl= table + 1; tbl < end; tbl++)
+  /*
+    Assert that the first table in the list isn't eliminated. This comes from
+    the fact that the first table can't be an inner table of an outer join.
+  */
+  DBUG_ASSERT(!eliminated_tables || 
+              !(((*table)->table && ((*table)->table->map & eliminated_tables)) ||
+                ((*table)->nested_join && !((*table)->nested_join->used_tables &
+                                           ~eliminated_tables))));
+  /* 
+    If the first table is a semi-join nest, swap it with something that is
+    not a semi-join nest.
+  */
+  if ((*table)->sj_inner_tables)
   {
-    TABLE_LIST *curr= *tbl;
-    if (curr->outer_join)
-    {
-      /* MySQL converts right to left joins */
-      str->append(STRING_WITH_LEN(" left join "));
-    }
-    else if (curr->straight)
-      str->append(STRING_WITH_LEN(" straight_join "));
-    else
-      str->append(STRING_WITH_LEN(" join "));
-    curr->print(thd, str, query_type);
-    if (curr->on_expr)
+    TABLE_LIST **end= table + non_const_tables;
+    for (TABLE_LIST **t2= table; t2!=end; t2++)
     {
-      str->append(STRING_WITH_LEN(" on("));
-      curr->on_expr->print(str, query_type);
-      str->append(')');
+      if (!(*t2)->sj_inner_tables)
+      {
+        TABLE_LIST *tmp= *t2;
+        *t2= *table;
+        *table= tmp;
+        break;
+      }
     }
   }
+  print_table_array(thd, eliminated_tables, str, table, 
+                    table +  non_const_tables, query_type);
 }
 
-
 /**
   @brief Print an index hint
 
@@ -17252,12 +21258,21 @@ Index_hint::print(THD *thd, String *str)
   @param str   string where table should be printed
 */
 
-void TABLE_LIST::print(THD *thd, String *str, enum_query_type query_type)
+void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str, 
+                       enum_query_type query_type)
 {
   if (nested_join)
   {
     str->append('(');
-    print_join(thd, str, &nested_join->join_list, query_type);
+    print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type);
+    str->append(')');
+  }
+  else if (jtbm_subselect)
+  {
+    str->append(STRING_WITH_LEN(" <materialize> ("));
+    subselect_hash_sj_engine *hash_engine;
+    hash_engine= (subselect_hash_sj_engine*)jtbm_subselect->engine;
+    hash_engine->materialize_engine->print(str, query_type);
     str->append(')');
   }
   else
@@ -17342,9 +21357,7 @@ void TABLE_LIST::print(THD *thd, String *str, enum_query_type query_type)
 
 void st_select_lex::print(THD *thd, String *str, enum_query_type query_type)
 {
-  /* QQ: thd may not be set for sub queries, but this should be fixed */
-  if (!thd)
-    thd= current_thd;
+  DBUG_ASSERT(thd);
 
   str->append(STRING_WITH_LEN("select "));
 
@@ -17388,7 +21401,7 @@ void st_select_lex::print(THD *thd, String *str, enum_query_type query_type)
     else
       str->append(',');
 
-    if (master_unit()->item && item->is_autogenerated_name)
+    if (is_subquery_function() && item->is_autogenerated_name)
     {
       /*
         Do not print auto-generated aliases in subqueries. It has no purpose
@@ -17408,7 +21421,7 @@ void st_select_lex::print(THD *thd, String *str, enum_query_type query_type)
   {
     str->append(STRING_WITH_LEN(" from "));
     /* go through join tree */
-    print_join(thd, str, &top_join_list, query_type);
+    print_join(thd, join? join->eliminated_tables: 0, str, &top_join_list, query_type);
   }
   else if (where)
   {
@@ -17492,6 +21505,8 @@ bool JOIN::change_result(select_result *res)
 {
   DBUG_ENTER("JOIN::change_result");
   result= res;
+  if (tmp_join)
+    tmp_join->result= res;
   if (!procedure && (result->prepare(fields_list, select_lex->master_unit()) ||
                      result->prepare2()))
   {
@@ -17500,6 +21515,189 @@ bool JOIN::change_result(select_result *res)
   DBUG_RETURN(FALSE);
 }
 
+
+/**
+  @brief
+  Set allowed types of join caches that can be used for join operations
+
+  @details
+  The function sets a bitmap of allowed join buffers types in the field
+  allowed_join_cache_types of this JOIN structure:
+    bit 1 is set if the join buffers are allowed to be incremental
+    bit 2 is set if the join buffers are allowed to be hashed
+    bit 3 is set if the join buffers are allowed to be used for BKA
+  join algorithms.
+  The allowed types are read from system variables.
+  Besides the function sets maximum allowed join cache level that is
+  also read from a system variable.
+*/
+
+void JOIN::set_allowed_join_cache_types()
+{
+  allowed_join_cache_types= 0;          /* rebuild the bitmap from scratch */
+  if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL))
+    allowed_join_cache_types|= JOIN_CACHE_INCREMENTAL_BIT;
+  if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_HASHED))
+    allowed_join_cache_types|= JOIN_CACHE_HASHED_BIT;
+  if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_BKA))
+    allowed_join_cache_types|= JOIN_CACHE_BKA_BIT;
+  allowed_semijoin_with_cache=          /* separate flag, not a bitmap bit */
+    optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE);
+  allowed_outer_join_with_cache=        /* separate flag, not a bitmap bit */
+    optimizer_flag(thd, OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE);
+  max_allowed_join_cache_level= thd->variables.join_cache_level;
+}
+
+
+/**
+  Save a query execution plan so that the caller can revert to it if needed,
+  and reset the current query plan so that it can be reoptimized.
+
+  @param save_to  The object into which the current query plan state is saved
+*/
+
+void JOIN::save_query_plan(Join_plan_state *save_to)
+{
+  if (keyuse.elements)          /* keyuse state is saved only if non-empty */
+  {
+    DYNAMIC_ARRAY tmp_keyuse;
+    /* Swap the current and the backup keyuse internal arrays. */
+    tmp_keyuse= keyuse;
+    keyuse= save_to->keyuse; /* keyuse is reset to an empty array. */
+    save_to->keyuse= tmp_keyuse;
+    /* Remember each JOIN_TAB's keyuse pointer and checked keys, then reset. */
+    for (uint i= 0; i < table_count; i++)
+    {
+      save_to->join_tab_keyuse[i]= join_tab[i].keyuse;
+      join_tab[i].keyuse= NULL;
+      save_to->join_tab_checked_keys[i]= join_tab[i].checked_keys;
+      join_tab[i].checked_keys.clear_all();
+    }
+  }
+  memcpy((uchar*) save_to->best_positions, (uchar*) best_positions,
+         sizeof(POSITION) * (table_count + 1)); /* copied unconditionally */
+  memset(best_positions, 0, sizeof(POSITION) * (table_count + 1));
+}
+
+
+/**
+  Restore a query execution plan previously saved by the caller.
+
+  @param restore_from  The object from which the query plan state is restored.
+*/
+
+void JOIN::restore_query_plan(Join_plan_state *restore_from)
+{
+  if (restore_from->keyuse.elements)    /* a non-empty keyuse was saved */
+  {
+    DYNAMIC_ARRAY tmp_keyuse;
+    tmp_keyuse= keyuse;                 /* swap current and saved arrays */
+    keyuse= restore_from->keyuse;
+    restore_from->keyuse= tmp_keyuse;
+    /* Put back each JOIN_TAB's saved keyuse pointer and checked keys. */
+    for (uint i= 0; i < table_count; i++)
+    {
+      join_tab[i].keyuse= restore_from->join_tab_keyuse[i];
+      join_tab[i].checked_keys= restore_from->join_tab_checked_keys[i];
+    }
+
+  }
+  memcpy((uchar*) best_positions, (uchar*) restore_from->best_positions,
+         sizeof(POSITION) * (table_count + 1)); /* copied unconditionally */
+}
+
+
+/**
+  Reoptimize a query plan taking into account an additional conjunct to the
+  WHERE clause.
+
+  @param added_where  An extra conjunct to the WHERE clause to reoptimize with
+  @param join_tables  The set of tables to reoptimize
+  @param save_to      If != NULL, save here the state of the current query plan
+
+  @notes
+  Given a query plan that was already optimized taking into account some WHERE
+  clause 'C', reoptimize this plan with a new WHERE clause 'C AND added_where'.
+  The reoptimization works as follows:
+
+  1. Call update_ref_and_keys *only* for the new conditions 'added_where'
+     that are about to be injected into the query.
+  2. Expand if necessary the original KEYUSE array JOIN::keyuse to
+     accommodate the new REF accesses computed for the 'added_where' condition.
+  3. Add the new KEYUSEs into JOIN::keyuse.
+  4. Re-sort and re-filter the JOIN::keyuse array with the newly added
+     KEYUSE elements. 
+ 
+  @retval REOPT_NEW_PLAN  there is a new plan.
+  @retval REOPT_OLD_PLAN  no new improved plan was produced, use the old one.
+  @retval REOPT_ERROR     an irrecoverable error occurred during reoptimization.
+*/
+
+JOIN::enum_reopt_result
+JOIN::reoptimize(Item *added_where, table_map join_tables,
+                 Join_plan_state *save_to)
+{
+  DYNAMIC_ARRAY added_keyuse;
+  SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */
+
+  /* Re-run the REF optimizer to take into account the new conditions. */
+  if (update_ref_and_keys(thd, &added_keyuse, join_tab, table_count, added_where,
+                          ~outer_join, select_lex, &sargables))
+  {
+    delete_dynamic(&added_keyuse);
+    return REOPT_ERROR;
+  }
+
+  if (!added_keyuse.elements)
+  {
+    delete_dynamic(&added_keyuse);
+    return REOPT_OLD_PLAN;
+  }
+
+  if (save_to)
+    save_query_plan(save_to);
+
+  /* Make room for old + new KEYUSEs; a failed (re)allocation is an error. */
+  if ((!keyuse.buffer &&
+       my_init_dynamic_array(&keyuse, sizeof(KEYUSE), 20, 64)) ||
+      allocate_dynamic(&keyuse,
+                       (save_to ? save_to->keyuse.elements : keyuse.elements) +
+                       added_keyuse.elements))
+  {
+    delete_dynamic(&added_keyuse);
+    return REOPT_ERROR;
+  }
+
+  /* If needed, add the access methods from the original query plan. */
+  if (save_to)
+  {
+    DBUG_ASSERT(!keyuse.elements);
+    memcpy(keyuse.buffer,
+           save_to->keyuse.buffer,
+           (size_t) save_to->keyuse.elements * keyuse.size_of_element);
+    keyuse.elements= save_to->keyuse.elements;
+  }
+
+  /* Add the new access methods to the keyuse array. */
+  memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element,
+         added_keyuse.buffer,
+         (size_t) added_keyuse.elements * added_keyuse.size_of_element);
+  keyuse.elements+= added_keyuse.elements;
+  /* added_keyuse contents is copied, and it is no longer needed. */
+  delete_dynamic(&added_keyuse);
+
+  if (sort_and_filter_keyuse(thd, &keyuse, true))
+    return REOPT_ERROR;
+  optimize_keyuse(this, &keyuse);
+
+  /* Re-run the join optimizer to compute a new query plan. */
+  if (choose_plan(this, join_tables))
+    return REOPT_ERROR;
+
+  return REOPT_NEW_PLAN;
+}
+
+
 /**
   Cache constant expressions in WHERE, HAVING, ON conditions.
 */
@@ -17510,7 +21708,7 @@ void JOIN::cache_const_exprs()
   bool *analyzer_arg= &cache_flag;
 
   /* No need in cache if all tables are constant. */
-  if (const_tables == tables)
+  if (const_tables == table_count)
     return;
 
   if (conds)
@@ -17521,7 +21719,8 @@ void JOIN::cache_const_exprs()
     having->compile(&Item::cache_const_expr_analyzer, (uchar **)&analyzer_arg,
                     &Item::cache_const_expr_transformer, (uchar *)&cache_flag);
 
-  for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables ; tab++)
+  for (JOIN_TAB *tab= first_depth_first_tab(this); tab;
+       tab= next_depth_first_tab(this, tab))
   {
     if (*tab->on_expr_ref)
     {
@@ -17567,7 +21766,7 @@ void JOIN::cache_const_exprs()
 static bool
 test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
                          key_map usable_keys,  int ref_key,
-                         ha_rows select_limit,
+                         ha_rows select_limit_arg,
                          int *new_key, int *new_key_direction,
                          ha_rows *new_select_limit, uint *new_used_key_parts,
                          uint *saved_best_key_parts)
@@ -17599,7 +21798,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
     resolved with a key;  This is because filesort() is usually faster than
     retrieving all rows through an index.
   */
-  if (select_limit >= table_records)
+  if (select_limit_arg >= table_records)
   {
     keys= *table->file->keys_to_use_for_scanning();
     keys.merge(table->covering_keys);
@@ -17624,7 +21823,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
   {
     uint tablenr= tab - join->join_tab;
     read_time= join->best_positions[tablenr].read_time;
-    for (uint i= tablenr+1; i < join->tables; i++)
+    for (uint i= tablenr+1; i < join->table_count; i++)
       fanout*= join->best_positions[i].records_read; // fanout is always >= 1
   }
   else
@@ -17633,6 +21832,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
   for (nr=0; nr < table->s->keys ; nr++)
   {
     int direction;
+    ha_rows select_limit= select_limit_arg;
     uint used_key_parts;
 
     if (keys.is_set(nr) &&
@@ -17645,10 +21845,9 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
       */
       DBUG_ASSERT (ref_key != (int) nr);
 
-      bool is_covering= table->covering_keys.is_set(nr) ||
-                        (nr == table->s->primary_key &&
-                        table->file->primary_key_is_clustered());
-      
+      bool is_covering= (table->covering_keys.is_set(nr) ||
+                         (table->file->index_flags(nr, 0, 1) &
+                          HA_CLUSTERED_INDEX));
       /* 
         Don't use an index scan with ORDER BY without limit.
         For GROUP BY without limit always use index scan
@@ -17675,9 +21874,47 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
             key (e.g. as in Innodb). 
             See Bug #28591 for details.
           */  
-          rec_per_key= used_key_parts &&
-                       used_key_parts <= keyinfo->key_parts ?
-                       keyinfo->rec_per_key[used_key_parts-1] : 1;
+          uint used_index_parts= keyinfo->key_parts;
+          uint used_pk_parts= 0;
+          if (used_key_parts > used_index_parts)
+            used_pk_parts= used_key_parts-used_index_parts;
+          /*
+            keyinfo->rec_per_key[] is indexed by secondary key part only:
+            guard against used_key_parts == 0 and do not index it with a
+            prefix length that also counts the appended pk parts.
+          */
+          rec_per_key= used_key_parts ?
+            keyinfo->rec_per_key[min(used_key_parts, used_index_parts)-1] : 1;
+          /* Take into account the selectivity of the used pk prefix */
+          if (used_pk_parts)
+          {
+            KEY *pkinfo=tab->table->key_info+table->s->primary_key;
+            /* Unknown rec_per_key for pk prefixes is assumed to be 1. */
+            if (used_key_parts == pkinfo->key_parts ||
+                pkinfo->rec_per_key[0] == 0)
+              rec_per_key= 1;
+            if (rec_per_key > 1)
+            {
+              rec_per_key*= pkinfo->rec_per_key[used_pk_parts-1];
+              rec_per_key/= pkinfo->rec_per_key[0];
+              /*
+                Adjust the estimate for secondary key components that are
+                also part of the primary key. Start at i=1: the step for
+                part i uses rec_per_key[i-1], so part 0 has no predecessor
+                and i=0 would read rec_per_key[-1].
+              */
+              for (uint i= 1; i < used_pk_parts; i++)
+              {
+                if (pkinfo->key_part[i].field->key_start.is_set(nr))
+                {
+                  /* We presume here that for any index rec_per_key[i] != 0
+                     if rec_per_key[0] != 0. */
+                  DBUG_ASSERT(pkinfo->rec_per_key[i]);
+                  rec_per_key*= pkinfo->rec_per_key[i-1];
+                  rec_per_key/= pkinfo->rec_per_key[i];
+                }
+              }
+            }
+          }
           set_if_bigger(rec_per_key, 1);
           /*
             With a grouping query each group containing on average
@@ -17688,7 +21925,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
               select_limit= table_records;
           else
             select_limit= (ha_rows) (select_limit*rec_per_key);
-        }
+        } /* group */
+
         /* 
           If tab=tk is not the last joined table tn then to get first
           L records from the result set we can expect to retrieve
@@ -17732,8 +21970,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
         */
         index_scan_time= select_limit/rec_per_key *
                          min(rec_per_key, table->file->scan_time());
-        if ((ref_key < 0 && is_covering) || 
-            (ref_key < 0 && (group || table->force_index)) ||
+        if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
             index_scan_time < read_time)
         {
           ha_rows quick_records= table_records;
@@ -17745,7 +21982,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
           if (best_key < 0 ||
               (select_limit <= min(quick_records,best_records) ?
                keyinfo->key_parts < best_key_parts :
-               quick_records < best_records))
+               quick_records < best_records) ||
+              (!is_best_covering && is_covering))
           {
             best_key= nr;
             best_key_parts= keyinfo->key_parts;
@@ -17800,16 +22038,6 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
 uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
                          ha_rows limit, bool *need_sort, bool *reverse)
 {
-  if (select && select->quick && select->quick->unique_key_range())
-  { // Single row select (always "ordered"): Ok to use with key field UPDATE
-    *need_sort= FALSE;
-    /*
-      Returning of MAX_KEY here prevents updating of used_key_is_modified
-      in mysql_update(). Use quick select "as is".
-    */
-    return MAX_KEY;
-  }
-
   if (!order)
   {
     *need_sort= FALSE;
diff --git a/sql/sql_select.h b/sql/sql_select.h
index c4f5dcba115..d87504f719c 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1,7 +1,8 @@
 #ifndef SQL_SELECT_INCLUDED
 #define SQL_SELECT_INCLUDED
 
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2010, 2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -16,7 +17,6 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
 
-
 /**
   @file
 
@@ -35,6 +35,23 @@
 #include "opt_range.h"                /* SQL_SELECT, QUICK_SELECT_I */
 
 
+#if defined(WITH_ARIA_STORAGE_ENGINE)
+#include "../storage/maria/ha_maria.h"
+#endif
+#if defined(USE_ARIA_FOR_TMP_TABLES)
+#define TMP_ENGINE_HTON maria_hton
+#else
+#define TMP_ENGINE_HTON myisam_hton
+#endif
+/* Values in optimize */
+#define KEY_OPTIMIZE_EXISTS		1
+#define KEY_OPTIMIZE_REF_OR_NULL	2
+#define KEY_OPTIMIZE_EQ	                4
+
+inline uint get_hash_join_key_no() { return MAX_KEY; }
+
+inline bool is_hash_join_key_no(uint key) { return key == MAX_KEY; }
+
 typedef struct keyuse_t {
   TABLE *table;
   Item	*val;				/**< or value if no field */
@@ -58,10 +75,19 @@ typedef struct keyuse_t {
     NULL  - Otherwise (the source equality can't be turned off)
   */
   bool *cond_guard;
+  /*
+     0..64    <=> This was created from semi-join IN-equality # sj_pred_no.
+     MAX_UINT  Otherwise
+  */
+  uint         sj_pred_no;
+
+  bool is_for_hash_join() { return is_hash_join_key_no(key); }
 } KEYUSE;
 
 class store_key;
 
+const uint NO_REF_PART= uint(-1); /* sentinel for uint null_ref_part */
+
 typedef struct st_table_ref
 {
   bool		key_err;
@@ -92,39 +118,32 @@ typedef struct st_table_ref
   */
   key_part_map  null_rejecting;
   table_map	depend_map;		  ///< Table depends on these tables.
+
   /* null byte position in the key_buf. Used for REF_OR_NULL optimization */
   uchar          *null_ref_key;
+  /* 
+    ref_or_null optimization: number of key part that alternates between
+    the lookup value or NULL (there's only one such part). 
+    If we're not using ref_or_null, the value is NO_REF_PART
+  */
+  uint           null_ref_part;
+
   /*
     The number of times the record associated with this key was used
     in the join.
   */
   ha_rows       use_count;
-} TABLE_REF;
 
+  /*
+    TRUE <=> disable the "cache" as doing lookup with the same key value may
+    produce different results (because of Index Condition Pushdown)
 
+  */
+  bool          disable_cache;
 
-#define CACHE_BLOB      1        /* blob field  */
-#define CACHE_STRIPPED  2        /* field stripped of trailing spaces */
-
-/**
-  CACHE_FIELD and JOIN_CACHE is used on full join to cache records in outer
-  table
-*/
-
-typedef struct st_cache_field {
-  uchar *str;
-  uint length, blob_length;
-  Field *field;
-  uint type;    /**< category of the of the copied field (CACHE_BLOB et al.) */
-} CACHE_FIELD;
-
-
-typedef struct st_join_cache {
-  uchar *buff,*pos,*end;
-  uint records,record_nr,ptr_record,fields,length,blobs;
-  CACHE_FIELD *field,**blob_ptr;
-  SQL_SELECT *select;
-} JOIN_CACHE;
+  bool tmp_table_index_lookup_init(THD *thd, KEY *tmp_key, Item_iterator &it,
+                                   bool value, uint skip= 0);
+} TABLE_REF;
 
 
 /*
@@ -132,7 +151,8 @@ typedef struct st_join_cache {
 */
 enum join_type { JT_UNKNOWN,JT_SYSTEM,JT_CONST,JT_EQ_REF,JT_REF,JT_MAYBE_REF,
 		 JT_ALL, JT_RANGE, JT_NEXT, JT_FT, JT_REF_OR_NULL,
-		 JT_UNIQUE_SUBQUERY, JT_INDEX_SUBQUERY, JT_INDEX_MERGE};
+		 JT_UNIQUE_SUBQUERY, JT_INDEX_SUBQUERY, JT_INDEX_MERGE,
+                 JT_HASH, JT_HASH_RANGE, JT_HASH_NEXT, JT_HASH_INDEX_MERGE};
 
 class JOIN;
 
@@ -153,16 +173,40 @@ enum enum_nested_loop_state
 typedef enum_nested_loop_state
 (*Next_select_func)(JOIN *, struct st_join_table *, bool);
 Next_select_func setup_end_select_func(JOIN *join);
+int rr_sequential(READ_RECORD *info);
+int rr_sequential_and_unpack(READ_RECORD *info);
 
 
+class JOIN_CACHE;
+class SJ_TMP_TABLE;
+class JOIN_TAB_RANGE;
+
 typedef struct st_join_table {
   st_join_table() {}                          /* Remove gcc warning */
   TABLE		*table;
   KEYUSE	*keyuse;			/**< pointer to first used key */
+  KEY           *hj_key;       /**< descriptor of the used best hash join key
+				    not supported by any index                 */
   SQL_SELECT	*select;
   COND		*select_cond;
+  COND          *on_precond;    /**< part of on condition to check before
+				     accessing the first inner table           */  
   QUICK_SELECT_I *quick;
-  Item	       **on_expr_ref;   /**< pointer to the associated on expression   */
+  /* 
+    The value of select_cond before we've attempted to do Index Condition
+    Pushdown. We may need to restore everything back if we first choose one
+    index but then reconsider (see test_if_skip_sort_order() for such
+    scenarios).
+    NULL means no index condition pushdown was performed.
+  */
+  Item          *pre_idx_push_select_cond;
+  /*
+    Pointer to the associated ON expression. on_expr_ref!=NULL except for
+    degenerate joins. 
+    *on_expr_ref!=NULL for tables that are first inner tables within an outer
+    join.
+  */
+  Item	       **on_expr_ref;
   COND_EQUAL    *cond_equal;    /**< multiple equalities for the on expression */
   st_join_table *first_inner;   /**< first inner table for including outerjoin */
   bool           found;         /**< true after all matches or null complement */
@@ -170,6 +214,21 @@ typedef struct st_join_table {
   st_join_table *last_inner;    /**< last table table for embedding outer join */
   st_join_table *first_upper;  /**< first inner table for embedding outer join */
   st_join_table *first_unmatched; /**< used for optimization purposes only     */
+
+  /*
+    For join tabs that are inside an SJM bush: root of the bush
+  */
+  st_join_table *bush_root_tab;
+
+  /* TRUE <=> This join_tab is inside an SJM bush and is the last leaf tab here */
+  bool          last_leaf_in_bush;
+  
+  /*
+    ptr  - this is a bush, and ptr points to description of child join_tab
+           range
+    NULL - this join tab has no bush children
+  */
+  JOIN_TAB_RANGE *bush_children;
   
   /* Special content for EXPLAIN 'Extra' column or NULL if none */
   const char	*info;
@@ -207,12 +266,26 @@ typedef struct st_join_table {
     method (but not 'index' for some reason), i.e. this matches method which
     E(#records) is in found_records.
   */
-  ha_rows       read_time;
+  double        read_time;
+  
+  /* psergey-todo: make the below have type double, like POSITION::records_read? */
+  ha_rows       records_read;
   
+  /* Startup cost for execution */
+  double        startup_cost;
+    
+  double        partial_join_cardinality;
+
   table_map	dependent,key_dependent;
   uint		use_quick,index;
   uint		status;				///< Save status for cache
-  uint		used_fields,used_fieldlength,used_blobs;
+  uint		used_fields;
+  ulong         used_fieldlength;
+  ulong         max_used_fieldlength;
+  uint          used_blobs;
+  uint          used_null_fields;
+  uint          used_rowid_fields;
+  uint          used_uneven_bit_fields;
   enum join_type type;
   bool		cached_eq_ref_table,eq_ref_table,not_used_in_distinct;
   bool		sorted;
@@ -223,11 +296,83 @@ typedef struct st_join_table {
   */ 
   ha_rows       limit; 
   TABLE_REF	ref;
-  JOIN_CACHE	cache;
+  /* TRUE <=> condition pushdown supports other tables presence */
+  bool          icp_other_tables_ok;
+  /* 
+    TRUE <=> condition pushed to the index has to be factored out of
+    the condition pushed to the table
+  */
+  bool          idx_cond_fact_out;
+  bool          use_join_cache;
+  uint          used_join_cache_level;
+  ulong         join_buffer_size_limit;
+  JOIN_CACHE	*cache;
+  /*
+    Index condition for BKA access join
+  */
+  Item          *cache_idx_cond;
+  SQL_SELECT    *cache_select;
   JOIN		*join;
-  /** Bitmap of nested joins this table is part of */
+  /*
+    Embedding SJ-nest (may be not the direct parent), or NULL if none.
+    This variable holds the result of table pullout.
+  */
+  TABLE_LIST    *emb_sj_nest;
+
+  /* FirstMatch variables (final QEP) */
+  struct st_join_table *first_sj_inner_tab;
+  struct st_join_table *last_sj_inner_tab;
+
+  /* Variables for semi-join duplicate elimination */
+  SJ_TMP_TABLE  *flush_weedout_table;
+  SJ_TMP_TABLE  *check_weed_out_table;
+  
+  /*
+    If set, means we should stop join enumeration after we've got the first
+    match and return to the specified join tab. May point to
+    join->join_tab[-1] which means stop join execution after the first
+    match.
+  */
+  struct st_join_table  *do_firstmatch;
+ 
+  /* 
+     ptr  - We're doing a LooseScan, this join tab is the first (i.e. 
+            "driving") join tab), and ptr points to the last join tab
+            handled by the strategy. loosescan_match_tab->found_match
+            should be checked to see if the current value group had a match.
+     NULL - Not doing a loose scan on this join tab.
+  */
+  struct st_join_table *loosescan_match_tab;
+
+  /* Buffer to save index tuple to be able to skip duplicates */
+  uchar *loosescan_buf;
+  
+  /* Length of key tuple (depends on #keyparts used) to store in the above */
+  uint loosescan_key_len;
+
+  /* Used by LooseScan. TRUE<=> there has been a matching record combination */
+  bool found_match;
+  
+  /*
+    Used by DuplicateElimination. tab->table->ref must have the rowid
+    whenever we have a current record.
+  */
+  int  keep_current_rowid;
+
+  /* NestedOuterJoins: Bitmap of nested joins this table is part of */
   nested_join_map embedding_map;
 
+  /*
+    Semi-join strategy to be used for this join table. This is a copy of
+    POSITION::sj_strategy field. This field is set up by the
+    fix_semijoin_strategies_for_picked_join_order.
+  */
+  uint sj_strategy;
+
+  uint n_sj_tables;
+
+  bool preread_init_done;
+
   void cleanup();
   inline bool is_using_loose_index_scan()
   {
@@ -240,12 +385,124 @@ typedef struct st_join_table {
     return (is_using_loose_index_scan() &&
             ((QUICK_GROUP_MIN_MAX_SELECT *)select->quick)->is_agg_distinct());
   }
+  bool check_rowid_field()
+  {
+    if (keep_current_rowid && !used_rowid_fields)
+    {
+      used_rowid_fields= 1;
+      used_fieldlength+= table->file->ref_length;
+    }
+    return test(used_rowid_fields);
+  }
+  bool is_inner_table_of_semi_join_with_first_match()
+  {
+    return first_sj_inner_tab != NULL;
+  }
+  bool is_inner_table_of_outer_join()
+  {
+    return first_inner != NULL;
+  }
+  bool is_single_inner_of_semi_join_with_first_match()
+  {
+    return first_sj_inner_tab == this && last_sj_inner_tab == this;            
+  }
+  bool is_single_inner_of_outer_join()
+  {
+    return first_inner == this && first_inner->last_inner == this;
+  }
+  bool is_first_inner_for_outer_join()
+  {
+    return first_inner && first_inner == this;
+  }
+  bool use_match_flag()
+  {
+    return is_first_inner_for_outer_join() || first_sj_inner_tab == this ; 
+  }
+  bool check_only_first_match()
+  {
+    return is_inner_table_of_semi_join_with_first_match() ||
+           (is_inner_table_of_outer_join() &&
+            table->reginfo.not_exists_optimize);
+  }
+  bool is_last_inner_table()
+  {
+    return (first_inner && first_inner->last_inner == this) ||
+           last_sj_inner_tab == this;
+  }
+  /*
+    Check whether the table belongs to a nest of inner tables of an
+    outer join or to a nest of inner tables of a semi-join
+  */
+  bool is_nested_inner()
+  {
+    if (first_inner && 
+        (first_inner != first_inner->last_inner || first_inner->first_upper))
+      return TRUE;
+    if (first_sj_inner_tab && first_sj_inner_tab != last_sj_inner_tab)
+      return TRUE;
+    return FALSE;
+  }
+  struct st_join_table *get_first_inner_table()
+  {
+    if (first_inner)
+      return first_inner;
+    return first_sj_inner_tab; 
+  }
+  void set_select_cond(COND *to, uint line)
+  {
+    DBUG_PRINT("info", ("select_cond changes %p -> %p at line %u tab %p",
+                        select_cond, to, line, this));
+    select_cond= to;
+  }
+  COND *set_cond(COND *new_cond)
+  {
+    COND *tmp_select_cond= select_cond;
+    set_select_cond(new_cond, __LINE__);
+    if (select)
+      select->cond= new_cond;
+    return tmp_select_cond;
+  }
+  void calc_used_field_length(bool max_fl);
+  ulong get_used_fieldlength()
+  {
+    if (!used_fieldlength)
+      calc_used_field_length(FALSE);
+    return used_fieldlength;
+  }
+  ulong get_max_used_fieldlength()
+  {
+    if (!max_used_fieldlength)
+      calc_used_field_length(TRUE);
+    return max_used_fieldlength;
+  }
+  double get_partial_join_cardinality() { return partial_join_cardinality; }
+  bool hash_join_is_possible();
+  int make_scan_filter();
+  bool is_ref_for_hash_join() { return is_hash_join_key_no(ref.key); }
+  KEY *get_keyinfo_by_key_no(uint key) 
+  {
+    return (is_hash_join_key_no(key) ? hj_key : table->key_info+key);
+  }
+  double scan_time();
+  bool preread_init();
+
+  bool is_sjm_nest() { return test(bush_children); }
 } JOIN_TAB;
 
+
+#include "sql_join_cache.h"
+
 enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool
                                         end_of_records);
 enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool
                                   end_of_records);
+enum_nested_loop_state
+end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
+	       bool end_of_records);
+enum_nested_loop_state
+end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
+		bool end_of_records);
+
 
 /**
   Information about a position of table within a join order. Used in join
@@ -276,6 +533,93 @@ typedef struct st_position
 
   /* If ref-based access is used: bitmap of tables this table depends on  */
   table_map ref_depend_map;
+
+  bool use_join_buffer; 
+  
+  
+  /* These form a stack of partial join order costs and output sizes */
+  COST_VECT prefix_cost;
+  double    prefix_record_count;
+
+  /*
+    Current optimization state: Semi-join strategy to be used for this
+    and preceding join tables.
+    
+    Join optimizer sets this for the *last* join_tab in the
+    duplicate-generating range. That is, in order to interpret this field, 
+    one needs to traverse join->[best_]positions array from right to left.
+    When you see a join table with sj_strategy!= SJ_OPT_NONE, some other
+    field (depending on the strategy) tells how many preceding positions 
+    this applies to. The values of covered_preceding_positions->sj_strategy
+    must be ignored.
+  */
+  uint sj_strategy;
+  /*
+    Valid only after fix_semijoin_strategies_for_picked_join_order() call:
+    if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
+    are covered by the specified semi-join strategy
+  */
+  uint n_sj_tables;
+
+/* LooseScan strategy members */
+
+  /* The first (i.e. driving) table we're doing loose scan for */
+  uint        first_loosescan_table;
+  /* 
+     Tables that need to be in the prefix before we can calculate the cost
+     of using LooseScan strategy.
+  */
+  table_map   loosescan_need_tables;
+
+  /*
+    keyno  -  Planning to do LooseScan on this key. If keyuse is NULL then 
+              this is a full index scan, otherwise this is a ref+loosescan
+              scan (and keyno matches the KEYUSE's)
+    MAX_KEY - Not doing a LooseScan
+  */
+  uint loosescan_key;  // final (one for strategy instance )
+  uint loosescan_parts; /* Number of keyparts to be kept distinct */
+  
+/* FirstMatch strategy */
+  /*
+    Index of the first inner table that we intend to handle with this
+    strategy
+  */
+  uint first_firstmatch_table;
+  /*
+    Tables that were not in the join prefix when we've started considering 
+    FirstMatch strategy.
+  */
+  table_map first_firstmatch_rtbl;
+  /* 
+    Tables that need to be in the prefix before we can calculate the cost
+    of using FirstMatch strategy.
+   */
+  table_map firstmatch_need_tables;
+
+  bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
+  void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
+
+/* Duplicate Weedout strategy */
+  /* The first table that the strategy will need to handle */
+  uint  first_dupsweedout_table;
+  /*
+    Tables that we will need to have in the prefix to do the weedout step
+    (all inner and all outer that the involved semi-joins are correlated with)
+  */
+  table_map dupsweedout_tables;
+
+/* SJ-Materialization-Scan strategy */
+  /* The last inner table (valid once we're after it) */
+  uint      sjm_scan_last_inner;
+  /*
+    Tables that we need to have in the prefix to calculate the correct cost.
+    Basically, we need all inner tables and outer tables mentioned in the
+    semi-join's ON expression so we can correctly account for fanout.
+  */
+  table_map sjm_scan_need_tables;
+
+  table_map prefix_dups_producing_tables;
 } POSITION;
 
 
@@ -289,17 +633,115 @@ typedef struct st_rollup
 } ROLLUP;
 
 
+#define SJ_OPT_NONE 0
+#define SJ_OPT_DUPS_WEEDOUT 1
+#define SJ_OPT_LOOSE_SCAN   2
+#define SJ_OPT_FIRST_MATCH  3
+#define SJ_OPT_MATERIALIZE  4
+#define SJ_OPT_MATERIALIZE_SCAN  5
+
+inline bool sj_is_materialize_strategy(uint strategy)
+{
+  return strategy >= SJ_OPT_MATERIALIZE;
+}
+
+class JOIN_TAB_RANGE: public Sql_alloc
+{
+public:
+  JOIN_TAB *start;
+  JOIN_TAB *end;
+};
+
+
 class JOIN :public Sql_alloc
 {
+private:
   JOIN(const JOIN &rhs);                        /**< not implemented */
   JOIN& operator=(const JOIN &rhs);             /**< not implemented */
+
+protected:
+
+  /**
+    The subset of the state of a JOIN that represents an optimized query
+    execution plan. Allows saving/restoring different JOIN plans for the same
+    query.
+  */
+  class Join_plan_state {
+  public:
+    DYNAMIC_ARRAY keyuse; /* Copy of the JOIN::keyuse array. */
+    POSITION best_positions[MAX_TABLES+1]; /* Copy of JOIN::best_positions */
+    /* Copies of the JOIN_TAB::keyuse pointers for each JOIN_TAB. */
+    KEYUSE *join_tab_keyuse[MAX_TABLES];
+    /* Copies of JOIN_TAB::checked_keys for each JOIN_TAB. */
+    key_map join_tab_checked_keys[MAX_TABLES];
+  public:
+    Join_plan_state()
+    {   
+      keyuse.elements= 0;
+      keyuse.buffer= NULL;
+    }
+    Join_plan_state(JOIN *join);
+    ~Join_plan_state()
+    {
+      delete_dynamic(&keyuse);
+    }
+  };
+
+  /* Results of reoptimizing a JOIN via JOIN::reoptimize(). */
+  enum enum_reopt_result {
+    REOPT_NEW_PLAN, /* there is a new reoptimized plan */
+    REOPT_OLD_PLAN, /* no new improved plan can be found, use the old one */
+    REOPT_ERROR,    /* an irrecoverable error occurred during reoptimization */
+    REOPT_NONE      /* not yet reoptimized */
+  };
+
+  /* Support for plan reoptimization with rewritten conditions. */
+  enum_reopt_result reoptimize(Item *added_where, table_map join_tables,
+                               Join_plan_state *save_to);
+  void save_query_plan(Join_plan_state *save_to);
+  void restore_query_plan(Join_plan_state *restore_from);
+  /* Choose a subquery plan for a table-less subquery. */
+  bool choose_tableless_subquery_plan();
+
 public:
-  JOIN_TAB *join_tab,**best_ref;
+  JOIN_TAB *join_tab, **best_ref;
   JOIN_TAB **map2table;    ///< mapping between table indexes and JOIN_TABs
   JOIN_TAB *join_tab_save; ///< saved join_tab for subquery reexecution
-  TABLE    **all_tables,*sort_by_table;
-  uint	   tables,const_tables;
+
+  List<JOIN_TAB_RANGE> join_tab_ranges;
+  
+  /*
+    Base tables participating in the join. After join optimization is done, the
+    tables are stored in the join order (but the only really important part is 
+    that const tables are first).
+  */
+  TABLE    **table;
+  /**
+    The table which has an index that allows to produce the required ordering.
+    A special value of 0x1 means that the ordering will be produced by
+    passing 1st non-const table to filesort(). NULL means no such table exists.
+  */
+  TABLE    *sort_by_table;
+  /* 
+    Number of tables in the join. 
+    (In MySQL, it is named 'tables' and is also the number of elements in 
+     join->join_tab array. In MariaDB, the latter is not true, so we've renamed
+     the variable)
+  */
+  uint	   table_count;
+  uint     outer_tables;  /**< Number of tables that are not inside semijoin */
+  uint     const_tables;
+  /* 
+    Number of tables in the top join_tab array. Normally this matches
+    (join_tab_ranges.head()->end - join_tab_ranges.head()->start). 
+    
+    We keep it here so that it is saved/restored with JOIN::restore_tmp.
+  */
+  uint     top_join_tab_count;
   uint	   send_group_parts;
+  bool	   group;          /**< If query contains GROUP BY clause */
+  bool     need_distinct;
+
   /**
     Indicates that grouping will be performed on the result set during
     query execution. This field belongs to query execution.
@@ -307,13 +749,24 @@ public:
     @see make_group_fields, alloc_group_fields, JOIN::exec
   */
   bool     sort_and_group; 
-  bool     first_record,full_join,group, no_field_update;
+  bool     first_record,full_join, no_field_update;
   bool	   do_send_rows;
-  table_map const_table_map,found_const_table_map;
+  table_map const_table_map;
+  /*
+    Constant tables for which we have found a row (as opposed to those for
+    which we didn't).
+  */
+  table_map found_const_table_map;
+  
+  /* Tables removed by table elimination. Set to 0 before the elimination. */
+  table_map eliminated_tables;
   /*
-     Bitmap of all inner tables from outer joins
+     Bitmap of all inner tables from outer joins (set at start of
+     make_join_statistics)
   */
   table_map outer_join;
+  /* Bitmap of tables used in the select list items */
+  table_map select_list_used_tables;
   ha_rows  send_records,found_records,examined_rows,row_limit, select_limit;
   /**
     Used to fetch no more than given amount of rows per one
@@ -325,15 +778,54 @@ public:
       - on each fetch iteration we add num_rows to fetch to fetch_limit
   */
   ha_rows  fetch_limit;
-  POSITION positions[MAX_TABLES+1],best_positions[MAX_TABLES+1];
+  /* Finally picked QEP. This is result of join optimization */
+  POSITION best_positions[MAX_TABLES+1];
+
+/******* Join optimization state members start *******/
+  /*
+    pointer - we're doing optimization for a semi-join materialization nest.
+    NULL    - otherwise
+  */
+  TABLE_LIST *emb_sjm_nest;
   
-  /* *
+  /* Current join optimization state */
+  POSITION positions[MAX_TABLES+1];
+  
+  /*
     Bitmap of nested joins embedding the position at the end of the current 
     partial join (valid only during join optimizer run).
   */
   nested_join_map cur_embedding_map;
+  
+  /*
+    Bitmap of inner tables of semi-join nests that have a proper subset of
+    their tables in the current join prefix. That is, of those semi-join
+    nests that have their tables both in and outside of the join prefix.
+  */
+  table_map cur_sj_inner_tables;
+  
+  /*
+    Bitmap of semi-join inner tables that are in the join prefix and for
+    which there's no provision for how to eliminate semi-join duplicates
+    they produce.
+  */
+  table_map cur_dups_producing_tables;
 
+  /* We also maintain a stack of join optimization states in join->positions[] */
+/******* Join optimization state members end *******/
+  /*
+    The cost of best complete join plan found so far during optimization,
+    after optimization phase - cost of picked join order (not taking into
+    account the changes made by test_if_skip_sort_order()).
+  */
   double   best_read;
+  /*
+    Estimated result rows (fanout) of the join operation. If this is a subquery
+    that is reexecuted multiple times, this value includes the estimated # of
+    reexecutions. This value is equal to the multiplication of all
+    join->positions[i].records_read of a JOIN.
+  */
+  double   record_count;
   List<Item> *fields;
   List<Cached_item> group_fields, group_fields_cache;
   TABLE    *tmp_table;
@@ -348,6 +840,15 @@ public:
   Item      *tmp_having; ///< To store having when processed temporary table
   Item      *having_history; ///< Store having for explain
   ulonglong  select_options;
+  /* 
+    Bitmap of allowed types of the join caches that
+    can be used for join operations
+  */
+  uint allowed_join_cache_types;
+  bool allowed_semijoin_with_cache;
+  bool allowed_outer_join_with_cache;
+  /* Maximum level of the join caches that can be used for join operations */ 
+  uint max_allowed_join_cache_level;
   select_result *result;
   TMP_TABLE_PARAM tmp_table_param;
   MYSQL_LOCK *lock;
@@ -363,24 +864,31 @@ public:
     the number of rows in it may vary from one subquery execution to another.
   */
   bool no_const_tables; 
+  /*
+    This flag is set if we call no_rows_in_result() as part of end_group().
+    This is used as a simple speed optimization to avoid calling
+    restore_no_rows_in_result() in ::reinit()
+  */
+  bool no_rows_in_result_called;
   
   /**
     Copy of this JOIN to be used with temporary tables.
 
-    tmp_join is used when the JOIN needs to be "reusable" (e.g. in a subquery
-    that gets re-executed several times) and we know will use temporary tables
-    for materialization. The materialization to a temporary table overwrites the
-    JOIN structure to point to the temporary table after the materialization is
-    done. This is where tmp_join is used : it's a copy of the JOIN before the
-    materialization and is used in restoring before re-execution by overwriting
-    the current JOIN structure with the saved copy.
-    Because of this we should pay extra care of not freeing up helper structures
-    that are referenced by the original contents of the JOIN. We can check for
-    this by making sure the "current" join is not the temporary copy, e.g.
-    !tmp_join || tmp_join != join
+    tmp_join is used when the JOIN needs to be "reusable" (e.g. in a
+    subquery that gets re-executed several times) and we know it will use
+    temporary tables for materialization. The materialization to a
+    temporary table overwrites the JOIN structure to point to the
+    temporary table after the materialization is done. This is where
+    tmp_join is used : it's a copy of the JOIN before the
+    materialization and is used in restoring before re-execution by
+    overwriting the current JOIN structure with the saved copy.
+    Because of this we should take extra care not to free up helper
+    structures that are referenced by the original contents of the
+    JOIN. We can check for this by making sure the "current" join is
+    not the temporary copy, e.g.  !tmp_join || tmp_join != join
  
-    We should free these sub-structures at JOIN::destroy() if the "current" join
-    has a copy is not that copy.
+    We should free these sub-structures at JOIN::destroy() if the
+    "current" join has a copy is not that copy.
   */
   JOIN *tmp_join;
   ROLLUP rollup;				///< Used with rollup
@@ -426,9 +934,24 @@ public:
   ORDER *order, *group_list, *proc_param; //hold parameters of mysql_select
   COND *conds;                            // ---"---
   Item *conds_history;                    // store WHERE for explain
+  COND *outer_ref_cond;       ///<part of conds containing only outer references
   TABLE_LIST *tables_list;           ///<hold 'tables' parameter of mysql_select
   List<TABLE_LIST> *join_list;       ///< list of joined tables in reverse order
   COND_EQUAL *cond_equal;
+  COND_EQUAL *having_equal;
+  /*
+    Constant condition computed during optimization, but evaluated during
+    join execution. Typically expensive conditions that should not be
+    evaluated at optimization time.
+  */
+  Item *exec_const_cond;
+  /*
+    Constant ORDER and/or GROUP expressions that contain subqueries. Such
+    expressions need to be evaluated to verify that the subquery indeed
+    returns a single row. The evaluation of such expressions is delayed
+    until query execution.
+  */
+  List<Item> exec_const_order_group_cond;
   SQL_SELECT *select;                ///<created in optimisation phase
   JOIN_TAB *return_tab;              ///<used only for outer joins
   Item **ref_pointer_array; ///<used pointer reference for this select
@@ -439,6 +962,19 @@ public:
   
   bool union_part; ///< this subselect is part of union 
   bool optimized; ///< flag to avoid double optimization in EXPLAIN
+  bool initialized; ///< flag to avoid double init_execution calls
+
+  /*
+    Additional WHERE and HAVING predicates to be considered for IN=>EXISTS
+    subquery transformation of a JOIN object.
+  */
+  Item *in_to_exists_where;
+  Item *in_to_exists_having;
+  
+  /* Temporary tables used to weed-out semi-join duplicates */
+  List<TABLE> sj_tmp_tables;
+  /* SJM nests that are executed with SJ-Materialization strategy */
+  List<SJ_MATERIALIZATION_INFO> sjm_info_list;
 
   /* 
     storage for caching buffers allocated during query execution. 
@@ -466,9 +1002,11 @@ public:
        select_result *result_arg)
   {
     join_tab= join_tab_save= 0;
-    all_tables= 0;
-    tables= 0;
+    table= 0;
+    table_count= 0;
+    top_join_tab_count= 0;
     const_tables= 0;
+    eliminated_tables= 0;
     join_list= 0;
     implicit_grouping= FALSE;
     sort_and_group= 0;
@@ -496,6 +1034,7 @@ public:
     no_order= 0;
     simple_order= 0;
     simple_group= 0;
+    need_distinct= 0;
     skip_sort_order= 0;
     need_tmp= 0;
     hidden_group_fields= 0; /*safety*/
@@ -506,8 +1045,12 @@ public:
     ref_pointer_array_size= 0;
     zero_result_cause= 0;
     optimized= 0;
+    initialized= 0;
     cond_equal= 0;
+    having_equal= 0;
+    exec_const_cond= 0;
     group_optimized_away= 0;
+    no_rows_in_result_called= 0;
 
     all_fields= fields_arg;
     if (&fields_list != &fields_arg)      /* Avoid valgrind-warning */
@@ -518,18 +1061,25 @@ public:
     rollup.state= ROLLUP::STATE_NONE;
 
     no_const_tables= FALSE;
+    outer_ref_cond= 0;
+    in_to_exists_where= NULL;
+    in_to_exists_having= NULL;
   }
 
   int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num,
 	      COND *conds, uint og_num, ORDER *order, ORDER *group,
 	      Item *having, ORDER *proc_param, SELECT_LEX *select,
 	      SELECT_LEX_UNIT *unit);
+  bool prepare_stage2();
   int optimize();
   int reinit();
+  int init_execution();
   void exec();
   int destroy();
   void restore_tmp();
   bool alloc_func_list();
+  bool flatten_subqueries();
+  bool optimize_unflattened_subqueries();
   bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields,
 			  bool before_group_by, bool recompute= FALSE);
 
@@ -551,7 +1101,6 @@ public:
 			  Item_sum ***func);
   int rollup_send_data(uint idx);
   int rollup_write_data(uint idx, TABLE *table);
-  void remove_subq_pushed_predicates(Item **where);
   /**
     Release memory and, if possible, the open tables held by this execution
     plan (and nested plans). It's used to release some tables before
@@ -566,8 +1115,8 @@ public:
   bool init_save_join_tab();
   bool send_row_on_empty_set()
   {
-    return (do_send_rows && tmp_table_param.sum_func_count != 0 &&
-	    !group_list && having_value != Item::COND_FALSE);
+    return (do_send_rows && implicit_grouping && !group_optimized_away &&
+            having_value != Item::COND_FALSE);
   }
   bool change_result(select_result *result);
   bool is_top_level_join() const
@@ -576,6 +1125,47 @@ public:
                                         select_lex == unit->fake_select_lex));
   }
   void cache_const_exprs();
+  inline table_map all_tables_map()
+  {
+    return (table_map(1) << table_count) - 1;
+  }
+  void drop_unused_derived_keys();
+  inline void eval_select_list_used_tables();
+  /* 
+    Return the table for which an index scan can be used to satisfy 
+    the sort order needed by the ORDER BY/(implicit) GROUP BY clause 
+  */
+  JOIN_TAB *get_sort_by_join_tab()
+  {
+    return (need_tmp || !sort_by_table || skip_sort_order ||
+            ((group || tmp_table_param.sum_func_count) && !group_list)) ?
+              NULL : join_tab+const_tables;
+  }
+  bool setup_subquery_caches();
+  bool shrink_join_buffers(JOIN_TAB *jt, 
+                           ulonglong curr_space,
+                           ulonglong needed_space);
+  void set_allowed_join_cache_types();
+  bool is_allowed_hash_join_access()
+  { 
+    return test(allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
+           max_allowed_join_cache_level > JOIN_CACHE_HASHED_BIT;
+  }
+  bool choose_subquery_plan(table_map join_tables);
+  void get_partial_cost_and_fanout(uint end_tab_idx,
+                                   table_map filter_map,
+                                   double *read_time_arg, 
+                                   double *record_count_arg);
+  void get_prefix_cost_and_fanout(uint n_tables, 
+                                  double *read_time_arg,
+                                  double *record_count_arg);
+  /* defined in opt_subselect.cc */
+  bool transform_max_min_subquery();
+  /* True if this JOIN is a subquery under an IN predicate. */
+  bool is_in_subquery()
+  {
+    return (unit->item && unit->item->is_in_predicate());
+  }
 private:
   /**
     TRUE if the query contains an aggregate function but has no GROUP
@@ -586,6 +1176,15 @@ private:
   void cleanup_item_list(List<Item> &items) const;
 };
 
+enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS};
+enum enum_with_const_tables { WITH_CONST_TABLES, WITHOUT_CONST_TABLES};
+
+JOIN_TAB *first_linear_tab(JOIN *join, enum enum_with_const_tables const_tbls);
+JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, 
+                          enum enum_with_bush_roots include_bush_roots);
+
+JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const);
+JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab);
 
 typedef struct st_select_check {
   uint const_ref,reg_ref;
@@ -595,11 +1194,6 @@ extern const char *join_type_str[];
 
 /* Extern functions in sql_select.cc */
 bool store_val_in_field(Field *field, Item *val, enum_check_fields check_flag);
-TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
-			ORDER *group, bool distinct, bool save_sum_fields,
-			ulonglong select_options, ha_rows rows_limit,
-			const char* alias);
-void free_tmp_table(THD *thd, TABLE *entry);
 void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, 
                        List<Item> &fields, bool reset_with_sum_func);
 bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
@@ -608,8 +1202,6 @@ bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
 		       uint elements, List<Item> &fields);
 void copy_fields(TMP_TABLE_PARAM *param);
 bool copy_funcs(Item **func_ptr, const THD *thd);
-bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
-			     int error, bool ignore_last_dupp_error);
 uint find_shortest_key(TABLE *table, const key_map *usable_keys);
 Field* create_tmp_field_from_field(THD *thd, Field* org_field,
                                    const char *name, TABLE *table,
@@ -620,7 +1212,7 @@ bool is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args);
 /* functions from opt_sum.cc */
 bool simple_pred(Item_func *func_item, Item **args, bool *inv_order);
 int opt_sum_query(THD* thd,
-                  TABLE_LIST *tables, List<Item> &all_fields, COND *conds);
+                  List<TABLE_LIST> &tables, List<Item> &all_fields, COND *conds);
 
 /* from sql_delete.cc, used by opt_range.cc */
 extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b);
@@ -632,6 +1224,7 @@ class store_key :public Sql_alloc
 public:
   bool null_key; /* TRUE <=> the value of the key has a null part */
   enum store_key_result { STORE_KEY_OK, STORE_KEY_FATAL, STORE_KEY_CONV };
+  enum Type { FIELD_STORE_KEY, ITEM_STORE_KEY, CONST_ITEM_STORE_KEY };
   store_key(THD *thd, Field *field_arg, uchar *ptr, uchar *null, uint length)
     :null_key(0), null_ptr(null), err(0)
   {
@@ -652,6 +1245,7 @@ public:
                                         ptr, null, 1);
   }
   virtual ~store_key() {}			/** Not actually needed */
+  virtual enum Type type() const=0;
   virtual const char *name() const=0;
 
   /**
@@ -703,15 +1297,32 @@ class store_key_field: public store_key
     {
       copy_field.set(to_field,from_field,0);
     }
-  }
+  }  
+
+  enum Type type() const { return FIELD_STORE_KEY; }
   const char *name() const { return field_name; }
 
+  void change_source_field(Item_field *fld_item)
+  {
+    copy_field.set(to_field, fld_item->field, 0);
+    field_name= fld_item->full_name();
+  }
+
  protected: 
   enum store_key_result copy_inner()
   {
     TABLE *table= copy_field.to_field->table;
     my_bitmap_map *old_map= dbug_tmp_use_all_columns(table,
                                                      table->write_set);
+
+    /* 
+      It looks like the next statement is needed only for a simplified
+      hash function over key values used now in BNLH join.
+      When the implementation of this function is replaced with a proper
+      full version, this statement should probably be removed.
+    */  
+    bzero(copy_field.to_ptr,copy_field.to_length);
+
     copy_field.do_copy(&copy_field);
     dbug_tmp_restore_column_map(table->write_set, old_map);
     null_key= to_field->is_null();
@@ -724,13 +1335,20 @@ class store_key_item :public store_key
 {
  protected:
   Item *item;
+  /*
+    Flag that forces usage of the save_val() method, which saves the value of
+    the item, instead of the save_in_field() method, which saves the result.
+  */
+  bool use_value;
 public:
   store_key_item(THD *thd, Field *to_field_arg, uchar *ptr,
-                 uchar *null_ptr_arg, uint length, Item *item_arg)
+                 uchar *null_ptr_arg, uint length, Item *item_arg, bool val)
     :store_key(thd, to_field_arg, ptr,
 	       null_ptr_arg ? null_ptr_arg : item_arg->maybe_null ?
-	       &err : (uchar*) 0, length), item(item_arg)
+	       &err : (uchar*) 0, length), item(item_arg), use_value(val)
   {}
+
+  enum Type type() const { return ITEM_STORE_KEY; }
   const char *name() const { return "func"; }
 
  protected:  
@@ -739,7 +1357,20 @@ public:
     TABLE *table= to_field->table;
     my_bitmap_map *old_map= dbug_tmp_use_all_columns(table,
                                                      table->write_set);
-    int res= item->save_in_field(to_field, 1);
+    int res= FALSE;
+
+    /* 
+      It looks like the next statement is needed only for a simplified
+      hash function over key values used now in BNLH join.
+      When the implementation of this function is replaced with a proper
+      full version, this statement should probably be removed.
+    */  
+    to_field->reset();
+
+    if (use_value)
+      item->save_val(to_field);
+    else
+      res= item->save_in_field(to_field, 1);
     /*
      Item::save_in_field() may call Item::val_xxx(). And if this is a subquery
      we need to check for errors executing it and react accordingly
@@ -763,9 +1394,11 @@ public:
 		       Item *item_arg)
     :store_key_item(thd, to_field_arg,ptr,
 		    null_ptr_arg ? null_ptr_arg : item_arg->maybe_null ?
-		    &err : (uchar*) 0, length, item_arg), inited(0)
+		    &err : (uchar*) 0, length, item_arg, FALSE), inited(0)
   {
   }
+
+  enum Type type() const { return CONST_ITEM_STORE_KEY; }
   const char *name() const { return "const"; }
 
 protected:  
@@ -775,6 +1408,9 @@ protected:
     if (!inited)
     {
       inited=1;
+      TABLE *table= to_field->table;
+      my_bitmap_map *old_map= dbug_tmp_use_all_columns(table,
+                                                       table->write_set);
       if ((res= item->save_in_field(to_field, 1)))
       {       
         if (!err)
@@ -786,6 +1422,7 @@ protected:
         */
       if (!err && to_field->table->in_use->is_error())
         err= 1; /* STORE_KEY_FATAL */
+      dbug_tmp_restore_column_map(table->write_set, old_map);
     }
     null_key= to_field->is_null() || item->null_value;
     return (err > 2 ? STORE_KEY_FATAL : (store_key_result) err);
@@ -806,7 +1443,9 @@ int setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
 		List<Item> &fields, List<Item> &all_fields, ORDER *order,
 		bool *hidden_group_fields);
 bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
-                   Item **ref_pointer_array, ORDER *group_list= NULL);
+                   Item **ref_pointer_array);
+int join_read_key2(THD *thd, struct st_join_table *tab, TABLE *table,
+                   struct st_table_ref *table_ref);
 
 bool handle_select(THD *thd, LEX *lex, select_result *result,
                    ulong setup_tables_done_option);
@@ -826,6 +1465,10 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
 			bool table_cant_handle_bit_fields,
                         bool make_copy_field,
                         uint convert_blob_length);
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, 
+                               ENGINE_COLUMNDEF *start_recinfo,
+                               ENGINE_COLUMNDEF **recinfo, 
+                               ulonglong options, my_bool big_tables);
 
 /*
   General routine to change field->ptr of a NULL-terminated array of Field
@@ -837,6 +1480,23 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
 
 TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list);
 
+int test_if_item_cache_changed(List<Cached_item> &list);
+int join_init_read_record(JOIN_TAB *tab);
+int join_read_record_no_init(JOIN_TAB *tab);
+void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
+inline Item * and_items(Item* cond, Item *item)
+{
+  return (cond? (new Item_cond_and(cond, item)) : item);
+}
+bool choose_plan(JOIN *join, table_map join_tables);
+void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, 
+                                table_map last_remaining_tables, 
+                                bool first_alt, uint no_jbuf_before,
+                                double *outer_rec_count, double *reopt_cost);
+Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
+                            bool *inherited_fl);
+bool test_if_ref(COND *root_cond, 
+                 Item_field *left_item,Item *right_item);
 
 inline bool optimizer_flag(THD *thd, uint flag)
 { 
@@ -850,5 +1510,37 @@ bool const_expression_in_where(COND *cond, Item *comp_item,
                                Field *comp_field= NULL,
                                Item **const_item= NULL);
 
+/* Table elimination entry point function */
+void eliminate_tables(JOIN *join);
+
+/* Index Condition Pushdown entry point function */
+void push_index_cond(JOIN_TAB *tab, uint keyno);
+
+/****************************************************************************
+  Temporary table support for SQL Runtime
+ ***************************************************************************/
+
+#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128
+#define AVG_STRING_LENGTH_TO_PACK_ROWS   64
+#define RATIO_TO_PACK_ROWS	       2
+#define MIN_STRING_LENGTH_TO_PACK_ROWS   10
+
+TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
+			ORDER *group, bool distinct, bool save_sum_fields,
+			ulonglong select_options, ha_rows rows_limit,
+			const char* alias, bool do_not_open=FALSE);
+void free_tmp_table(THD *thd, TABLE *entry);
+bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
+                                         ENGINE_COLUMNDEF *start_recinfo,
+                                         ENGINE_COLUMNDEF **recinfo, 
+                                         int error, bool ignore_last_dupp_key_error);
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, 
+                               ENGINE_COLUMNDEF *start_recinfo,
+                               ENGINE_COLUMNDEF **recinfo, 
+                               ulonglong options, my_bool big_tables);
+bool open_tmp_table(TABLE *table);
+void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps);
+double prev_record_reads(POSITION *positions, uint idx, table_map found_ref);
+void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist);
 
 #endif /* SQL_SELECT_INCLUDED */
diff --git a/sql/sql_servers.cc b/sql/sql_servers.cc
index 3796fa87f08..dce679a883f 100644
--- a/sql/sql_servers.cc
+++ b/sql/sql_servers.cc
@@ -211,8 +211,9 @@ static bool servers_load(THD *thd, TABLE_LIST *tables)
   free_root(&mem, MYF(0));
   init_sql_alloc(&mem, ACL_ALLOC_BLOCK_SIZE, 0);
 
-  init_read_record(&read_record_info,thd,table=tables[0].table,NULL,1,0, 
-                   FALSE);
+  if (init_read_record(&read_record_info,thd,table=tables[0].table,NULL,1,0, 
+                       FALSE))
+    DBUG_RETURN(1);
   while (!(read_record_info.read_record(&read_record_info)))
   {
     /* return_val is already TRUE, so no need to set */
@@ -544,10 +545,10 @@ int insert_server_record(TABLE *table, FOREIGN_SERVER *server)
                          system_charset_info);
 
   /* read index until record is that specified in server_name */
-  if ((error= table->file->index_read_idx_map(table->record[0], 0,
-                                              (uchar *)table->field[0]->ptr,
-                                              HA_WHOLE_KEY,
-                                              HA_READ_KEY_EXACT)))
+  if ((error= table->file->ha_index_read_idx_map(table->record[0], 0,
+                                                 (uchar *)table->field[0]->ptr,
+                                                 HA_WHOLE_KEY,
+                                                 HA_READ_KEY_EXACT)))
   {
     /* if not found, err */
     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
@@ -888,10 +889,10 @@ update_server_record(TABLE *table, FOREIGN_SERVER *server)
                          server->server_name_length,
                          system_charset_info);
 
-  if ((error= table->file->index_read_idx_map(table->record[0], 0,
-                                              (uchar *)table->field[0]->ptr,
-                                              ~(longlong)0,
-                                              HA_READ_KEY_EXACT)))
+  if ((error= table->file->ha_index_read_idx_map(table->record[0], 0,
+                                                 (uchar *)table->field[0]->ptr,
+                                                 ~(longlong)0,
+                                                 HA_READ_KEY_EXACT)))
   {
     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       table->file->print_error(error, MYF(0));
@@ -947,10 +948,10 @@ delete_server_record(TABLE *table,
   /* set the field that's the PK to the value we're looking for */
   table->field[0]->store(server_name, server_name_length, system_charset_info);
 
-  if ((error= table->file->index_read_idx_map(table->record[0], 0,
-                                          (uchar *)table->field[0]->ptr,
-                                          HA_WHOLE_KEY,
-                                          HA_READ_KEY_EXACT)))
+  if ((error= table->file->ha_index_read_idx_map(table->record[0], 0,
+                                                 (uchar *)table->field[0]->ptr,
+                                                 HA_WHOLE_KEY,
+                                                 HA_READ_KEY_EXACT)))
   {
     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       table->file->print_error(error, MYF(0));
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 887115b38ad..a65a2e40577 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -22,6 +23,7 @@
 #include "sql_acl.h"                        // fill_schema_*_privileges
 #include "sql_select.h"                         // For select_describe
 #include "sql_base.h"                       // close_tables_for_reopen
+#include "create_options.h"
 #include "sql_show.h"
 #include "sql_table.h"                        // filename_to_tablename,
                                               // primary_key_name,
@@ -42,6 +44,8 @@
 #include "sp_pcontext.h"
 #include "set_var.h"
 #include "sql_trigger.h"
+#include "sql_derived.h"
+#include "sql_connect.h"
 #include "authors.h"
 #include "contributors.h"
 #include "sql_partition.h"
@@ -53,6 +57,7 @@
 #include "lock.h"                           // MYSQL_OPEN_IGNORE_FLUSH
 #include "debug_sync.h"
 #include "datadict.h"   // dd_frm_type()
+#include "keycaches.h"
 
 #define STR_OR_NIL(S) ((S) ? (S) : "<nil>")
 
@@ -97,6 +102,9 @@ static TYPELIB grant_types = { sizeof(grant_names)/sizeof(char **),
                                grant_names, NULL};
 #endif
 
+/* Option text printed by store_create_info(); indexed by enum ha_choice */
+static const char *ha_choice_values[] = {"", "0", "1"};
+
 static void store_key_options(THD *thd, String *packet, TABLE *table,
                               KEY *key_info);
 
@@ -122,11 +130,21 @@ static int make_version_string(char *buf, int buf_length, uint version)
   return my_snprintf(buf, buf_length, "%d.%d", version>>8,version&0xff);
 }
 
+/* Names for plug->maturity, indexed by its value (see show_plugins()) */
+static const LEX_STRING maturity_name[]={
+  { C_STRING_WITH_LEN("Unknown") },
+  { C_STRING_WITH_LEN("Experimental") },
+  { C_STRING_WITH_LEN("Alpha") },
+  { C_STRING_WITH_LEN("Beta") },
+  { C_STRING_WITH_LEN("Gamma") },
+  { C_STRING_WITH_LEN("Stable") }};
+
+
 static my_bool show_plugins(THD *thd, plugin_ref plugin,
                             void *arg)
 {
   TABLE *table= (TABLE*) arg;
-  struct st_mysql_plugin *plug= plugin_decl(plugin);
+  struct st_maria_plugin *plug= plugin_decl(plugin);
   struct st_plugin_dl *plugin_dl= plugin_dlib(plugin);
   CHARSET_INFO *cs= system_charset_info;
   char version_buf[20];
@@ -140,7 +158,6 @@ static my_bool show_plugins(THD *thd, plugin_ref plugin,
         make_version_string(version_buf, sizeof(version_buf), plug->version),
         cs);
 
-
   switch (plugin_state(plugin)) {
   /* case PLUGIN_IS_FREED: does not happen */
   case PLUGIN_IS_DELETED:
@@ -172,7 +189,7 @@ static my_bool show_plugins(THD *thd, plugin_ref plugin,
     table->field[5]->set_notnull();
     table->field[6]->store(version_buf,
           make_version_string(version_buf, sizeof(version_buf),
-                              plugin_dl->version),
+                              plugin_dl->mariaversion),
           cs);
     table->field[6]->set_notnull();
   }
@@ -201,25 +218,40 @@ static my_bool show_plugins(THD *thd, plugin_ref plugin,
 
   switch (plug->license) {
   case PLUGIN_LICENSE_GPL:
-    table->field[9]->store(PLUGIN_LICENSE_GPL_STRING, 
+    table->field[9]->store(PLUGIN_LICENSE_GPL_STRING,
                            strlen(PLUGIN_LICENSE_GPL_STRING), cs);
     break;
   case PLUGIN_LICENSE_BSD:
-    table->field[9]->store(PLUGIN_LICENSE_BSD_STRING, 
+    table->field[9]->store(PLUGIN_LICENSE_BSD_STRING,
                            strlen(PLUGIN_LICENSE_BSD_STRING), cs);
     break;
   default:
-    table->field[9]->store(PLUGIN_LICENSE_PROPRIETARY_STRING, 
+    table->field[9]->store(PLUGIN_LICENSE_PROPRIETARY_STRING,
                            strlen(PLUGIN_LICENSE_PROPRIETARY_STRING), cs);
     break;
   }
-  table->field[9]->set_notnull();
 
   table->field[10]->store(
     global_plugin_typelib_names[plugin_load_option(plugin)],
     strlen(global_plugin_typelib_names[plugin_load_option(plugin)]),
     cs);
 
+  if (plug->maturity <= MariaDB_PLUGIN_MATURITY_STABLE)
+     table->field[11]->store(maturity_name[plug->maturity].str,
+                             maturity_name[plug->maturity].length,
+                             cs);
+   else
+     table->field[11]->store("Unknown", 7, cs);
+
+  if (plug->version_info)
+  {
+    table->field[12]->store(plug->version_info,
+                            strlen(plug->version_info), cs);
+    table->field[12]->set_notnull();
+  }
+  else
+    table->field[12]->set_null();
+
   return schema_table_store_record(thd, table);
 }
 
@@ -427,8 +459,6 @@ find_files(THD *thd, List<LEX_STRING> *files, const char *db,
       wild_length= strlen(wild);
   }
 
-
-
   bzero((char*) &table_list,sizeof(table_list));
 
   if (!(dirp = my_dir(path,MYF(dir ? MY_WANT_STAT : 0))))
@@ -442,11 +472,11 @@ find_files(THD *thd, List<LEX_STRING> *files, const char *db,
 
   for (i=0 ; i < (uint) dirp->number_off_files  ; i++)
   {
-    char uname[NAME_LEN + 1];                   /* Unencoded name */
+    char uname[SAFE_NAME_LEN + 1];              /* Unencoded name */
     file=dirp->dir_entry+i;
     if (dir)
     {                                           /* Return databases */
-      if ((file->name[0] == '.' && 
+      if ((file->name[0] == '.' &&
           ((file->name[1] == '.' && file->name[2] == '\0') ||
             file->name[1] == '\0')))
         continue;                               /* . or .. */
@@ -477,7 +507,7 @@ find_files(THD *thd, List<LEX_STRING> *files, const char *db,
           if (my_wildcmp(files_charset_info,
                          uname, uname + file_name_len,
                          wild, wild + wild_length,
-                         wild_prefix, wild_one,wild_many))
+                         wild_prefix, wild_one, wild_many))
             continue;
 	}
 	else if (wild_compare(uname, wild, 0))
@@ -519,7 +549,7 @@ find_files(THD *thd, List<LEX_STRING> *files, const char *db,
         continue;
     }
 #endif
-    if (!(file_name= 
+    if (!(file_name=
           thd->make_lex_string(file_name, uname, file_name_len, TRUE)) ||
         files->push_back(file_name))
     {
@@ -691,7 +721,7 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list)
     bool open_error=
       open_tables(thd, &table_list, &counter,
                   MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL) ||
-                  mysql_handle_derived(thd->lex, &mysql_derived_prepare);
+                  mysql_handle_derived(thd->lex, DT_PREPARE);
     thd->pop_internal_handler();
     if (open_error && (thd->killed || thd->is_error()))
       goto exit;
@@ -747,7 +777,7 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list)
       protocol->store(table_list->schema_table->table_name,
                       system_charset_info);
     else
-      protocol->store(table_list->table->alias, system_charset_info);
+      protocol->store(table_list->table->alias.c_ptr(), system_charset_info);
   }
 
   if (table_list->view)
@@ -799,6 +829,7 @@ bool mysqld_show_create_db(THD *thd, char *dbname,
 		sctx->master_access);
   if (!(db_access & DB_ACLS) && check_grant_db(thd,dbname))
   {
+    status_var_increment(thd->status_var.access_denied_errors);
     my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
              sctx->priv_user, sctx->host_or_ip, dbname);
     general_log_print(thd,COM_INIT_DB,ER(ER_DBACCESS_DENIED_ERROR),
@@ -872,7 +903,8 @@ mysqld_list_fields(THD *thd, TABLE_LIST *table_list, const char *wild)
   DBUG_PRINT("enter",("table: %s",table_list->table_name));
 
   if (open_normal_and_derived_tables(thd, table_list,
-                                     MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL))
+                                     MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL,
+                                     DT_PREPARE | DT_CREATE))
     DBUG_VOID_RETURN;
   table= table_list->table;
 
@@ -881,7 +913,7 @@ mysqld_list_fields(THD *thd, TABLE_LIST *table_list, const char *wild)
   Field **ptr,*field;
   for (ptr=table->field ; (field= *ptr); ptr++)
   {
-    if (!wild || !wild[0] || 
+    if (!wild || !wild[0] ||
         !wild_case_compare(system_charset_info, field->field_name,wild))
     {
       if (table_list->view)
@@ -962,7 +994,7 @@ append_identifier(THD *thd, String *packet, const char *name, uint length)
 
   /*
     The identifier must be quoted as it includes a quote character or
-   it's a keyword
+    it's a keyword
   */
 
   (void) packet->reserve(length*2 + 2);
@@ -1092,7 +1124,7 @@ static bool get_field_default_value(THD *thd, Field *timestamp_field,
         char *ptr= longlong2str(dec, tmp + 2, 2);
         uint32 length= (uint32) (ptr - tmp);
         tmp[0]= 'b';
-        tmp[1]= '\'';        
+        tmp[1]= '\'';
         tmp[length]= '\'';
         type.length(length + 1);
         quoted= 0;
@@ -1124,6 +1156,30 @@ static bool get_field_default_value(THD *thd, Field *timestamp_field,
 }
 
 
+/**
+  Appends each option of a list to a string as " name=value"; the value
+  goes through append_unescaped() only when opt->quoted_value is set.
+  @param thd             thread handler, passed to append_identifier()
+  @param packet          string to append to
+  @param opt             first option of the list; NULL appends nothing
+*/
+
+static void append_create_options(THD *thd, String *packet,
+				  engine_option_value *opt)
+{
+  for(; opt; opt= opt->next)
+  {
+    DBUG_ASSERT(opt->value.str);
+    packet->append(' ');
+    append_identifier(thd, packet, opt->name.str, opt->name.length);
+    packet->append('=');
+    if (opt->quoted_value)
+      append_unescaped(packet, opt->value.str, opt->value.length);
+    else
+      packet->append(opt->value.str, opt->value.length);
+  }
+}
+
 /*
   Build a CREATE TABLE statement for a table.
 
@@ -1138,11 +1194,11 @@ static bool get_field_default_value(THD *thd, Field *timestamp_field,
                       to tailor the format of the statement.  Can be
                       NULL, in which case only SQL_MODE is considered
                       when building the statement.
-  
+
   NOTE
     Currently always return 0, but might return error code in the
     future.
-    
+
   RETURN
     0       OK
  */
@@ -1190,7 +1246,7 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
   else
   {
     if (lower_case_table_names == 2)
-      alias= table->alias;
+      alias= table->alias.c_ptr();
     else
     {
       alias= share->table_name.str;
@@ -1243,7 +1299,7 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
     field->sql_type(type);
     packet->append(type.ptr(), type.length(), system_charset_info);
 
-    if (field->has_charset() && 
+    if (field->has_charset() &&
         !(thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40)))
     {
       if (field->charset() != share->table_charset)
@@ -1251,8 +1307,8 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
 	packet->append(STRING_WITH_LEN(" CHARACTER SET "));
 	packet->append(field->charset()->csname);
       }
-      /* 
-	For string types dump collation name only if 
+      /*
+	For string types dump collation name only if
 	collation is not primary for the given charset
       */
       if (!(field->charset()->state & MY_CS_PRIMARY))
@@ -1262,6 +1318,19 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
       }
     }
 
+    if (field->vcol_info)
+    {
+      packet->append(STRING_WITH_LEN(" AS ("));
+      packet->append(field->vcol_info->expr_str.str,
+                     field->vcol_info->expr_str.length,
+                     system_charset_info);
+      packet->append(STRING_WITH_LEN(")"));
+      if (field->stored_in_db)
+        packet->append(STRING_WITH_LEN(" PERSISTENT"));
+      else
+        packet->append(STRING_WITH_LEN(" VIRTUAL"));
+    }
+
     if (flags & NOT_NULL_FLAG)
       packet->append(STRING_WITH_LEN(" NOT NULL"));
     else if (field->type() == MYSQL_TYPE_TIMESTAMP)
@@ -1273,18 +1342,19 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
       packet->append(STRING_WITH_LEN(" NULL"));
     }
 
-    if (get_field_default_value(thd, table->timestamp_field,
+    if (!field->vcol_info &&
+        get_field_default_value(thd, table->timestamp_field,
                                 field, &def_value, 1))
     {
       packet->append(STRING_WITH_LEN(" DEFAULT "));
       packet->append(def_value.ptr(), def_value.length(), system_charset_info);
     }
 
-    if (!limited_mysql_mode && table->timestamp_field == field && 
+    if (!limited_mysql_mode && table->timestamp_field == field &&
         field->unireg_check != Field::TIMESTAMP_DN_FIELD)
       packet->append(STRING_WITH_LEN(" ON UPDATE CURRENT_TIMESTAMP"));
 
-    if (field->unireg_check == Field::NEXT_NUMBER && 
+    if (field->unireg_check == Field::NEXT_NUMBER &&
         !(thd->variables.sql_mode & MODE_NO_FIELD_OPTIONS))
       packet->append(STRING_WITH_LEN(" AUTO_INCREMENT"));
 
@@ -1293,12 +1363,15 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
       packet->append(STRING_WITH_LEN(" COMMENT "));
       append_unescaped(packet, field->comment.str, field->comment.length);
     }
+    append_create_options(thd, packet, field->option_list);
   }
 
   key_info= table->key_info;
   bzero((char*) &create_info, sizeof(create_info));
-  /* Allow update_create_info to update row type */
+  /* Allow update_create_info to update row type, page checksums and options */
   create_info.row_type= share->row_type;
+  create_info.page_checksum= share->page_checksum;
+  create_info.options= share->db_create_options;
   file->update_create_info(&create_info);
   primary_key= share->primary_key;
 
@@ -1362,6 +1435,7 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
       append_identifier(thd, packet, parser_name->str, parser_name->length);
       packet->append(STRING_WITH_LEN(" */ "));
     }
+    append_create_options(thd, packet, key_info->option_list);
   }
 
   /*
@@ -1380,25 +1454,6 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
   {
     show_table_options= TRUE;
 
-    /* TABLESPACE and STORAGE */
-    if (share->tablespace ||
-        share->default_storage_media != HA_SM_DEFAULT)
-    {
-      packet->append(STRING_WITH_LEN(" /*!50100"));
-      if (share->tablespace)
-      {
-        packet->append(STRING_WITH_LEN(" TABLESPACE "));
-        packet->append(share->tablespace, strlen(share->tablespace));
-      }
-
-      if (share->default_storage_media == HA_SM_DISK)
-        packet->append(STRING_WITH_LEN(" STORAGE DISK"));
-      if (share->default_storage_media == HA_SM_MEMORY)
-        packet->append(STRING_WITH_LEN(" STORAGE MEMORY"));
-
-      packet->append(STRING_WITH_LEN(" */"));
-    }
-
     /*
       IF   check_create_info
       THEN add ENGINE only if it was used when creating the table
@@ -1485,20 +1540,30 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
       packet->append(buff, (uint) (end - buff));
     }
 
-    if (share->db_create_options & HA_OPTION_PACK_KEYS)
+    if (create_info.options & HA_OPTION_PACK_KEYS)
       packet->append(STRING_WITH_LEN(" PACK_KEYS=1"));
-    if (share->db_create_options & HA_OPTION_NO_PACK_KEYS)
+    if (create_info.options & HA_OPTION_NO_PACK_KEYS)
       packet->append(STRING_WITH_LEN(" PACK_KEYS=0"));
     /* We use CHECKSUM, instead of TABLE_CHECKSUM, for backward compability */
-    if (share->db_create_options & HA_OPTION_CHECKSUM)
+    if (create_info.options & HA_OPTION_CHECKSUM)
       packet->append(STRING_WITH_LEN(" CHECKSUM=1"));
-    if (share->db_create_options & HA_OPTION_DELAY_KEY_WRITE)
+    if (create_info.page_checksum != HA_CHOICE_UNDEF)
+    {
+      packet->append(STRING_WITH_LEN(" PAGE_CHECKSUM="));
+      packet->append(ha_choice_values[create_info.page_checksum], 1);
+    }
+    if (create_info.options & HA_OPTION_DELAY_KEY_WRITE)
       packet->append(STRING_WITH_LEN(" DELAY_KEY_WRITE=1"));
     if (create_info.row_type != ROW_TYPE_DEFAULT)
     {
       packet->append(STRING_WITH_LEN(" ROW_FORMAT="));
       packet->append(ha_row_type[(uint) create_info.row_type]);
     }
+    if (share->transactional != HA_CHOICE_UNDEF)
+    {
+      packet->append(STRING_WITH_LEN(" TRANSACTIONAL="));
+      packet->append(ha_choice_values[(uint) share->transactional], 1);
+    }
     if (table->s->key_block_size)
     {
       char *end;
@@ -1517,6 +1582,7 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
       packet->append(STRING_WITH_LEN(" CONNECTION="));
       append_unescaped(packet, share->connect_string.str, share->connect_string.length);
     }
+    append_create_options(thd, packet, share->option_list);
     append_directory(thd, packet, "DATA",  create_info.data_file_name);
     append_directory(thd, packet, "INDEX", create_info.index_file_name);
   }
@@ -1609,7 +1675,7 @@ view_store_options(THD *thd, TABLE_LIST *table, String *buff)
 
 /*
   Append DEFINER clause to the given buffer.
-  
+
   SYNOPSIS
     append_definer()
     thd           [in] thread handle
@@ -1621,7 +1687,7 @@ view_store_options(THD *thd, TABLE_LIST *table, String *buff)
 static void append_algorithm(TABLE_LIST *table, String *buff)
 {
   buff->append(STRING_WITH_LEN("ALGORITHM="));
-  switch ((int8)table->algorithm) {
+  switch ((int16)table->algorithm) {
   case VIEW_ALGORITHM_UNDEFINED:
     buff->append(STRING_WITH_LEN("UNDEFINED "));
     break;
@@ -1638,7 +1704,7 @@ static void append_algorithm(TABLE_LIST *table, String *buff)
 
 /*
   Append DEFINER clause to the given buffer.
-  
+
   SYNOPSIS
     append_definer()
     thd           [in] thread handle
@@ -1745,6 +1811,7 @@ public:
   uint   command;
   const char *user,*host,*db,*proc_info,*state_info;
   CSET_STRING query_string;
+  double progress;
 };
 
 #ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION
@@ -1797,6 +1864,11 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
   field->maybe_null=1;
   field_list.push_back(field=new Item_empty_string("Info",max_query_length));
   field->maybe_null=1;
+  if (!thd->variables.old_mode)
+  {
+    field_list.push_back(field= new Item_float("Progress", 0.0, 3, 7));
+    field->maybe_null= 0;
+  }
   if (protocol->send_result_set_metadata(&field_list,
                             Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
     DBUG_VOID_RETURN;
@@ -1827,8 +1899,8 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
 			"%s:%u", tmp_sctx->host_or_ip, tmp->peer_port);
 	}
 	else
-	  thd_info->host= thd->strdup(tmp_sctx->host_or_ip[0] ? 
-                                      tmp_sctx->host_or_ip : 
+	  thd_info->host= thd->strdup(tmp_sctx->host_or_ip[0] ?
+                                      tmp_sctx->host_or_ip :
                                       tmp_sctx->host ? tmp_sctx->host : "");
         if ((thd_info->db=tmp->db))             // Safe test
           thd_info->db=thd->strdup(thd_info->db);
@@ -1850,8 +1922,24 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
           thd_info->query_string=
             CSET_STRING(q, q ? length : 0, tmp->query_charset());
         }
-        mysql_mutex_unlock(&tmp->LOCK_thd_data);
+
+        /*
+          Progress report. We need to do this under a lock to ensure that all
+          is from the same stage.
+        */
+        if (tmp->progress.max_counter)
+        {
+          uint max_stage= max(tmp->progress.max_stage, 1);
+          thd_info->progress= (((tmp->progress.stage / (double) max_stage) +
+                                ((tmp->progress.counter /
+                                  (double) tmp->progress.max_counter) /
+                                 (double) max_stage)) *
+                               100.0);
+        }
+        else
+          thd_info->progress= 0.0;
         thd_info->start_time= tmp->start_time;
+        mysql_mutex_unlock(&tmp->LOCK_thd_data);
         thread_infos.append(thd_info);
       }
     }
@@ -1860,6 +1948,9 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
 
   thread_info *thd_info;
   time_t now= my_time(0);
+  char buff[20];                                // For progress
+  String store_buffer(buff, sizeof(buff), system_charset_info);
+
   while ((thd_info=thread_infos.get()))
   {
     protocol->prepare_for_resend();
@@ -1878,6 +1969,8 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
     protocol->store(thd_info->state_info, system_charset_info);
     protocol->store(thd_info->query_string.str(),
                     thd_info->query_string.charset());
+    if (!thd->variables.old_mode)
+      protocol->store(thd_info->progress, 3, &store_buffer);
     if (protocol->write())
       break; /* purecov: inspected */
   }
@@ -1890,8 +1983,8 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
   TABLE *table= tables->table;
   CHARSET_INFO *cs= system_charset_info;
   char *user;
-  time_t now= my_time(0);
-  DBUG_ENTER("fill_process_list");
+  my_hrtime_t unow= my_hrtime();
+  DBUG_ENTER("fill_schema_processlist");
 
   user= thd->security_ctx->master_access & PROCESS_ACL ?
         NullS : thd->security_ctx->priv_user;
@@ -1908,6 +2001,7 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
       Security_context *tmp_sctx= tmp->security_ctx;
       struct st_my_thread_var *mysys_var;
       const char *val;
+      ulonglong max_counter;
 
       if ((!tmp->vio_ok() && !tmp->system_thread) ||
           (user && (!tmp_sctx->user || strcmp(tmp_sctx->user, user))))
@@ -1949,8 +2043,10 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
         table->field[4]->store(command_name[tmp->command].str,
                                command_name[tmp->command].length, cs);
       /* MYSQL_TIME */
-      table->field[5]->store((longlong)(tmp->start_time ?
-                                      now - tmp->start_time : 0), FALSE);
+      const ulonglong utime= (tmp->start_time ?
+                              (unow.val - tmp->start_time * HRTIME_RESOLUTION -
+                               tmp->start_time_sec_part) : 0);
+      table->field[5]->store(utime / HRTIME_RESOLUTION, TRUE);
       /* STATE */
       if ((val= thread_state_info(tmp)))
       {
@@ -1962,6 +2058,9 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
         mysql_mutex_unlock(&mysys_var->mutex);
       mysql_mutex_unlock(&tmp->LOCK_thd_data);
 
+      /* TIME_MS */
+      table->field[8]->store((double)(utime / (HRTIME_RESOLUTION / 1000.0)));
+
       /* INFO */
       /* Lock THD mutex that protects its data when looking at it. */
       mysql_mutex_lock(&tmp->LOCK_thd_data);
@@ -1972,6 +2071,18 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
                                    tmp->query_length()), cs);
         table->field[7]->set_notnull();
       }
+
+      /*
+        Progress report. We need to do this under a lock to ensure that all
+        is from the same stage.
+      */
+      if ((max_counter= tmp->progress.max_counter))
+      {
+        table->field[9]->store((longlong) tmp->progress.stage + 1, 1);
+        table->field[10]->store((longlong) tmp->progress.max_stage, 1);
+        table->field[11]->store((double) tmp->progress.counter /
+                                (double) max_counter*100.0);
+      }
       mysql_mutex_unlock(&tmp->LOCK_thd_data);
 
       if (schema_table_store_record(thd, table))
@@ -2086,7 +2197,7 @@ void reset_status_vars()
     /* Note that SHOW_LONG_NOFLUSH variables are not reset */
     if (ptr->type == SHOW_LONG)
       *(ulong*) ptr->value= 0;
-  }  
+  }
 }
 
 /*
@@ -2162,11 +2273,7 @@ void remove_status_vars(SHOW_VAR *list)
   }
 }
 
-inline void make_upper(char *buf)
-{
-  for (; *buf; buf++)
-    *buf= my_toupper(system_charset_info, *buf);
-}
+
 
 static bool show_status_array(THD *thd, const char *wild,
                               SHOW_VAR *variables,
@@ -2190,7 +2297,7 @@ static bool show_status_array(THD *thd, const char *wild,
   CHARSET_INFO *charset= system_charset_info;
   DBUG_ENTER("show_status_array");
 
-  thd->count_cuted_fields= CHECK_FIELD_WARN;  
+  thd->count_cuted_fields= CHECK_FIELD_WARN;
   null_lex_str.str= 0;				// For sys_var->value_ptr()
   null_lex_str.length= 0;
 
@@ -2205,7 +2312,7 @@ static bool show_status_array(THD *thd, const char *wild,
     strnmov(prefix_end, variables->name, len);
     name_buffer[sizeof(name_buffer)-1]=0;       /* Safety */
     if (ucase_names)
-      make_upper(name_buffer);
+      my_caseup_str(system_charset_info, name_buffer);
 
     restore_record(table, s->default_values);
     table->field[0]->store(name_buffer, strlen(name_buffer),
@@ -2249,21 +2356,21 @@ static bool show_status_array(THD *thd, const char *wild,
         */
         switch (show_type) {
         case SHOW_DOUBLE_STATUS:
-          value= ((char *) status_var + (ulong) value);
+          value= ((char *) status_var + (intptr) value);
           /* fall through */
         case SHOW_DOUBLE:
           /* 6 is the default precision for '%f' in sprintf() */
           end= buff + my_fcvt(*(double *) value, 6, buff, NULL);
           break;
         case SHOW_LONG_STATUS:
-          value= ((char *) status_var + (ulong) value);
+          value= ((char *) status_var + (intptr) value);
           /* fall through */
         case SHOW_LONG:
         case SHOW_LONG_NOFLUSH: // the difference lies in refresh_status()
           end= int10_to_str(*(long*) value, buff, 10);
           break;
         case SHOW_LONGLONG_STATUS:
-          value= ((char *) status_var + (ulong) value);
+          value= ((char *) status_var + (intptr) value);
           /* fall through */
         case SHOW_LONGLONG:
           end= longlong10_to_str(*(longlong*) value, buff, 10);
@@ -2310,14 +2417,6 @@ static bool show_status_array(THD *thd, const char *wild,
             end= pos + ls->length;
           break;
         }
-        case SHOW_KEY_CACHE_LONG:
-          value= (char*) dflt_key_cache + (ulong)value;
-          end= int10_to_str(*(long*) value, buff, 10);
-          break;
-        case SHOW_KEY_CACHE_LONGLONG:
-          value= (char*) dflt_key_cache + (ulong)value;
-	  end= longlong10_to_str(*(longlong*) value, buff, 10);
-	  break;
         case SHOW_UNDEF:
           break;                                        // Return empty string
         case SHOW_SYS:                                  // Cannot happen
@@ -2344,6 +2443,321 @@ end:
   DBUG_RETURN(res);
 }
 
+#ifdef COMPLETE_PATCH_NOT_ADDED_YET
+/*
+  Aggregate values for mapped_user entries by their role (priv_user).
+  Compiled only when COMPLETE_PATCH_NOT_ADDED_YET is defined.
+  SYNOPSIS
+  aggregate_user_stats
+  all_user_stats - input hash to aggregate
+  agg_user_stats - hash initialized here; gets one entry per priv_user
+
+  RETURN
+  0 - OK
+  1 - error (hash init, malloc or insert failed; already logged)
+*/
+
+static int aggregate_user_stats(HASH *all_user_stats, HASH *agg_user_stats)
+{
+  DBUG_ENTER("aggregate_user_stats");
+  if (my_hash_init(agg_user_stats, system_charset_info,
+                max(all_user_stats->records, 1),
+                0, 0, (my_hash_get_key)get_key_user_stats,
+                (my_hash_free_key)free_user_stats, 0))
+  {
+    sql_print_error("Malloc in aggregate_user_stats failed");
+    DBUG_RETURN(1);
+  }
+
+  for (uint i= 0; i < all_user_stats->records; i++)
+  {
+    USER_STATS *user= (USER_STATS*)my_hash_element(all_user_stats, i);
+    USER_STATS *agg_user;
+    uint name_length= strlen(user->priv_user);
+
+    if (!(agg_user= (USER_STATS*) my_hash_search(agg_user_stats,
+                                              (uchar*)user->priv_user,
+                                              name_length)))
+    {
+      // First entry for this role: start a new entry from its counters.
+      if (!(agg_user= (USER_STATS*) my_malloc(sizeof(USER_STATS),
+                                              MYF(MY_WME | MY_ZEROFILL))))
+      {
+        sql_print_error("Malloc in aggregate_user_stats failed");
+        DBUG_RETURN(1);
+      }
+
+      init_user_stats(agg_user, user->priv_user, name_length,
+                      user->priv_user,
+                      user->total_connections, user->concurrent_connections,
+                      user->connected_time, user->busy_time, user->cpu_time,
+                      user->bytes_received, user->bytes_sent,
+                      user->binlog_bytes_written,
+                      user->rows_sent, user->rows_read,
+                      user->rows_inserted, user->rows_deleted,
+                      user->rows_updated, 
+                      user->select_commands, user->update_commands,
+                      user->other_commands,
+                      user->commit_trans, user->rollback_trans,
+                      user->denied_connections, user->lost_connections,
+                      user->access_denied_errors, user->empty_queries);
+
+      if (my_hash_insert(agg_user_stats, (uchar*) agg_user))
+      {
+        /* Out of memory; the hash did not take the entry, so free it here */
+        my_free(agg_user, 0);
+        sql_print_error("Malloc in aggregate_user_stats failed");
+        DBUG_RETURN(1);
+      }
+    }
+    else
+    {
+      /* Role already seen: add this user's counters to its entry. */
+      add_user_stats(agg_user,
+                     user->total_connections, user->concurrent_connections,
+                     user->connected_time, user->busy_time, user->cpu_time,
+                     user->bytes_received, user->bytes_sent,
+                     user->binlog_bytes_written,
+                     user->rows_sent, user->rows_read,
+                     user->rows_inserted, user->rows_deleted,
+                     user->rows_updated,
+                     user->select_commands, user->update_commands,
+                     user->other_commands,
+                     user->commit_trans, user->rollback_trans,
+                     user->denied_connections, user->lost_connections,
+                     user->access_denied_errors, user->empty_queries);
+    }
+  }
+  DBUG_PRINT("exit", ("aggregated %lu input into %lu output entries",
+                      all_user_stats->records, agg_user_stats->records));
+  DBUG_RETURN(0);
+}
+#endif
+
+/*
+  Store one I_S row per USER_STATS entry of a hash (user or client stats)
+
+  SYNOPSIS
+  send_user_stats
+  thd - current thread
+  all_user_stats - hash of USER_STATS values to return
+  table - I_S table; fields are filled in the column order used below
+  RETURN
+  0 - OK
+  1 - error from schema_table_store_record()
+*/
+
+int send_user_stats(THD* thd, HASH *all_user_stats, TABLE *table)
+{
+  DBUG_ENTER("send_user_stats");
+
+  for (uint i= 0; i < all_user_stats->records; i++)
+  {
+    uint j= 0;
+    USER_STATS *user_stats= (USER_STATS*) my_hash_element(all_user_stats, i);
+    /* Fill the columns left to right; j is the next field index */
+    table->field[j++]->store(user_stats->user, user_stats->user_name_length,
+                             system_charset_info);
+    table->field[j++]->store((longlong)user_stats->total_connections,TRUE);
+    table->field[j++]->store((longlong)user_stats->concurrent_connections, TRUE);
+    table->field[j++]->store((longlong)user_stats->connected_time, TRUE);
+    table->field[j++]->store((double)user_stats->busy_time);
+    table->field[j++]->store((double)user_stats->cpu_time);
+    table->field[j++]->store((longlong)user_stats->bytes_received, TRUE);
+    table->field[j++]->store((longlong)user_stats->bytes_sent, TRUE);
+    table->field[j++]->store((longlong)user_stats->binlog_bytes_written, TRUE);
+    table->field[j++]->store((longlong)user_stats->rows_read, TRUE);
+    table->field[j++]->store((longlong)user_stats->rows_sent, TRUE);
+    table->field[j++]->store((longlong)user_stats->rows_deleted, TRUE);
+    table->field[j++]->store((longlong)user_stats->rows_inserted, TRUE);
+    table->field[j++]->store((longlong)user_stats->rows_updated, TRUE);
+    table->field[j++]->store((longlong)user_stats->select_commands, TRUE);
+    table->field[j++]->store((longlong)user_stats->update_commands, TRUE);
+    table->field[j++]->store((longlong)user_stats->other_commands, TRUE);
+    table->field[j++]->store((longlong)user_stats->commit_trans, TRUE);
+    table->field[j++]->store((longlong)user_stats->rollback_trans, TRUE);
+    table->field[j++]->store((longlong)user_stats->denied_connections, TRUE);
+    table->field[j++]->store((longlong)user_stats->lost_connections, TRUE);
+    table->field[j++]->store((longlong)user_stats->access_denied_errors, TRUE);
+    table->field[j++]->store((longlong)user_stats->empty_queries, TRUE);
+    if (schema_table_store_record(thd, table))
+    {
+      DBUG_PRINT("error", ("store record error"));
+      DBUG_RETURN(1);
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+/*
+  Fill the I_S table behind SHOW USER_STATISTICS from global_user_stats
+
+  SYNOPSIS
+  fill_schema_user_stats
+  thd - current thread
+  tables - I_S table list element; rows are stored in tables->table
+  cond - not used; no filtering is done here
+
+  RETURN
+  0 - OK
+  1 - error (global access check failed or a row could not be stored)
+*/
+
+int fill_schema_user_stats(THD* thd, TABLE_LIST* tables, COND* cond)
+{
+  TABLE *table= tables->table;
+  int result;
+  DBUG_ENTER("fill_schema_user_stats");
+
+  if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))
+    DBUG_RETURN(1);
+
+  /*
+    Store every entry of global_user_stats into the I_S table while
+    holding LOCK_global_user_client_stats. No pattern matching is done.
+  */
+
+  mysql_mutex_lock(&LOCK_global_user_client_stats);
+  result= send_user_stats(thd, &global_user_stats, table) != 0;
+  mysql_mutex_unlock(&LOCK_global_user_client_stats);
+
+  DBUG_PRINT("exit", ("result: %d", result));
+  DBUG_RETURN(result);
+}
+
+/*
+   Fill the information schema table with per-client statistics
+
+   SYNOPSIS
+     fill_schema_client_stats
+       thd - current thread
+       tables - table list entry; rows are stored into tables->table
+       cond - WHERE condition (not used by this function)
+   RETURN
+     0 - OK
+     1 - error (access check failed or send_user_stats() returned non-zero)
+*/
+
+int fill_schema_client_stats(THD* thd, TABLE_LIST* tables, COND* cond)
+{
+  TABLE *table= tables->table;
+  int result;
+  DBUG_ENTER("fill_schema_client_stats");
+
+  if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))
+    DBUG_RETURN(1);
+
+  /*
+    Let send_user_stats() store the global_client_stats entries into 'table'
+    while LOCK_global_user_client_stats is held. 'cond' is not used here.
+  */
+
+  mysql_mutex_lock(&LOCK_global_user_client_stats);
+  result= send_user_stats(thd, &global_client_stats, table) != 0;
+  mysql_mutex_unlock(&LOCK_global_user_client_stats);
+
+  DBUG_PRINT("exit", ("result: %d", result));
+  DBUG_RETURN(result);
+}
+
+
+/* Fill information schema table with table statistics. Entries that fail
+   the SELECT_ACL checks are skipped. Returns 0, or 1 if a row store fails. */
+int fill_schema_table_stats(THD *thd, TABLE_LIST *tables, COND *cond)
+{
+  TABLE *table= tables->table;
+  DBUG_ENTER("fill_schema_table_stats");
+
+  mysql_mutex_lock(&LOCK_global_table_stats);
+  for (uint i= 0; i < global_table_stats.records; i++)
+  {
+    char *end_of_schema;
+    TABLE_STATS *table_stats= 
+      (TABLE_STATS*)my_hash_element(&global_table_stats, i);
+    TABLE_LIST tmp_table;
+    size_t schema_length, table_name_length;
+    /* The key is read as "schema\0table": split it at the first NUL */
+    end_of_schema= strend(table_stats->table);
+    schema_length= (size_t) (end_of_schema - table_stats->table);
+    table_name_length= strlen(table_stats->table + schema_length + 1);
+    /* Skip this entry unless the SELECT_ACL checks below succeed */
+    bzero((char*) &tmp_table,sizeof(tmp_table));
+    tmp_table.db=         table_stats->table;
+    tmp_table.table_name= end_of_schema+1;
+    tmp_table.grant.privilege= 0;
+    if (check_access(thd, SELECT_ACL, tmp_table.db,
+                     &tmp_table.grant.privilege, NULL, 0, 1) ||
+        check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX,
+                    1))
+      continue;
+    /* Columns 0-1 hold the schema and table name, 2-4 the counters */
+    table->field[0]->store(table_stats->table, schema_length,
+                           system_charset_info);
+    table->field[1]->store(table_stats->table + schema_length+1,
+                           table_name_length, system_charset_info);
+    table->field[2]->store((longlong)table_stats->rows_read, TRUE);
+    table->field[3]->store((longlong)table_stats->rows_changed, TRUE);
+    table->field[4]->store((longlong)table_stats->rows_changed_x_indexes,
+                           TRUE);
+    if (schema_table_store_record(thd, table))
+    {
+      mysql_mutex_unlock(&LOCK_global_table_stats);
+      DBUG_RETURN(1);
+    }
+  }
+  mysql_mutex_unlock(&LOCK_global_table_stats);
+  DBUG_RETURN(0);
+}
+
+
+/* Fill information schema table with index statistics. Entries that fail
+   the SELECT_ACL checks are skipped. Returns 0, or 1 if a row store fails. */
+int fill_schema_index_stats(THD *thd, TABLE_LIST *tables, COND *cond)
+{
+  TABLE *table= tables->table;
+  DBUG_ENTER("fill_schema_index_stats");
+
+  mysql_mutex_lock(&LOCK_global_index_stats);
+  for (uint i= 0; i < global_index_stats.records; i++)
+  {
+    INDEX_STATS *index_stats =
+      (INDEX_STATS*) my_hash_element(&global_index_stats, i);
+    TABLE_LIST tmp_table;
+    char *index_name;
+    size_t schema_name_length, table_name_length, index_name_length;
+    /* The key is read as "schema\0table\0index"; privileges use the table */
+    bzero((char*) &tmp_table,sizeof(tmp_table));
+    tmp_table.db=         index_stats->index;
+    tmp_table.table_name= strend(index_stats->index)+1;
+    tmp_table.grant.privilege= 0;
+    if (check_access(thd, SELECT_ACL, tmp_table.db,
+                      &tmp_table.grant.privilege, NULL, 0, 1) ||
+        check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1))
+      continue;
+    /* Name lengths come from pointer differences and index_name_length */
+    index_name=         strend(tmp_table.table_name)+1; 
+    schema_name_length= (tmp_table.table_name - index_stats->index) -1;
+    table_name_length=  (index_name - tmp_table.table_name)-1;
+    index_name_length=  (index_stats->index_name_length - schema_name_length -
+                         table_name_length - 3);
+    /* NOTE(review): the "- 3" above presumably drops three NULs - confirm */
+    table->field[0]->store(tmp_table.db, schema_name_length,
+                           system_charset_info);
+    table->field[1]->store(tmp_table.table_name, table_name_length,
+                           system_charset_info);
+    table->field[2]->store(index_name, index_name_length, system_charset_info);
+    table->field[3]->store((longlong)index_stats->rows_read, TRUE);
+
+    if (schema_table_store_record(thd, table))
+    { 
+      mysql_mutex_unlock(&LOCK_global_index_stats);
+      DBUG_RETURN(1);
+    }
+  }
+  mysql_mutex_unlock(&LOCK_global_index_stats);
+  DBUG_RETURN(0);
+}
+
 
 /* collect status for all running threads */
 
@@ -2356,10 +2770,10 @@ void calc_sum_of_all_status(STATUS_VAR *to)
 
   I_List_iterator<THD> it(threads);
   THD *tmp;
-  
+
   /* Get global values as base */
   *to= global_status_var;
-  
+
   /* Add to this status from existing threads */
   while ((tmp= it++))
     add_to_status(to, &tmp->status_var);
@@ -2396,11 +2810,12 @@ typedef struct st_lookup_field_values
 bool schema_table_store_record(THD *thd, TABLE *table)
 {
   int error;
-  if ((error= table->file->ha_write_row(table->record[0])))
+  if ((error= table->file->ha_write_tmp_row(table->record[0])))
   {
-    if (create_myisam_from_heap(thd, table, 
-                                table->pos_in_table_list->schema_table_param,
-                                error, 0))
+    TMP_TABLE_PARAM *param= table->pos_in_table_list->schema_table_param;
+    if (create_internal_tmp_table_from_heap(thd, table, param->start_recinfo, 
+                                            &param->recinfo, error, 0))
+
       return 1;
   }
   return 0;
@@ -2419,17 +2834,17 @@ static int make_table_list(THD *thd, SELECT_LEX *sel,
 
 
 /**
-  @brief    Get lookup value from the part of 'WHERE' condition 
+  @brief    Get lookup value from the part of 'WHERE' condition
 
-  @details This function gets lookup value from 
-           the part of 'WHERE' condition if it's possible and 
+  @details This function gets lookup value from
+           the part of 'WHERE' condition if it's possible and
            fill appropriate lookup_field_vals struct field
            with this value.
 
   @param[in]      thd                   thread handler
   @param[in]      item_func             part of WHERE condition
   @param[in]      table                 I_S table
-  @param[in, out] lookup_field_vals     Struct which holds lookup values 
+  @param[in, out] lookup_field_vals     Struct which holds lookup values
 
   @return
     0             success
@@ -2437,7 +2852,7 @@ static int make_table_list(THD *thd, SELECT_LEX *sel,
 */
 
 bool get_lookup_value(THD *thd, Item_func *item_func,
-                      TABLE_LIST *table, 
+                      TABLE_LIST *table,
                       LOOKUP_FIELD_VALUES *lookup_field_vals)
 {
   ST_SCHEMA_TABLE *schema_table= table->schema_table;
@@ -2503,16 +2918,16 @@ bool get_lookup_value(THD *thd, Item_func *item_func,
 
 
 /**
-  @brief    Calculates lookup values from 'WHERE' condition 
+  @brief    Calculates lookup values from 'WHERE' condition
 
   @details This function calculates lookup value(database name, table name)
-           from 'WHERE' condition if it's possible and 
+           from 'WHERE' condition if it's possible and
            fill lookup_field_vals struct fields with these values.
 
   @param[in]      thd                   thread handler
   @param[in]      cond                  WHERE condition
   @param[in]      table                 I_S table
-  @param[in, out] lookup_field_vals     Struct which holds lookup values 
+  @param[in, out] lookup_field_vals     Struct which holds lookup values
 
   @return
     0             success
@@ -2587,7 +3002,7 @@ bool uses_only_table_name_fields(Item *item, TABLE_LIST *table)
   else if (item->type() == Item::REF_ITEM)
     return uses_only_table_name_fields(item->real_item(), table);
 
-  if (item->type() == Item::SUBSELECT_ITEM && !item->const_item())
+  if (item->real_type() == Item::SUBSELECT_ITEM && !item->const_item())
     return 0;
 
   return 1;
@@ -2661,7 +3076,7 @@ static COND * make_cond_for_info_schema(COND *cond, TABLE_LIST *table)
   @param[in]      thd                   thread handler
   @param[in]      cond                  WHERE condition
   @param[in]      tables                I_S table
-  @param[in, out] lookup_field_values   Struct which holds lookup values 
+  @param[in, out] lookup_field_values   Struct which holds lookup values
 
   @return
     0             success
@@ -2741,7 +3156,7 @@ enum enum_schema_tables get_schema_table_idx(ST_SCHEMA_TABLE *schema_table)
     idx_field_vals        idx_field_vals->db_name contains db name or
                           wild string
     with_i_schema         returns 1 if we added 'IS' name to list
-                          otherwise returns 0 
+                          otherwise returns 0
 
   RETURN
     zero                  success
@@ -2765,7 +3180,7 @@ int make_db_list(THD *thd, List<LEX_STRING> *files,
       LIKE clause (see also get_index_field_values() function)
     */
     if (!lookup_field_vals->db_value.str ||
-        !wild_case_compare(system_charset_info, 
+        !wild_case_compare(system_charset_info,
                            INFORMATION_SCHEMA_NAME.str,
                            lookup_field_vals->db_value.str))
     {
@@ -2809,7 +3224,7 @@ int make_db_list(THD *thd, List<LEX_STRING> *files,
 }
 
 
-struct st_add_schema_table 
+struct st_add_schema_table
 {
   List<LEX_STRING> *files;
   const char *wild;
@@ -2873,7 +3288,7 @@ int schema_tables_add(THD *thd, List<LEX_STRING> *files, const char *wild)
       else if (wild_compare(tmp_schema_table->table_name, wild, 0))
         continue;
     }
-    if ((file_name= 
+    if ((file_name=
          thd->make_lex_string(file_name, tmp_schema_table->table_name,
                               strlen(tmp_schema_table->table_name), TRUE)) &&
         !files->push_back(file_name))
@@ -2935,7 +3350,7 @@ make_table_name_list(THD *thd, List<LEX_STRING> *table_names, LEX *lex,
       }
     }
     else
-    {    
+    {
       if (table_names->push_back(&lookup_field_vals->table_value))
         return 1;
       /*
@@ -3102,7 +3517,8 @@ fill_schema_table_by_open(THD *thd, bool is_show_fields_or_keys,
                                          (MYSQL_OPEN_IGNORE_FLUSH |
                                           MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL |
                                           (can_deadlock ?
-                                           MYSQL_OPEN_FAIL_ON_MDL_CONFLICT : 0)));
+                                           MYSQL_OPEN_FAIL_ON_MDL_CONFLICT : 0)),
+                                         DT_PREPARE | DT_CREATE);
   /*
     Restore old value of sql_command back as it is being looked at in
     process_table() function.
@@ -3501,7 +3917,6 @@ static int fill_schema_table_from_frm(THD *thd, TABLE_LIST *tables,
     res= schema_table->process_table(thd, &table_list, table,
                                      res, db_name, table_name);
     free_root(&tbl.mem_root, MYF(0));
-    my_free((void *) tbl.alias);
   }
 
 end_share:
@@ -3673,7 +4088,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
 
   if (!lookup_field_vals.wild_db_value && !lookup_field_vals.wild_table_value)
   {
-    /* 
+    /*
       if lookup value is empty string then
       it's impossible table name or db name
     */
@@ -3691,7 +4106,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
       !lookup_field_vals.wild_db_value)
     tables->has_db_lookup_value= TRUE;
   if (lookup_field_vals.table_value.length &&
-      !lookup_field_vals.wild_table_value) 
+      !lookup_field_vals.wild_table_value)
     tables->has_table_lookup_value= TRUE;
 
   if (tables->has_db_lookup_value && tables->has_table_lookup_value)
@@ -3741,7 +4156,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
         {
           /*
             If table is I_S.tables and open_table_method is 0 (eg SKIP_OPEN)
-            we can skip table opening and we don't have lookup value for 
+            we can skip table opening and we don't have lookup value for
             table name or lookup value is wild string(table name list is
             already created by make_table_name_list() function).
           */
@@ -3848,7 +4263,7 @@ int fill_schema_schemata(THD *thd, TABLE_LIST *tables, COND *cond)
 
   if (get_lookup_field_values(thd, cond, tables, &lookup_field_vals))
     DBUG_RETURN(0);
-  DBUG_PRINT("INDEX VALUES",("db_name='%s', table_name='%s'",
+  DBUG_PRINT("INDEX VALUES",("db_name: %s  table_name: %s",
                              lookup_field_vals.db_value.str,
                              lookup_field_vals.table_value.str));
   if (make_db_list(thd, &db_names, &lookup_field_vals,
@@ -3936,7 +4351,8 @@ static int get_schema_tables_record(THD *thd, TABLE_LIST *tables,
   }
   else
   {
-    char option_buff[350],*ptr;
+    char option_buff[350];
+    String str(option_buff,sizeof(option_buff), system_charset_info);
     TABLE *show_table= tables->table;
     TABLE_SHARE *share= show_table->s;
     handler *file= show_table->file;
@@ -3974,58 +4390,71 @@ static int get_schema_tables_record(THD *thd, TABLE_LIST *tables,
     table->field[4]->store(tmp_buff, strlen(tmp_buff), cs);
     table->field[5]->store((longlong) share->frm_version, TRUE);
 
-    ptr=option_buff;
+    str.length(0);
 
     if (share->min_rows)
     {
-      ptr=strmov(ptr," min_rows=");
-      ptr=longlong10_to_str(share->min_rows,ptr,10);
+      str.qs_append(STRING_WITH_LEN(" min_rows="));
+      str.qs_append(share->min_rows);
     }
 
     if (share->max_rows)
     {
-      ptr=strmov(ptr," max_rows=");
-      ptr=longlong10_to_str(share->max_rows,ptr,10);
+      str.qs_append(STRING_WITH_LEN(" max_rows="));
+      str.qs_append(share->max_rows);
     }
 
     if (share->avg_row_length)
     {
-      ptr=strmov(ptr," avg_row_length=");
-      ptr=longlong10_to_str(share->avg_row_length,ptr,10);
+      str.qs_append(STRING_WITH_LEN(" avg_row_length="));
+      str.qs_append(share->avg_row_length);
     }
 
     if (share->db_create_options & HA_OPTION_PACK_KEYS)
-      ptr=strmov(ptr," pack_keys=1");
+      str.qs_append(STRING_WITH_LEN(" pack_keys=1"));
 
     if (share->db_create_options & HA_OPTION_NO_PACK_KEYS)
-      ptr=strmov(ptr," pack_keys=0");
+      str.qs_append(STRING_WITH_LEN(" pack_keys=0"));
 
     /* We use CHECKSUM, instead of TABLE_CHECKSUM, for backward compability */
     if (share->db_create_options & HA_OPTION_CHECKSUM)
-      ptr=strmov(ptr," checksum=1");
+      str.qs_append(STRING_WITH_LEN(" checksum=1"));
+
+    if (share->page_checksum != HA_CHOICE_UNDEF)
+    {
+      str.qs_append(STRING_WITH_LEN(" page_checksum="));
+      str.qs_append(ha_choice_values[(uint) share->page_checksum]);
+    }
 
     if (share->db_create_options & HA_OPTION_DELAY_KEY_WRITE)
-      ptr=strmov(ptr," delay_key_write=1");
+      str.qs_append(STRING_WITH_LEN(" delay_key_write=1"));
 
     if (share->row_type != ROW_TYPE_DEFAULT)
-      ptr=strxmov(ptr, " row_format=", 
-                  ha_row_type[(uint) share->row_type],
-                  NullS);
+    {
+      str.qs_append(STRING_WITH_LEN(" row_format="));
+      str.qs_append(ha_row_type[(uint) share->row_type]);
+    }
 
     if (share->key_block_size)
     {
-      ptr= strmov(ptr, " KEY_BLOCK_SIZE=");
-      ptr= longlong10_to_str(share->key_block_size, ptr, 10);
+      str.qs_append(STRING_WITH_LEN(" key_block_size="));
+      str.qs_append(share->key_block_size);
     }
 
 #ifdef WITH_PARTITION_STORAGE_ENGINE
     if (is_partitioned)
-      ptr= strmov(ptr, " partitioned");
+      str.qs_append(STRING_WITH_LEN(" partitioned"));
 #endif
 
-    table->field[19]->store(option_buff+1,
-                            (ptr == option_buff ? 0 : 
-                             (uint) (ptr-option_buff)-1), cs);
+    if (share->transactional != HA_CHOICE_UNDEF)
+    {
+      str.qs_append(STRING_WITH_LEN(" transactional="));
+      str.qs_append(ha_choice_values[(uint) share->transactional]);
+    }
+    append_create_options(thd, &str, share->option_list);
+
+    if (str.length())
+      table->field[19]->store(str.ptr()+1, str.length()-1, cs);
 
     tmp_buff= (share->table_charset ?
                share->table_charset->name : "default");
@@ -4071,7 +4500,7 @@ static int get_schema_tables_record(THD *thd, TABLE_LIST *tables,
         tmp_buff= "Compact";
         break;
       case ROW_TYPE_PAGE:
-        tmp_buff= "Paged";
+        tmp_buff= "Page";
         break;
       }
 
@@ -4101,24 +4530,24 @@ static int get_schema_tables_record(THD *thd, TABLE_LIST *tables,
       {
         thd->variables.time_zone->gmt_sec_to_TIME(&time,
                                                   (my_time_t) file->stats.create_time);
-        table->field[14]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+        table->field[14]->store_time(&time);
         table->field[14]->set_notnull();
       }
       if (file->stats.update_time)
       {
         thd->variables.time_zone->gmt_sec_to_TIME(&time,
                                                   (my_time_t) file->stats.update_time);
-        table->field[15]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+        table->field[15]->store_time(&time);
         table->field[15]->set_notnull();
       }
       if (file->stats.check_time)
       {
         thd->variables.time_zone->gmt_sec_to_TIME(&time,
                                                   (my_time_t) file->stats.check_time);
-        table->field[16]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+        table->field[16]->store_time(&time);
         table->field[16]->set_notnull();
       }
-      if (file->ha_table_flags() & (ulong) HA_HAS_CHECKSUM)
+      if (file->ha_table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM))
       {
         table->field[18]->store((longlong) file->checksum(), TRUE);
         table->field[18]->set_notnull();
@@ -4161,8 +4590,8 @@ err:
   @return         void
 */
 
-void store_column_type(TABLE *table, Field *field, CHARSET_INFO *cs,
-                       uint offset)
+static void store_column_type(TABLE *table, Field *field, CHARSET_INFO *cs,
+                              uint offset)
 {
   bool is_blob;
   int decimals, field_length;
@@ -4172,8 +4601,8 @@ void store_column_type(TABLE *table, Field *field, CHARSET_INFO *cs,
 
   field->sql_type(column_type);
   /* DTD_IDENTIFIER column */
-  table->field[offset + 7]->store(column_type.ptr(), column_type.length(), cs);
-  table->field[offset + 7]->set_notnull();
+  table->field[offset + 8]->store(column_type.ptr(), column_type.length(), cs);
+  table->field[offset + 8]->set_notnull();
   /*
     DATA_TYPE column:
     MySQL column type has the following format:
@@ -4186,7 +4615,7 @@ void store_column_type(TABLE *table, Field *field, CHARSET_INFO *cs,
       if there is no dimention part then check the presence of
       [unsigned] [zerofill] attributes and cut them of if exist.
     */
-    tmp_buff= strchr(column_type.ptr(), ' ');
+    tmp_buff= strchr(column_type.c_ptr_safe(), ' ');
   table->field[offset]->store(column_type.ptr(),
                               (tmp_buff ? tmp_buff - column_type.ptr() :
                                column_type.length()), cs);
@@ -4215,6 +4644,7 @@ void store_column_type(TABLE *table, Field *field, CHARSET_INFO *cs,
     They are set to -1 if they should not be set (we should return NULL)
   */
 
+  field_length= -1;
   decimals= field->decimals();
   switch (field->type()) {
   case MYSQL_TYPE_NEWDECIMAL:
@@ -4243,8 +4673,14 @@ void store_column_type(TABLE *table, Field *field, CHARSET_INFO *cs,
     if (decimals == NOT_FIXED_DEC)
       decimals= -1;                           // return NULL
     break;
+  case MYSQL_TYPE_TIME:
+  case MYSQL_TYPE_TIMESTAMP:
+  case MYSQL_TYPE_DATETIME:
+    /* DATETIME_PRECISION column */
+    table->field[offset + 5]->store((longlong) field->decimals(), TRUE);
+    table->field[offset + 5]->set_notnull();
+    break;
   default:
-    field_length= decimals= -1;
     break;
   }
 
@@ -4253,23 +4689,24 @@ void store_column_type(TABLE *table, Field *field, CHARSET_INFO *cs,
   {
     table->field[offset + 3]->store((longlong) field_length, TRUE);
     table->field[offset + 3]->set_notnull();
-  }
-  /* NUMERIC_SCALE column */
-  if (decimals >= 0)
-  {
-    table->field[offset + 4]->store((longlong) decimals, TRUE);
-    table->field[offset + 4]->set_notnull();
+
+    /* NUMERIC_SCALE column */
+    if (decimals >= 0)
+    {
+      table->field[offset + 4]->store((longlong) decimals, TRUE);
+      table->field[offset + 4]->set_notnull();
+    }
   }
   if (field->has_charset())
   {
     /* CHARACTER_SET_NAME column*/
     tmp_buff= field->charset()->csname;
-    table->field[offset + 5]->store(tmp_buff, strlen(tmp_buff), cs);
-    table->field[offset + 5]->set_notnull();
-    /* COLLATION_NAME column */
-    tmp_buff= field->charset()->name;
     table->field[offset + 6]->store(tmp_buff, strlen(tmp_buff), cs);
     table->field[offset + 6]->set_notnull();
+    /* COLLATION_NAME column */
+    tmp_buff= field->charset()->name;
+    table->field[offset + 7]->store(tmp_buff, strlen(tmp_buff), cs);
+    table->field[offset + 7]->set_notnull();
   }
 }
 
@@ -4345,7 +4782,7 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
         end=strmov(end,grant_types.type_names[bitnr]);
       }
     }
-    table->field[17]->store(tmp+1,end == tmp ? 0 : (uint) (end-tmp-1), cs);
+    table->field[18]->store(tmp+1,end == tmp ? 0 : (uint) (end-tmp-1), cs);
 
 #endif
     table->field[0]->store(STRING_WITH_LEN("def"), cs);
@@ -4354,8 +4791,6 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
     table->field[3]->store(field->field_name, strlen(field->field_name),
                            cs);
     table->field[4]->store((longlong) count, TRUE);
-    field->sql_type(type);
-    table->field[14]->store(type.ptr(), type.length(), cs);
 
     if (get_field_default_value(thd, timestamp_field, field, &type, 0))
     {
@@ -4369,17 +4804,23 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
     pos=(uchar*) ((field->flags & PRI_KEY_FLAG) ? "PRI" :
                  (field->flags & UNIQUE_KEY_FLAG) ? "UNI" :
                  (field->flags & MULTIPLE_KEY_FLAG) ? "MUL":"");
-    table->field[15]->store((const char*) pos,
+    table->field[16]->store((const char*) pos,
                             strlen((const char*) pos), cs);
 
     if (field->unireg_check == Field::NEXT_NUMBER)
-      table->field[16]->store(STRING_WITH_LEN("auto_increment"), cs);
+      table->field[17]->store(STRING_WITH_LEN("auto_increment"), cs);
     if (timestamp_field == field &&
         field->unireg_check != Field::TIMESTAMP_DN_FIELD)
-      table->field[16]->store(STRING_WITH_LEN("on update CURRENT_TIMESTAMP"),
+      table->field[17]->store(STRING_WITH_LEN("on update CURRENT_TIMESTAMP"),
                               cs);
-
-    table->field[18]->store(field->comment.str, field->comment.length, cs);
+    if (field->vcol_info)
+    {
+      if (field->stored_in_db)
+        table->field[17]->store(STRING_WITH_LEN("PERSISTENT"), cs);
+      else
+        table->field[17]->store(STRING_WITH_LEN("VIRTUAL"), cs);
+    }
+    table->field[19]->store(field->comment.str, field->comment.length, cs);
     if (schema_table_store_record(thd, table))
       DBUG_RETURN(1);
   }
@@ -4399,7 +4840,7 @@ int fill_schema_charsets(THD *thd, TABLE_LIST *tables, COND *cond)
        cs++)
   {
     CHARSET_INFO *tmp_cs= cs[0];
-    if (tmp_cs && (tmp_cs->state & MY_CS_PRIMARY) && 
+    if (tmp_cs && (tmp_cs->state & MY_CS_PRIMARY) &&
         (tmp_cs->state & MY_CS_AVAILABLE) &&
         !(tmp_cs->state & MY_CS_HIDDEN) &&
         !(wild && wild[0] &&
@@ -4435,7 +4876,7 @@ static my_bool iter_schema_engines(THD *thd, plugin_ref plugin,
   if (plugin_state(plugin) != PLUGIN_IS_READY)
   {
 
-    struct st_mysql_plugin *plug= plugin_decl(plugin);
+    struct st_maria_plugin *plug= plugin_decl(plugin);
     if (!(wild && wild[0] &&
           wild_case_compare(scs, plug->name,wild)))
     {
@@ -4516,7 +4957,7 @@ int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond)
          cl ++)
     {
       CHARSET_INFO *tmp_cl= cl[0];
-      if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) || 
+      if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) ||
           !my_charset_same(tmp_cs, tmp_cl))
 	continue;
       if (!(wild && wild[0] &&
@@ -4552,7 +4993,7 @@ int fill_schema_coll_charset_app(THD *thd, TABLE_LIST *tables, COND *cond)
   {
     CHARSET_INFO **cl;
     CHARSET_INFO *tmp_cs= cs[0];
-    if (!tmp_cs || !(tmp_cs->state & MY_CS_AVAILABLE) || 
+    if (!tmp_cs || !(tmp_cs->state & MY_CS_AVAILABLE) ||
         !(tmp_cs->state & MY_CS_PRIMARY))
       continue;
     for (cl= all_charsets;
@@ -4667,7 +5108,7 @@ bool store_schema_params(THD *thd, TABLE *table, TABLE *proc_table,
       table->field[3]->store((longlong) 0, TRUE);
       get_field(thd->mem_root, proc_table->field[MYSQL_PROC_MYSQL_TYPE],
                 &tmp_string);
-      table->field[14]->store(tmp_string.ptr(), tmp_string.length(), cs);
+      table->field[15]->store(tmp_string.ptr(), tmp_string.length(), cs);
       field_def= &sp->m_return_field_def;
       field= make_field(&share, (uchar*) 0, field_def->length,
                         (uchar*) "", 0, field_def->pack_flag,
@@ -4720,7 +5161,7 @@ bool store_schema_params(THD *thd, TABLE *table, TABLE *proc_table,
       table->field[5]->set_notnull();
       get_field(thd->mem_root, proc_table->field[MYSQL_PROC_MYSQL_TYPE],
                 &tmp_string);
-      table->field[14]->store(tmp_string.ptr(), tmp_string.length(), cs);
+      table->field[15]->store(tmp_string.ptr(), tmp_string.length(), cs);
 
       field= make_field(&share, (uchar*) 0, field_def->length,
                         (uchar*) "", 0, field_def->pack_flag,
@@ -4753,7 +5194,7 @@ bool store_schema_proc(THD *thd, TABLE *table, TABLE *proc_table,
   MYSQL_TIME time;
   LEX *lex= thd->lex;
   CHARSET_INFO *cs= system_charset_info;
-  char sp_db_buff[NAME_LEN + 1], sp_name_buff[NAME_LEN + 1],
+  char sp_db_buff[SAFE_NAME_LEN + 1], sp_name_buff[NAME_LEN + 1],
     definer_buff[USERNAME_LENGTH + HOSTNAME_LENGTH + 2],
     returns_buff[MAX_FIELD_WIDTH];
 
@@ -4837,40 +5278,40 @@ bool store_schema_proc(THD *thd, TABLE *table, TABLE *proc_table,
 
       if (full_access)
       {
-        copy_field_as_string(table->field[14],
+        copy_field_as_string(table->field[15],
                              proc_table->field[MYSQL_PROC_FIELD_BODY_UTF8]);
-        table->field[14]->set_notnull();
+        table->field[15]->set_notnull();
       }
-      table->field[13]->store(STRING_WITH_LEN("SQL"), cs);
-      table->field[17]->store(STRING_WITH_LEN("SQL"), cs);
-      copy_field_as_string(table->field[18],
+      table->field[14]->store(STRING_WITH_LEN("SQL"), cs);
+      table->field[18]->store(STRING_WITH_LEN("SQL"), cs);
+      copy_field_as_string(table->field[19],
                            proc_table->field[MYSQL_PROC_FIELD_DETERMINISTIC]);
-      table->field[19]->store(sp_data_access_name[enum_idx].str, 
+      table->field[20]->store(sp_data_access_name[enum_idx].str, 
                               sp_data_access_name[enum_idx].length , cs);
-      copy_field_as_string(table->field[21],
+      copy_field_as_string(table->field[22],
                            proc_table->field[MYSQL_PROC_FIELD_SECURITY_TYPE]);
 
       bzero((char *)&time, sizeof(time));
       ((Field_timestamp *) proc_table->field[MYSQL_PROC_FIELD_CREATED])->
         get_time(&time);
-      table->field[22]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+      table->field[23]->store_time(&time);
       bzero((char *)&time, sizeof(time));
       ((Field_timestamp *) proc_table->field[MYSQL_PROC_FIELD_MODIFIED])->
         get_time(&time);
-      table->field[23]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
-      copy_field_as_string(table->field[24],
-                           proc_table->field[MYSQL_PROC_FIELD_SQL_MODE]);
+      table->field[24]->store_time(&time);
       copy_field_as_string(table->field[25],
+                           proc_table->field[MYSQL_PROC_FIELD_SQL_MODE]);
+      copy_field_as_string(table->field[26],
                            proc_table->field[MYSQL_PROC_FIELD_COMMENT]);
 
-      table->field[26]->store(definer.ptr(), definer.length(), cs);
-      copy_field_as_string(table->field[27],
+      table->field[27]->store(definer.ptr(), definer.length(), cs);
+      copy_field_as_string(table->field[28],
                            proc_table->
                            field[MYSQL_PROC_FIELD_CHARACTER_SET_CLIENT]);
-      copy_field_as_string(table->field[28],
+      copy_field_as_string(table->field[29],
                            proc_table->
                            field[MYSQL_PROC_FIELD_COLLATION_CONNECTION]);
-      copy_field_as_string(table->field[29],
+      copy_field_as_string(table->field[30],
 			   proc_table->field[MYSQL_PROC_FIELD_DB_COLLATION]);
 
       return schema_table_store_record(thd, table);
@@ -4910,7 +5351,7 @@ int fill_schema_proc(THD *thd, TABLE_LIST *tables, COND *cond)
     DBUG_RETURN(1);
   }
   proc_table->file->ha_index_init(0, 1);
-  if ((res= proc_table->file->index_first(proc_table->record[0])))
+  if ((res= proc_table->file->ha_index_first(proc_table->record[0])))
   {
     res= (res == HA_ERR_END_OF_FILE) ? 0 : 1;
     goto err;
@@ -4923,7 +5364,7 @@ int fill_schema_proc(THD *thd, TABLE_LIST *tables, COND *cond)
     res= 1;
     goto err;
   }
-  while (!proc_table->file->index_next(proc_table->record[0]))
+  while (!proc_table->file->ha_index_next(proc_table->record[0]))
   {
     if (schema_table_idx == SCH_PROCEDURES ?
         store_schema_proc(thd, table, proc_table, wild, full_access, definer): 
@@ -5221,7 +5662,7 @@ static int get_schema_constraints_record(THD *thd, TABLE_LIST *tables,
     TABLE *show_table= tables->table;
     KEY *key_info=show_table->key_info;
     uint primary_key= show_table->s->primary_key;
-    show_table->file->info(HA_STATUS_VARIABLE | 
+    show_table->file->info(HA_STATUS_VARIABLE |
                            HA_STATUS_NO_LOCK |
                            HA_STATUS_TIME);
     for (uint i=0 ; i < show_table->s->keys ; i++, key_info++)
@@ -5250,7 +5691,7 @@ static int get_schema_constraints_record(THD *thd, TABLE_LIST *tables,
     List_iterator_fast<FOREIGN_KEY_INFO> it(f_key_list);
     while ((f_key_info=it++))
     {
-      if (store_constraints(thd, table, db_name, table_name, 
+      if (store_constraints(thd, table, db_name, table_name,
                             f_key_info->foreign_id->str,
                             strlen(f_key_info->foreign_id->str),
                             "FOREIGN KEY", 11))
@@ -5408,7 +5849,7 @@ static int get_schema_key_column_usage_record(THD *thd,
     TABLE *show_table= tables->table;
     KEY *key_info=show_table->key_info;
     uint primary_key= show_table->s->primary_key;
-    show_table->file->info(HA_STATUS_VARIABLE | 
+    show_table->file->info(HA_STATUS_VARIABLE |
                            HA_STATUS_NO_LOCK |
                            HA_STATUS_TIME);
     for (uint i=0 ; i < show_table->s->keys ; i++, key_info++)
@@ -5425,8 +5866,8 @@ static int get_schema_key_column_usage_record(THD *thd,
           restore_record(table, s->default_values);
           store_key_column_usage(table, db_name, table_name,
                                  key_info->name,
-                                 strlen(key_info->name), 
-                                 key_part->field->field_name, 
+                                 strlen(key_info->name),
+                                 key_part->field->field_name,
                                  strlen(key_part->field->field_name),
                                  (longlong) f_idx);
           if (schema_table_store_record(thd, table))
@@ -5462,7 +5903,7 @@ static int get_schema_key_column_usage_record(THD *thd,
                                system_charset_info);
         table->field[9]->set_notnull();
         table->field[10]->store(f_key_info->referenced_table->str,
-                                f_key_info->referenced_table->length, 
+                                f_key_info->referenced_table->length,
                                 system_charset_info);
         table->field[10]->set_notnull();
         table->field[11]->store(r_info->str, r_info->length,
@@ -5572,24 +6013,24 @@ static void store_schema_partitions_record(THD *thd, TABLE *schema_table,
   {
     thd->variables.time_zone->gmt_sec_to_TIME(&time,
                                               (my_time_t)stat_info.create_time);
-    table->field[18]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    table->field[18]->store_time(&time);
     table->field[18]->set_notnull();
   }
   if (stat_info.update_time)
   {
     thd->variables.time_zone->gmt_sec_to_TIME(&time,
                                               (my_time_t)stat_info.update_time);
-    table->field[19]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    table->field[19]->store_time(&time);
     table->field[19]->set_notnull();
   }
   if (stat_info.check_time)
   {
     thd->variables.time_zone->gmt_sec_to_TIME(&time,
                                               (my_time_t)stat_info.check_time);
-    table->field[20]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    table->field[20]->store_time(&time);
     table->field[20]->set_notnull();
   }
-  if (file->ha_table_flags() & (ulong) HA_HAS_CHECKSUM)
+  if (file->ha_table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM))
   {
     table->field[21]->store((longlong) stat_info.check_sum, TRUE);
     table->field[21]->set_notnull();
@@ -5612,7 +6053,7 @@ static void store_schema_partitions_record(THD *thd, TABLE *schema_table,
                               strlen(part_elem->tablespace_name), cs);
     else
     {
-      char *ts= showing_table->s->tablespace;
+      char *ts= showing_table->file->get_tablespace_name(thd,0,0);
       if(ts)
         table->field[24]->store(ts, strlen(ts), cs);
       else
@@ -5732,7 +6173,7 @@ static int get_schema_partitions_record(THD *thd, TABLE_LIST *tables,
         tmp_res.append(partition_keywords[PKW_KEY].str,
                        partition_keywords[PKW_KEY].length);
       else
-        tmp_res.append(partition_keywords[PKW_HASH].str, 
+        tmp_res.append(partition_keywords[PKW_HASH].str,
                        partition_keywords[PKW_HASH].length);
       table->field[7]->store(tmp_res.ptr(), tmp_res.length(), cs);
       break;
@@ -5767,7 +6208,7 @@ static int get_schema_partitions_record(THD *thd, TABLE_LIST *tables,
         tmp_res.append(partition_keywords[PKW_KEY].str,
                        partition_keywords[PKW_KEY].length);
       else
-        tmp_res.append(partition_keywords[PKW_HASH].str, 
+        tmp_res.append(partition_keywords[PKW_HASH].str,
                        partition_keywords[PKW_HASH].length);
       table->field[8]->store(tmp_res.ptr(), tmp_res.length(), cs);
       table->field[8]->set_notnull();
@@ -5880,7 +6321,7 @@ static int get_schema_partitions_record(THD *thd, TABLE_LIST *tables,
           /* SUBPARTITION_ORDINAL_POSITION */
           table->field[6]->store((longlong) ++subpart_pos, TRUE);
           table->field[6]->set_notnull();
-          
+
           store_schema_partitions_record(thd, table, show_table, subpart_elem,
                                          file, part_id);
           part_id++;
@@ -5939,7 +6380,7 @@ copy_event_to_schema_table(THD *thd, TABLE *sch_table, TABLE *event_table)
 
   if (et.load_from_row(thd, event_table))
   {
-    my_error(ER_CANNOT_LOAD_FROM_TABLE, MYF(0), event_table->alias);
+    my_error(ER_CANNOT_LOAD_FROM_TABLE, MYF(0), event_table->alias.c_ptr());
     DBUG_RETURN(1);
   }
 
@@ -6001,15 +6442,13 @@ copy_event_to_schema_table(THD *thd, TABLE *sch_table, TABLE *event_table)
     /* starts & ends . STARTS is always set - see sql_yacc.yy */
     et.time_zone->gmt_sec_to_TIME(&time, et.starts);
     sch_table->field[ISE_STARTS]->set_notnull();
-    sch_table->field[ISE_STARTS]->
-                                store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    sch_table->field[ISE_STARTS]->store_time(&time);
 
     if (!et.ends_null)
     {
       et.time_zone->gmt_sec_to_TIME(&time, et.ends);
       sch_table->field[ISE_ENDS]->set_notnull();
-      sch_table->field[ISE_ENDS]->
-                                store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+      sch_table->field[ISE_ENDS]->store_time(&time);
     }
   }
   else
@@ -6019,8 +6458,7 @@ copy_event_to_schema_table(THD *thd, TABLE *sch_table, TABLE *event_table)
 
     et.time_zone->gmt_sec_to_TIME(&time, et.execute_at);
     sch_table->field[ISE_EXECUTE_AT]->set_notnull();
-    sch_table->field[ISE_EXECUTE_AT]->
-                          store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    sch_table->field[ISE_EXECUTE_AT]->store_time(&time);
   }
 
   /* status */
@@ -6049,22 +6487,20 @@ copy_event_to_schema_table(THD *thd, TABLE *sch_table, TABLE *event_table)
   else
     sch_table->field[ISE_ON_COMPLETION]->
                                 store(STRING_WITH_LEN("PRESERVE"), scs);
-    
-  number_to_datetime(et.created, &time, 0, &not_used);
+
+  number_to_datetime(et.created, 0, &time, 0, &not_used);
   DBUG_ASSERT(not_used==0);
-  sch_table->field[ISE_CREATED]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+  sch_table->field[ISE_CREATED]->store_time(&time);
 
-  number_to_datetime(et.modified, &time, 0, &not_used);
+  number_to_datetime(et.modified, 0, &time, 0, &not_used);
   DBUG_ASSERT(not_used==0);
-  sch_table->field[ISE_LAST_ALTERED]->
-                                store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+  sch_table->field[ISE_LAST_ALTERED]->store_time(&time);
 
   if (et.last_executed)
   {
     et.time_zone->gmt_sec_to_TIME(&time, et.last_executed);
     sch_table->field[ISE_LAST_EXECUTED]->set_notnull();
-    sch_table->field[ISE_LAST_EXECUTED]->
-                       store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    sch_table->field[ISE_LAST_EXECUTED]->store_time(&time);
   }
 
   sch_table->field[ISE_EVENT_COMMENT]->
@@ -6170,7 +6606,7 @@ int fill_status(THD *thd, TABLE_LIST *tables, COND *cond)
     tmp1= &tmp;
   }
   else
-  { 
+  {
     option_type= OPT_SESSION;
     tmp1= &thd->status_var;
   }
@@ -6225,7 +6661,7 @@ get_referential_constraints_record(THD *thd, TABLE_LIST *tables,
   {
     List<FOREIGN_KEY_INFO> f_key_list;
     TABLE *show_table= tables->table;
-    show_table->file->info(HA_STATUS_VARIABLE | 
+    show_table->file->info(HA_STATUS_VARIABLE |
                            HA_STATUS_NO_LOCK |
                            HA_STATUS_TIME);
 
@@ -6243,20 +6679,20 @@ get_referential_constraints_record(THD *thd, TABLE_LIST *tables,
       table->field[3]->store(STRING_WITH_LEN("def"), cs);
       table->field[4]->store(f_key_info->referenced_db->str, 
                              f_key_info->referenced_db->length, cs);
-      table->field[10]->store(f_key_info->referenced_table->str, 
+      table->field[10]->store(f_key_info->referenced_table->str,
                              f_key_info->referenced_table->length, cs);
       if (f_key_info->referenced_key_name)
       {
-        table->field[5]->store(f_key_info->referenced_key_name->str, 
+        table->field[5]->store(f_key_info->referenced_key_name->str,
                                f_key_info->referenced_key_name->length, cs);
         table->field[5]->set_notnull();
       }
       else
         table->field[5]->set_null();
       table->field[6]->store(STRING_WITH_LEN("NONE"), cs);
-      table->field[7]->store(f_key_info->update_method->str, 
+      table->field[7]->store(f_key_info->update_method->str,
                              f_key_info->update_method->length, cs);
-      table->field[8]->store(f_key_info->delete_method->str, 
+      table->field[8]->store(f_key_info->delete_method->str,
                              f_key_info->delete_method->length, cs);
       if (schema_table_store_record(thd, table))
         DBUG_RETURN(1);
@@ -6265,12 +6701,87 @@ get_referential_constraints_record(THD *thd, TABLE_LIST *tables,
   DBUG_RETURN(0);
 }
 
-struct schema_table_ref 
+struct schema_table_ref
 {
   const char *table_name;
   ST_SCHEMA_TABLE *schema_table;
 };
 
+ST_FIELD_INFO user_stats_fields_info[]=  /* Column list whose first column is "USER"; the I_S table using it is not visible here -- presumably USER_STATISTICS, confirm in schema_tables[] */
+{  /* NOTE(review): TOTAL_CONNECTIONS, CONCURRENT_CONNECTIONS and CONNECTED_TIME are MYSQL_TYPE_LONG here but MYSQL_TYPE_LONGLONG in client_stats_fields_info -- confirm intended */
+  {"USER", USERNAME_LENGTH, MYSQL_TYPE_STRING, 0, 0, "User", SKIP_OPEN_TABLE},
+  {"TOTAL_CONNECTIONS", MY_INT32_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Total_connections",SKIP_OPEN_TABLE},
+  {"CONCURRENT_CONNECTIONS", MY_INT32_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Concurrent_connections",SKIP_OPEN_TABLE},
+  {"CONNECTED_TIME", MY_INT32_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Connected_time",SKIP_OPEN_TABLE},
+  {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_DOUBLE, 0, 0, "Busy_time",SKIP_OPEN_TABLE},
+  {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_DOUBLE, 0, 0, "Cpu_time",SKIP_OPEN_TABLE},
+  {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_received",SKIP_OPEN_TABLE},
+  {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_sent",SKIP_OPEN_TABLE},
+  {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Binlog_bytes_written",SKIP_OPEN_TABLE},
+  {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read",SKIP_OPEN_TABLE},
+  {"ROWS_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_sent",SKIP_OPEN_TABLE},
+  {"ROWS_DELETED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_deleted",SKIP_OPEN_TABLE},
+  {"ROWS_INSERTED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_inserted",SKIP_OPEN_TABLE},
+  {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_updated",SKIP_OPEN_TABLE},
+  {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Select_commands",SKIP_OPEN_TABLE},
+  {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Update_commands",SKIP_OPEN_TABLE},
+  {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Other_commands",SKIP_OPEN_TABLE},
+  {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Commit_transactions",SKIP_OPEN_TABLE},
+  {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rollback_transactions",SKIP_OPEN_TABLE},
+  {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Denied_connections",SKIP_OPEN_TABLE},
+  {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Lost_connections",SKIP_OPEN_TABLE},
+  {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Access_denied",SKIP_OPEN_TABLE},
+  {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Empty_queries",SKIP_OPEN_TABLE},
+  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}  /* final entry with a null name, as in the other *_fields_info arrays */
+};
+
+ST_FIELD_INFO client_stats_fields_info[]=  /* Column list whose first column is "CLIENT"; the I_S table using it is not visible here -- presumably CLIENT_STATISTICS, confirm in schema_tables[] */
+{  /* Same column names as user_stats_fields_info after the first column; here all integer counters are MYSQL_TYPE_LONGLONG */
+  {"CLIENT", LIST_PROCESS_HOST_LEN, MYSQL_TYPE_STRING, 0, 0, "Client",SKIP_OPEN_TABLE},
+  {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Total_connections",SKIP_OPEN_TABLE},
+  {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Concurrent_connections",SKIP_OPEN_TABLE},
+  {"CONNECTED_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Connected_time",SKIP_OPEN_TABLE},
+  {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_DOUBLE, 0, 0, "Busy_time",SKIP_OPEN_TABLE},
+  {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_DOUBLE, 0, 0, "Cpu_time",SKIP_OPEN_TABLE},
+  {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_received",SKIP_OPEN_TABLE},
+  {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_sent",SKIP_OPEN_TABLE},
+  {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Binlog_bytes_written",SKIP_OPEN_TABLE},
+  {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read",SKIP_OPEN_TABLE},
+  {"ROWS_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_sent",SKIP_OPEN_TABLE},
+  {"ROWS_DELETED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_deleted",SKIP_OPEN_TABLE},
+  {"ROWS_INSERTED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_inserted",SKIP_OPEN_TABLE},
+  {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_updated",SKIP_OPEN_TABLE},
+  {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Select_commands",SKIP_OPEN_TABLE},
+  {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Update_commands",SKIP_OPEN_TABLE},
+  {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Other_commands",SKIP_OPEN_TABLE},
+  {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Commit_transactions",SKIP_OPEN_TABLE},
+  {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rollback_transactions",SKIP_OPEN_TABLE},
+  {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Denied_connections",SKIP_OPEN_TABLE},
+  {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Lost_connections",SKIP_OPEN_TABLE},
+  {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Access_denied",SKIP_OPEN_TABLE},
+  {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Empty_queries",SKIP_OPEN_TABLE},
+  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}  /* final entry with a null name, as in the other *_fields_info arrays */
+};
+
+
+ST_FIELD_INFO table_stats_fields_info[]=  /* Column list keyed by TABLE_SCHEMA and TABLE_NAME; presumably for a TABLE_STATISTICS I_S table -- confirm in schema_tables[] */
+{
+  {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema",SKIP_OPEN_TABLE},
+  {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name",SKIP_OPEN_TABLE},
+  {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read",SKIP_OPEN_TABLE},
+  {"ROWS_CHANGED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_changed",SKIP_OPEN_TABLE},
+  {"ROWS_CHANGED_X_INDEXES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_changed_x_#indexes",SKIP_OPEN_TABLE},
+  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}  /* final entry with a null name, as in the other *_fields_info arrays */
+};
+
+ST_FIELD_INFO index_stats_fields_info[]=  /* Column list keyed by TABLE_SCHEMA, TABLE_NAME and INDEX_NAME; presumably for an INDEX_STATISTICS I_S table -- confirm in schema_tables[] */
+{
+  {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema",SKIP_OPEN_TABLE},
+  {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name",SKIP_OPEN_TABLE},
+  {"INDEX_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Index_name",SKIP_OPEN_TABLE},
+  {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read",SKIP_OPEN_TABLE},
+  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0,0}  /* final entry with a null name, as in the other *_fields_info arrays */
+};
 
 /*
   Find schema_tables elment by name
@@ -6332,7 +6843,7 @@ ST_SCHEMA_TABLE *find_schema_table(THD *thd, const char* table_name)
   }
 
   schema_table_a.table_name= table_name;
-  if (plugin_foreach(thd, find_schema_table_in_plugin, 
+  if (plugin_foreach(thd, find_schema_table_in_plugin,
                      MYSQL_INFORMATION_SCHEMA_PLUGIN, &schema_table_a))
     DBUG_RETURN(schema_table_a.schema_table);
 
@@ -6354,7 +6865,7 @@ ST_SCHEMA_TABLE *get_schema_table(enum enum_schema_tables schema_table_idx)
     into it two numbers, based on modulus of base-10 numbers.  In the ones
     position is the number of decimals.  Tens position is unused.  In the
     hundreds and thousands position is a two-digit decimal number representing
-    length.  Encode this value with  (decimals*100)+length  , where
+    length.  Encode this value with  (length*100)+decimals  , where
     0<decimals<10 and 0<=length<100 .
 
   @param
@@ -6396,18 +6907,27 @@ TABLE *create_schema_table(THD *thd, TABLE_LIST *table_list)
       item->unsigned_flag= (fields_info->field_flags & MY_I_S_UNSIGNED);
       break;
     case MYSQL_TYPE_DATE:
+      if (!(item=new Item_return_date_time(fields_info->field_name,
+                                           MAX_DATE_WIDTH,
+                                           fields_info->field_type)))
+        DBUG_RETURN(0);
+      break;
     case MYSQL_TYPE_TIME:
+      if (!(item=new Item_return_date_time(fields_info->field_name,
+                                           MAX_TIME_FULL_WIDTH,
+                                           fields_info->field_type)))
+        DBUG_RETURN(0);
+      break;
     case MYSQL_TYPE_TIMESTAMP:
     case MYSQL_TYPE_DATETIME:
       if (!(item=new Item_return_date_time(fields_info->field_name,
+                                           MAX_DATETIME_WIDTH,
                                            fields_info->field_type)))
-      {
         DBUG_RETURN(0);
-      }
       break;
     case MYSQL_TYPE_FLOAT:
     case MYSQL_TYPE_DOUBLE:
-      if ((item= new Item_float(fields_info->field_name, 0.0, NOT_FIXED_DEC, 
+      if ((item= new Item_float(fields_info->field_name, 0.0, NOT_FIXED_DEC,
                            fields_info->field_length)) == NULL)
         DBUG_RETURN(NULL);
       break;
@@ -6417,6 +6937,12 @@ TABLE *create_schema_table(THD *thd, TABLE_LIST *table_list)
       {
         DBUG_RETURN(0);
       }
+      /*
+        Create a type holder, as we want the type of the item to define
+        the type of the object, not the value
+      */
+      if (!(item= new Item_type_holder(thd, item)))
+        DBUG_RETURN(0);
       item->unsigned_flag= (fields_info->field_flags & MY_I_S_UNSIGNED);
       item->decimals= fields_info->field_length%10;
       item->max_length= (fields_info->field_length/100)%100;
@@ -6583,7 +7109,7 @@ int make_table_names_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table)
 
 int make_columns_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table)
 {
-  int fields_arr[]= {3, 14, 13, 6, 15, 5, 16, 17, 18, -1};
+  int fields_arr[]= {3, 15, 14, 6, 16, 5, 17, 18, 19, -1};
   int *field_num= fields_arr;
   ST_FIELD_INFO *field_info;
   Name_resolution_context *context= &thd->lex->select_lex.context;
@@ -6591,9 +7117,9 @@ int make_columns_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table)
   for (; *field_num >= 0; field_num++)
   {
     field_info= &schema_table->fields_info[*field_num];
-    if (!thd->lex->verbose && (*field_num == 13 ||
-                               *field_num == 17 ||
-                               *field_num == 18))
+    if (!thd->lex->verbose && (*field_num == 14 ||
+                               *field_num == 18 ||
+                               *field_num == 19))
       continue;
     Item_field *field= new Item_field(context,
                                       NullS, NullS, field_info->field_name);
@@ -6637,7 +7163,7 @@ int make_character_sets_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table)
 
 int make_proc_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table)
 {
-  int fields_arr[]= {2, 3, 4, 26, 23, 22, 21, 25, 27, 28, 29, -1};
+  int fields_arr[]= {2, 3, 4, 27, 24, 23, 22, 26, 28, 29, 30, -1};
   int *field_num= fields_arr;
   ST_FIELD_INFO *field_info;
   Name_resolution_context *context= &thd->lex->select_lex.context;
@@ -6884,14 +7410,15 @@ static bool do_fill_table(THD *thd,
 bool get_schema_tables_result(JOIN *join,
                               enum enum_schema_table_state executed_place)
 {
-  JOIN_TAB *tmp_join_tab= join->join_tab+join->tables;
   THD *thd= join->thd;
   LEX *lex= thd->lex;
   bool result= 0;
   DBUG_ENTER("get_schema_tables_result");
 
-  for (JOIN_TAB *tab= join->join_tab; tab < tmp_join_tab; tab++)
-  {  
+  for (JOIN_TAB *tab= first_linear_tab(join, WITH_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+  {
     if (!tab->table || !tab->table->pos_in_table_list)
       break;
 
@@ -6988,6 +7515,91 @@ int hton_fill_schema_table(THD *thd, TABLE_LIST *tables, COND *cond)
 }
 
 
+static  /* Stores one row of key cache statistics into 'table'. Returns 0 on success or when the row is skipped, otherwise the result of schema_table_store_record(). */
+int store_key_cache_table_record(THD *thd, TABLE *table,
+                                 const char *name, uint name_length,
+                                 KEY_CACHE *key_cache,
+                                 uint partitions, uint partition_no)
+{
+  KEY_CACHE_STATISTICS keycache_stats;
+  uint err;
+  DBUG_ENTER("store_key_cache_table_record");
+  /* Fetch the counters selected by partition_no; 0 presumably selects the whole cache -- confirm in get_key_cache_statistics() */
+  get_key_cache_statistics(key_cache, partition_no, &keycache_stats);
+  /* No row is emitted (and 0 is returned) for an uninitialised cache or when mem_size is 0 */
+  if (!key_cache->key_cache_inited || keycache_stats.mem_size == 0)
+    DBUG_RETURN(0);
+  /* Start from the default row, then fill field[0]..field[11] by position; the order matches keycache_fields_info */
+  restore_record(table, s->default_values);
+  table->field[0]->store(name, name_length, system_charset_info);
+  if (partitions == 0)
+    table->field[1]->set_null();
+  else
+  {
+    table->field[1]->set_notnull(); 
+    table->field[1]->store((long) partitions, TRUE);
+  }
+  /* As with 'partitions' above, a zero partition_no is stored as NULL */
+  if (partition_no == 0)
+    table->field[2]->set_null();
+  else
+  {
+    table->field[2]->set_notnull();
+    table->field[2]->store((long) partition_no, TRUE);
+  }
+  table->field[3]->store(keycache_stats.mem_size, TRUE);
+  table->field[4]->store(keycache_stats.block_size, TRUE);
+  table->field[5]->store(keycache_stats.blocks_used, TRUE);
+  table->field[6]->store(keycache_stats.blocks_unused, TRUE);
+  table->field[7]->store(keycache_stats.blocks_changed, TRUE);
+  table->field[8]->store(keycache_stats.read_requests, TRUE);
+  table->field[9]->store(keycache_stats.reads, TRUE);
+  table->field[10]->store(keycache_stats.write_requests, TRUE);
+  table->field[11]->store(keycache_stats.writes, TRUE);
+  /* Append the row; the store status becomes this function's return value */
+  err= schema_table_store_record(thd, table);
+  DBUG_RETURN(err);
+}
+
+int run_fill_key_cache_tables(const char *name, KEY_CACHE *key_cache, void *p)
+{
+  DBUG_ENTER("run_fill_key_cache_tables");
+  /* Emits the rows for one key cache into the TABLE passed via 'p'. Returns 0 on success, 1 if storing a row failed; an uninitialised cache yields no rows. */
+  if (!key_cache->key_cache_inited)
+    DBUG_RETURN(0);
+  /* 'p' is the target TABLE; fill_key_cache_tables() passes tables->table */
+  TABLE *table= (TABLE *)p;
+  THD *thd= table->in_use;
+  uint partitions= key_cache->partitions;    
+  size_t namelen= strlen(name);  /* NOTE(review): narrowed to the uint name_length parameter at the calls below */
+  DBUG_ASSERT(partitions <= MAX_KEY_CACHE_PARTITIONS);
+  /* One row per partition, with partition numbers starting at 1 ... */
+  if (partitions)
+  {
+    for (uint i= 0; i < partitions; i++)
+    {
+      if (store_key_cache_table_record(thd, table, name, namelen,
+                                       key_cache, partitions, i+1))
+        DBUG_RETURN(1);
+    }
+  }
+  /* ... then one more row with partition_no 0, which store_key_cache_table_record() stores as NULL */
+  if (store_key_cache_table_record(thd, table, name, namelen,
+                                   key_cache, partitions, 0))
+    DBUG_RETURN(1);
+  DBUG_RETURN(0);
+}
+
+int fill_key_cache_tables(THD *thd, TABLE_LIST *tables, COND *cond)
+{
+  DBUG_ENTER("fill_key_cache_tables");
+  /* Passes run_fill_key_cache_tables and the target table to process_key_caches(), which presumably calls it once per key cache -- confirm. thd and cond are not used here. */
+  int res= process_key_caches(run_fill_key_cache_tables, tables->table);
+  /* The result of process_key_caches() is returned unchanged */
+  DBUG_RETURN(res);
+}
+
+
 ST_FIELD_INFO schema_fields_info[]=
 {
   {"CATALOG_NAME", FN_REFLEN, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
@@ -7015,17 +7627,17 @@ ST_FIELD_INFO tables_fields_info[]=
   {"ROW_FORMAT", 10, MYSQL_TYPE_STRING, 0, 1, "Row_format", OPEN_FULL_TABLE},
   {"TABLE_ROWS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Rows", OPEN_FULL_TABLE},
-  {"AVG_ROW_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 
+  {"AVG_ROW_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Avg_row_length", OPEN_FULL_TABLE},
-  {"DATA_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 
+  {"DATA_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Data_length", OPEN_FULL_TABLE},
   {"MAX_DATA_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Max_data_length", OPEN_FULL_TABLE},
-  {"INDEX_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 
+  {"INDEX_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Index_length", OPEN_FULL_TABLE},
   {"DATA_FREE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Data_free", OPEN_FULL_TABLE},
-  {"AUTO_INCREMENT", MY_INT64_NUM_DECIMAL_DIGITS , MYSQL_TYPE_LONGLONG, 0, 
+  {"AUTO_INCREMENT", MY_INT64_NUM_DECIMAL_DIGITS , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Auto_increment", OPEN_FULL_TABLE},
   {"CREATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Create_time", OPEN_FULL_TABLE},
   {"UPDATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Update_time", OPEN_FULL_TABLE},
@@ -7063,6 +7675,8 @@ ST_FIELD_INFO columns_fields_info[]=
    0, (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, OPEN_FRM_ONLY},
   {"NUMERIC_SCALE", MY_INT64_NUM_DECIMAL_DIGITS , MYSQL_TYPE_LONGLONG,
    0, (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, OPEN_FRM_ONLY},
+  {"DATETIME_PRECISION", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG,
+   0, (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, OPEN_FRM_ONLY},
   {"CHARACTER_SET_NAME", MY_CS_NAME_SIZE, MYSQL_TYPE_STRING, 0, 1, 0,
    OPEN_FRM_ONLY},
   {"COLLATION_NAME", MY_CS_NAME_SIZE, MYSQL_TYPE_STRING, 0, 1, "Collation",
@@ -7109,7 +7723,7 @@ ST_FIELD_INFO engines_fields_info[]=
 {
   {"ENGINE", 64, MYSQL_TYPE_STRING, 0, 0, "Engine", SKIP_OPEN_TABLE},
   {"SUPPORT", 8, MYSQL_TYPE_STRING, 0, 0, "Support", SKIP_OPEN_TABLE},
-  {"COMMENT", 80, MYSQL_TYPE_STRING, 0, 0, "Comment", SKIP_OPEN_TABLE},
+  {"COMMENT", 160, MYSQL_TYPE_STRING, 0, 0, "Comment", SKIP_OPEN_TABLE},
   {"TRANSACTIONS", 3, MYSQL_TYPE_STRING, 0, 1, "Transactions", SKIP_OPEN_TABLE},
   {"XA", 3, MYSQL_TYPE_STRING, 0, 1, "XA", SKIP_OPEN_TABLE},
   {"SAVEPOINTS", 3 ,MYSQL_TYPE_STRING, 0, 1, "Savepoints", SKIP_OPEN_TABLE},
@@ -7179,6 +7793,8 @@ ST_FIELD_INFO proc_fields_info[]=
   {"CHARACTER_OCTET_LENGTH", 21 , MYSQL_TYPE_LONG, 0, 1, 0, SKIP_OPEN_TABLE},
   {"NUMERIC_PRECISION", 21 , MYSQL_TYPE_LONG, 0, 1, 0, SKIP_OPEN_TABLE},
   {"NUMERIC_SCALE", 21 , MYSQL_TYPE_LONG, 0, 1, 0, SKIP_OPEN_TABLE},
+  {"DATETIME_PRECISION", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG,
+   0, (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, OPEN_FRM_ONLY},
   {"CHARACTER_SET_NAME", 64, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
   {"COLLATION_NAME", 64, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
   {"DTD_IDENTIFIER", 65535, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
@@ -7343,8 +7959,8 @@ ST_FIELD_INFO table_names_fields_info[]=
 {
   {"TABLE_CATALOG", FN_REFLEN, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
   {"TABLE_SCHEMA",NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
-  {"TABLE_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, "Tables_in_",
-   SKIP_OPEN_TABLE},
+  {"TABLE_NAME", NAME_CHAR_LEN + MYSQL50_TABLE_NAME_PREFIX_LENGTH,
+   MYSQL_TYPE_STRING, 0, 0, "Tables_in_", SKIP_OPEN_TABLE},
   {"TABLE_TYPE", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_type",
    OPEN_FRM_ONLY},
   {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
@@ -7464,6 +8080,12 @@ ST_FIELD_INFO processlist_fields_info[]=
   {"STATE", 64, MYSQL_TYPE_STRING, 0, 1, "State", SKIP_OPEN_TABLE},
   {"INFO", PROCESS_LIST_INFO_WIDTH, MYSQL_TYPE_STRING, 0, 1, "Info",
    SKIP_OPEN_TABLE},
+  {"TIME_MS", 100 * (MY_INT64_NUM_DECIMAL_DIGITS + 1) + 3, MYSQL_TYPE_DECIMAL,
+   0, 0, "Time_ms", SKIP_OPEN_TABLE},
+  {"STAGE", 2, MYSQL_TYPE_TINY,  0, 0, "Stage", SKIP_OPEN_TABLE},
+  {"MAX_STAGE", 2, MYSQL_TYPE_TINY,  0, 0, "Max_stage", SKIP_OPEN_TABLE},
+  {"PROGRESS", 703, MYSQL_TYPE_DECIMAL,  0, 0, "Progress",
+   SKIP_OPEN_TABLE},
   {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
 };
 
@@ -7481,8 +8103,10 @@ ST_FIELD_INFO plugin_fields_info[]=
   {"PLUGIN_LIBRARY_VERSION", 20, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
   {"PLUGIN_AUTHOR", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
   {"PLUGIN_DESCRIPTION", 65535, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
-  {"PLUGIN_LICENSE", 80, MYSQL_TYPE_STRING, 0, 1, "License", SKIP_OPEN_TABLE},
+  {"PLUGIN_LICENSE", 80, MYSQL_TYPE_STRING, 0, 0, "License", SKIP_OPEN_TABLE},
   {"LOAD_OPTION", 64, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
+  {"PLUGIN_MATURITY", 12, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
+  {"PLUGIN_AUTH_VERSION", 80, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
   {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
 };
 
@@ -7508,9 +8132,9 @@ ST_FIELD_INFO files_fields_info[]=
   {"EXTENT_SIZE", 4, MYSQL_TYPE_LONGLONG, 0, 0, 0, SKIP_OPEN_TABLE},
   {"INITIAL_SIZE", 21, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
-  {"MAXIMUM_SIZE", 21, MYSQL_TYPE_LONGLONG, 0, 
+  {"MAXIMUM_SIZE", 21, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
-  {"AUTOEXTEND_SIZE", 21, MYSQL_TYPE_LONGLONG, 0, 
+  {"AUTOEXTEND_SIZE", 21, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
   {"CREATION_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, 0, SKIP_OPEN_TABLE},
   {"LAST_UPDATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, 0, SKIP_OPEN_TABLE},
@@ -7522,20 +8146,20 @@ ST_FIELD_INFO files_fields_info[]=
   {"ROW_FORMAT", 10, MYSQL_TYPE_STRING, 0, 1, "Row_format", SKIP_OPEN_TABLE},
   {"TABLE_ROWS", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Rows", SKIP_OPEN_TABLE},
-  {"AVG_ROW_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0, 
+  {"AVG_ROW_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Avg_row_length", SKIP_OPEN_TABLE},
-  {"DATA_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0, 
+  {"DATA_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Data_length", SKIP_OPEN_TABLE},
-  {"MAX_DATA_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0, 
+  {"MAX_DATA_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Max_data_length", SKIP_OPEN_TABLE},
-  {"INDEX_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0, 
+  {"INDEX_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Index_length", SKIP_OPEN_TABLE},
-  {"DATA_FREE", 21 , MYSQL_TYPE_LONGLONG, 0, 
+  {"DATA_FREE", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Data_free", SKIP_OPEN_TABLE},
   {"CREATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Create_time", SKIP_OPEN_TABLE},
   {"UPDATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Update_time", SKIP_OPEN_TABLE},
   {"CHECK_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Check_time", SKIP_OPEN_TABLE},
-  {"CHECKSUM", 21 , MYSQL_TYPE_LONGLONG, 0, 
+  {"CHECKSUM", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Checksum", SKIP_OPEN_TABLE},
   {"STATUS", 20, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
   {"EXTRA", 255, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
@@ -7589,6 +8213,8 @@ ST_FIELD_INFO parameters_fields_info[]=
   {"CHARACTER_OCTET_LENGTH", 21 , MYSQL_TYPE_LONG, 0, 1, 0, OPEN_FULL_TABLE},
   {"NUMERIC_PRECISION", 21 , MYSQL_TYPE_LONG, 0, 1, 0, OPEN_FULL_TABLE},
   {"NUMERIC_SCALE", 21 , MYSQL_TYPE_LONG, 0, 1, 0, OPEN_FULL_TABLE},
+  {"DATETIME_PRECISION", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG,
+   0, (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, OPEN_FRM_ONLY},
   {"CHARACTER_SET_NAME", 64, MYSQL_TYPE_STRING, 0, 1, 0, OPEN_FULL_TABLE},
   {"COLLATION_NAME", 64, MYSQL_TYPE_STRING, 0, 1, 0, OPEN_FULL_TABLE},
   {"DTD_IDENTIFIER", 65535, MYSQL_TYPE_STRING, 0, 0, 0, OPEN_FULL_TABLE},
@@ -7620,6 +8246,35 @@ ST_FIELD_INFO tablespaces_fields_info[]=
 };
 
 
+ST_FIELD_INFO keycache_fields_info[]=   /* Columns of the KEY_CACHES table */
+{
+  {"KEY_CACHE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
+  {"SEGMENTS", 3, MYSQL_TYPE_LONG, 0, 
+   (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED) , 0, SKIP_OPEN_TABLE},
+  {"SEGMENT_NUMBER", 3, MYSQL_TYPE_LONG, 0,
+   (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
+  {"FULL_SIZE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
+  {"BLOCK_SIZE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE },
+  {"USED_BLOCKS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+    (MY_I_S_UNSIGNED), "Key_blocks_used", SKIP_OPEN_TABLE},
+  {"UNUSED_BLOCKS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), "Key_blocks_unused", SKIP_OPEN_TABLE},
+  {"DIRTY_BLOCKS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), "Key_blocks_not_flushed", SKIP_OPEN_TABLE},
+  {"READ_REQUESTS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), "Key_read_requests", SKIP_OPEN_TABLE},
+  {"READS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), "Key_reads", SKIP_OPEN_TABLE},
+  {"WRITE_REQUESTS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), "Key_write_requests", SKIP_OPEN_TABLE},
+  {"WRITES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), "Key_writes", SKIP_OPEN_TABLE},
+  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
+};
+
+
 /*
   Description of ST_FIELD_INFO in table.h
 
@@ -7629,13 +8284,15 @@ ST_FIELD_INFO tablespaces_fields_info[]=
 
 ST_SCHEMA_TABLE schema_tables[]=
 {
-  {"CHARACTER_SETS", charsets_fields_info, create_schema_table, 
+  {"CHARACTER_SETS", charsets_fields_info, create_schema_table,
    fill_schema_charsets, make_character_sets_old_format, 0, -1, -1, 0, 0},
-  {"COLLATIONS", collation_fields_info, create_schema_table, 
+  {"CLIENT_STATISTICS", client_stats_fields_info, create_schema_table, 
+   fill_schema_client_stats, make_old_format, 0, -1, -1, 0, 0},
+  {"COLLATIONS", collation_fields_info, create_schema_table,
    fill_schema_collation, make_old_format, 0, -1, -1, 0, 0},
   {"COLLATION_CHARACTER_SET_APPLICABILITY", coll_charset_app_fields_info,
    create_schema_table, fill_schema_coll_charset_app, 0, 0, -1, -1, 0, 0},
-  {"COLUMNS", columns_fields_info, create_schema_table, 
+  {"COLUMNS", columns_fields_info, create_schema_table,
    get_all_tables, make_columns_old_format, get_schema_column_record, 1, 2, 0,
    OPTIMIZE_I_S_TABLE|OPEN_VIEW_FULL},
   {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table,
@@ -7655,6 +8312,10 @@ ST_SCHEMA_TABLE schema_tables[]=
    fill_status, make_old_format, 0, 0, -1, 0, 0},
   {"GLOBAL_VARIABLES", variables_fields_info, create_schema_table,
    fill_variables, make_old_format, 0, 0, -1, 0, 0},
+  {"INDEX_STATISTICS", index_stats_fields_info, create_schema_table,
+   fill_schema_index_stats, make_old_format, 0, -1, -1, 0, 0},
+  {"KEY_CACHES", keycache_fields_info, create_schema_table,
+   fill_key_cache_tables, make_old_format, 0, -1,-1, 0, 0}, 
   {"KEY_COLUMN_USAGE", key_column_usage_fields_info, create_schema_table,
    get_all_tables, 0, get_schema_key_column_usage_record, 4, 5, 0,
    OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY},
@@ -7670,7 +8331,7 @@ ST_SCHEMA_TABLE schema_tables[]=
   {"PROCESSLIST", processlist_fields_info, create_schema_table,
    fill_schema_processlist, make_old_format, 0, -1, -1, 0, 0},
   {"PROFILING", query_profile_statistics_info, create_schema_table,
-    fill_query_profile_statistics_info, make_profile_table_for_show, 
+    fill_query_profile_statistics_info, make_profile_table_for_show,
     NULL, -1, -1, false, 0},
   {"REFERENTIAL_CONSTRAINTS", referential_constraints_fields_info,
    create_schema_table, get_all_tables, 0, get_referential_constraints_record,
@@ -7685,12 +8346,12 @@ ST_SCHEMA_TABLE schema_tables[]=
    fill_status, make_old_format, 0, 0, -1, 0, 0},
   {"SESSION_VARIABLES", variables_fields_info, create_schema_table,
    fill_variables, make_old_format, 0, 0, -1, 0, 0},
-  {"STATISTICS", stat_fields_info, create_schema_table, 
+  {"STATISTICS", stat_fields_info, create_schema_table,
    get_all_tables, make_old_format, get_schema_stat_record, 1, 2, 0,
    OPEN_TABLE_ONLY|OPTIMIZE_I_S_TABLE},
-  {"STATUS", variables_fields_info, create_schema_table, fill_status, 
+  {"STATUS", variables_fields_info, create_schema_table, fill_status,
    make_old_format, 0, 0, -1, 1, 0},
-  {"TABLES", tables_fields_info, create_schema_table, 
+  {"TABLES", tables_fields_info, create_schema_table,
    get_all_tables, make_old_format, get_schema_tables_record, 1, 2, 0,
    OPTIMIZE_I_S_TABLE},
   {"TABLESPACES", tablespaces_fields_info, create_schema_table,
@@ -7702,14 +8363,18 @@ ST_SCHEMA_TABLE schema_tables[]=
    get_all_tables, make_table_names_old_format, 0, 1, 2, 1, 0},
   {"TABLE_PRIVILEGES", table_privileges_fields_info, create_schema_table,
    fill_schema_table_privileges, 0, 0, -1, -1, 0, 0},
+  {"TABLE_STATISTICS", table_stats_fields_info, create_schema_table,
+   fill_schema_table_stats, make_old_format, 0, -1, -1, 0, 0},
   {"TRIGGERS", triggers_fields_info, create_schema_table,
    get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0,
    OPEN_TRIGGER_ONLY|OPTIMIZE_I_S_TABLE},
   {"USER_PRIVILEGES", user_privileges_fields_info, create_schema_table, 
    fill_schema_user_privileges, 0, 0, -1, -1, 0, 0},
+  {"USER_STATISTICS", user_stats_fields_info, create_schema_table, 
+   fill_schema_user_stats, make_old_format, 0, -1, -1, 0, 0},
   {"VARIABLES", variables_fields_info, create_schema_table, fill_variables,
    make_old_format, 0, 0, -1, 1, 0},
-  {"VIEWS", view_fields_info, create_schema_table, 
+  {"VIEWS", view_fields_info, create_schema_table,
    get_all_tables, 0, get_schema_views_record, 1, 2, 0,
    OPEN_VIEW_ONLY|OPTIMIZE_I_S_TABLE},
   {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
@@ -7735,8 +8400,8 @@ int initialize_schema_table(st_plugin_int *plugin)
   {
     schema_table->create_table= create_schema_table;
     schema_table->old_format= make_old_format;
-    schema_table->idx_field1= -1, 
-    schema_table->idx_field2= -1; 
+    schema_table->idx_field1= -1,
+    schema_table->idx_field2= -1;
 
     /* Make the name available to the init() function. */
     schema_table->table_name= plugin->name.str;
@@ -7749,7 +8414,7 @@ int initialize_schema_table(st_plugin_int *plugin)
       my_free(schema_table);
       DBUG_RETURN(1);
     }
-    
+
     /* Make sure the plugin name is not set inside the init() function. */
     schema_table->table_name= plugin->name.str;
   }
diff --git a/sql/sql_signal.cc b/sql/sql_signal.cc
index 9910dfc924e..c9f1f42159f 100644
--- a/sql/sql_signal.cc
+++ b/sql/sql_signal.cc
@@ -22,10 +22,11 @@
 /*
   The parser accepts any error code (desired)
   The runtime internally supports any error code (desired)
-  The client server protocol is limited to 16 bits error codes (restriction)
-  Enforcing the 65535 limit in the runtime until the protocol can change.
+  The client server protocol is limited to 16 bits error codes (restriction),
+  and the value of 65535 is reserved for progress reporting.
+  Enforcing the 65534 limit in the runtime until the protocol can change.
 */
-#define MAX_MYSQL_ERRNO UINT_MAX16
+#define MAX_MYSQL_ERRNO 65534
 
 const LEX_STRING Diag_condition_item_names[]=
 {
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index 60933069718..f1a3a2f9d8b 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -19,9 +19,9 @@
 #include "my_global.h"                          /* uchar */
 #include "my_base.h"                            /* ha_rows */
 #include "my_sys.h"                             /* qsort2_cmp */
+#include "queues.h"
 
 typedef struct st_buffpek BUFFPEK;
-typedef struct st_queue QUEUE;
 typedef struct st_sort_field SORT_FIELD;
 
 class Field;
@@ -49,7 +49,9 @@ struct TABLE;
    the callback function 'unpack_addon_fields'.
 */
 
-typedef struct st_sort_addon_field {  /* Sort addon packed field */
+typedef struct st_sort_addon_field
+{
+  /* Sort addon packed field */
   Field *field;          /* Original field */
   uint   offset;         /* Offset from the last sorted field */
   uint   null_offset;    /* Offset to to null bit from the last sorted field */
@@ -57,14 +59,6 @@ typedef struct st_sort_addon_field {  /* Sort addon packed field */
   uint8  null_bit;       /* Null bit mask for the field */
 } SORT_ADDON_FIELD;
 
-typedef struct st_buffpek {		/* Struktur om sorteringsbuffrarna */
-  my_off_t file_pos;			/* Where we are in the sort file */
-  uchar *base,*key;			/* key pointers */
-  ha_rows count;			/* Number of rows in table */
-  ulong mem_count;			/* numbers of keys in memory */
-  ulong max_keys;			/* Max keys in buffert */
-} BUFFPEK;
-
 struct BUFFPEK_COMPARE_CONTEXT
 {
   qsort_cmp2 key_compare;
@@ -78,6 +72,7 @@ typedef struct st_sort_param {
   uint addon_length;        /* Length of added packed fields */
   uint res_length;          /* Length of records in final sorted file/buffer */
   uint keys;				/* Max keys / buffer */
+  uint min_dupl_count;
   ha_rows max_rows,examined_rows;
   TABLE *sort_form;			/* For quicker make_sortkey */
   SORT_FIELD *local_sortorder;
@@ -101,6 +96,10 @@ int merge_buffers(SORTPARAM *param,IO_CACHE *from_file,
 		  IO_CACHE *to_file, uchar *sort_buffer,
 		  BUFFPEK *lastbuff,BUFFPEK *Fb,
 		  BUFFPEK *Tb,int flag);
+int merge_index(SORTPARAM *param, uchar *sort_buffer,
+		BUFFPEK *buffpek, uint maxbuffer,
+		IO_CACHE *tempfile, IO_CACHE *outfile);
+
 void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length);
 
 #endif /* SQL_SORT_INCLUDED */
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index fcd134aff04..6c772617e7b 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -27,6 +27,9 @@
 
 #include "sql_string.h"
 
+#ifdef MYSQL_CLIENT
+#error Attempt to use server-side sql_string on client. Use client/sql_string.cc
+#endif 
 /*****************************************************************************
 ** String functions
 *****************************************************************************/
@@ -80,13 +83,13 @@ bool String::real_alloc(uint32 length)
 */
 bool String::realloc(uint32 alloc_length)
 {
-  uint32 len=ALIGN_SIZE(alloc_length+1);
-  DBUG_ASSERT(len > alloc_length);
-  if (len <= alloc_length)
-    return TRUE;                                 /* Overflow */
-  if (Alloced_length < len)
+  if (Alloced_length <= alloc_length)
   {
     char *new_ptr;
+    uint32 len= ALIGN_SIZE(alloc_length+1);
+    DBUG_ASSERT(len > alloc_length);
+    if (len <= alloc_length)
+      return TRUE;                                 /* Overflow */
     if (alloced)
     {
       if (!(new_ptr= (char*) my_realloc(Ptr,len,MYF(MY_WME))))
@@ -383,7 +386,7 @@ bool String::append(const String &s)
 {
   if (s.length())
   {
-    if (realloc(str_length+s.length()))
+    if (realloc_with_extra_if_needed(str_length+s.length()))
       return TRUE;
     memcpy(Ptr+str_length,s.ptr(),s.length());
     str_length+=s.length();
@@ -408,7 +411,7 @@ bool String::append(const char *s,uint32 arg_length)
   {
     uint32 add_length=arg_length * str_charset->mbmaxlen;
     uint dummy_errors;
-    if (realloc(str_length+ add_length))
+    if (realloc_with_extra_if_needed(str_length+ add_length))
       return TRUE;
     str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
 				  s, arg_length, &my_charset_latin1,
@@ -419,7 +422,7 @@ bool String::append(const char *s,uint32 arg_length)
   /*
     For an ASCII compatinble string we can just append.
   */
-  if (realloc(str_length+arg_length))
+  if (realloc_with_extra_if_needed(str_length+arg_length))
     return TRUE;
   memcpy(Ptr+str_length,s,arg_length);
   str_length+=arg_length;
@@ -474,14 +477,14 @@ bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs)
 
     add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
     uint dummy_errors;
-    if (realloc(str_length + add_length)) 
+    if (realloc_with_extra_if_needed(str_length + add_length)) 
       return TRUE;
     str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
 				  s, arg_length, cs, &dummy_errors);
   }
   else
   {
-    if (realloc(str_length + arg_length)) 
+    if (realloc_with_extra_if_needed(str_length + arg_length)) 
       return TRUE;
     memcpy(Ptr + str_length, s, arg_length);
     str_length+= arg_length;
@@ -491,7 +494,7 @@ bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs)
 
 bool String::append(IO_CACHE* file, uint32 arg_length)
 {
-  if (realloc(str_length+arg_length))
+  if (realloc_with_extra_if_needed(str_length+arg_length))
     return TRUE;
   if (my_b_read(file, (uchar*) Ptr + str_length, arg_length))
   {
@@ -507,7 +510,7 @@ bool String::append_with_prefill(const char *s,uint32 arg_length,
 {
   int t_length= arg_length > full_length ? arg_length : full_length;
 
-  if (realloc(str_length + t_length))
+  if (realloc_with_extra_if_needed(str_length + t_length))
     return TRUE;
   t_length= full_length - arg_length;
   if (t_length > 0)
@@ -524,11 +527,11 @@ uint32 String::numchars()
   return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
 }
 
-int String::charpos(int i,uint32 offset)
+int String::charpos(longlong i,uint32 offset)
 {
   if (i <= 0)
-    return i;
-  return str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,i);
+    return (int)i;
+  return (int)str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,(size_t)i);
 }
 
 int String::strstr(const String &s,uint32 offset)
@@ -616,7 +619,7 @@ bool String::replace(uint32 offset,uint32 arg_length,
     {
       if (diff)
       {
-	if (realloc(str_length+(uint32) diff))
+	if (realloc_with_extra_if_needed(str_length+(uint32) diff))
 	  return TRUE;
 	bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length,
 		  str_length-offset-arg_length);
@@ -668,10 +671,10 @@ void String::qs_append(int i)
   str_length+= (int) (end-buff);
 }
 
-void String::qs_append(uint i)
+void String::qs_append(ulonglong i)
 {
   char *buff= Ptr + str_length;
-  char *end= int10_to_str(i, buff, 10);
+  char *end= longlong10_to_str(i, buff,10);
   str_length+= (int) (end-buff);
 }
 
@@ -946,6 +949,7 @@ my_copy_with_hex_escaping(CHARSET_INFO *cs,
   return dst - dst0;
 }
 
+
 /*
   copy a string,
   with optional character set conversion,
diff --git a/sql/sql_string.h b/sql/sql_string.h
index dc976b3b290..fcf04fb7690 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -26,8 +26,11 @@
 #include "my_sys.h"              /* alloc_root, my_free, my_realloc */
 #include "m_string.h"                           /* TRASH */
 
+#ifdef MYSQL_CLIENT
+#error Attempt to use server-side sql_string on client. Use client/sql_string.h
+#endif 
+
 class String;
-typedef struct charset_info_st CHARSET_INFO;
 typedef struct st_io_cache IO_CACHE;
 typedef struct st_mem_root MEM_ROOT;
 
@@ -54,39 +57,45 @@ uint convert_to_printable(char *to, size_t to_len,
 class String
 {
   char *Ptr;
-  uint32 str_length,Alloced_length;
+  uint32 str_length,Alloced_length, extra_alloc;
   bool alloced;
   CHARSET_INFO *str_charset;
 public:
   String()
   { 
-    Ptr=0; str_length=Alloced_length=0; alloced=0; 
+    Ptr=0; str_length=Alloced_length=extra_alloc=0; alloced=0; 
     str_charset= &my_charset_bin; 
   }
   String(uint32 length_arg)
   { 
-    alloced=0; Alloced_length=0; (void) real_alloc(length_arg); 
+    alloced=0; Alloced_length= extra_alloc= 0; (void) real_alloc(length_arg); 
     str_charset= &my_charset_bin;
   }
   String(const char *str, CHARSET_INFO *cs)
   { 
-    Ptr=(char*) str; str_length=(uint) strlen(str); Alloced_length=0; alloced=0;
+    Ptr=(char*) str; str_length= (uint32) strlen(str);
+    Alloced_length= extra_alloc= 0; alloced=0;
     str_charset=cs;
   }
+  /*
+    NOTE: If one intends to use the c_ptr() method, the following two
+    constructors need the size of memory for STR to be at least LEN+1 (to
+    make room for zero termination).
+  */
   String(const char *str,uint32 len, CHARSET_INFO *cs)
   { 
-    Ptr=(char*) str; str_length=len; Alloced_length=0; alloced=0;
+    Ptr=(char*) str; str_length=len; Alloced_length= extra_alloc=0; alloced=0;
     str_charset=cs;
   }
   String(char *str,uint32 len, CHARSET_INFO *cs)
   { 
-    Ptr=(char*) str; Alloced_length=str_length=len; alloced=0;
+    Ptr=(char*) str; Alloced_length=str_length=len; extra_alloc= 0; alloced=0;
     str_charset=cs;
   }
   String(const String &str)
   { 
     Ptr=str.Ptr ; str_length=str.str_length ;
-    Alloced_length=str.Alloced_length; alloced=0; 
+    Alloced_length=str.Alloced_length; extra_alloc= 0; alloced=0; 
     str_charset=str.str_charset;
   }
   static void *operator new(size_t size, MEM_ROOT *mem_root) throw ()
@@ -106,8 +115,10 @@ public:
   inline CHARSET_INFO *charset() const { return str_charset; }
   inline uint32 length() const { return str_length;}
   inline uint32 alloced_length() const { return Alloced_length;}
+  inline uint32 extra_allocation() const { return extra_alloc;}
   inline char& operator [] (uint32 i) const { return Ptr[i]; }
   inline void length(uint32 len) { str_length=len ; }
+  inline void extra_allocation(uint32 len) { extra_alloc= len; }
   inline bool is_empty() const { return (str_length == 0); }
   inline void mark_as_const() { Alloced_length= 0;}
   inline const char *ptr() const { return Ptr; }
@@ -144,11 +155,9 @@ public:
   {
     DBUG_ASSERT(&str != this);
     free();
-    Ptr=(char*) str.ptr()+offset; str_length=arg_length; alloced=0;
+    Ptr=(char*) str.ptr()+offset; str_length=arg_length;
     if (str.Alloced_length)
       Alloced_length=str.Alloced_length-offset;
-    else
-      Alloced_length=0;
     str_charset=str.str_charset;
   }
 
@@ -164,13 +173,13 @@ public:
   inline void set(char *str,uint32 arg_length, CHARSET_INFO *cs)
   {
     free();
-    Ptr=(char*) str; str_length=Alloced_length=arg_length ; alloced=0;
+    Ptr=(char*) str; str_length=Alloced_length=arg_length;
     str_charset=cs;
   }
   inline void set(const char *str,uint32 arg_length, CHARSET_INFO *cs)
   {
     free();
-    Ptr=(char*) str; str_length=arg_length; Alloced_length=0 ; alloced=0;
+    Ptr=(char*) str; str_length=arg_length;
     str_charset=cs;
   }
   bool set_ascii(const char *str, uint32 arg_length);
@@ -189,6 +198,18 @@ public:
   { return set_int((longlong)num, true, cs); }
   bool set_real(double num,uint decimals, CHARSET_INFO *cs);
 
+  /* Adopt a buffer allocated elsewhere; String becomes responsible for it */
+  void reassociate(char *ptr, uint32 length, uint32 alloced_length,
+                   CHARSET_INFO *cs)
+  { 
+    free();                            /* Drop whatever we held before */
+    Ptr= ptr;
+    str_length= length;
+    Alloced_length= alloced_length;
+    str_charset= cs;
+    alloced= ptr != 0;                 /* A NULL ptr leaves nothing to free */
+  }
+
   /*
     PMG 2004.11.12
     This is a method that works the same as perl's "chop". It simply
@@ -221,11 +242,11 @@ public:
     if (alloced)
     {
       alloced=0;
-      Alloced_length=0;
       my_free(Ptr);
-      Ptr=0;
-      str_length=0;				/* Safety */
     }
+    Alloced_length= extra_alloc= 0;
+    Ptr=0;
+    str_length=0;				/* Safety */
   }
   inline bool alloc(uint32 arg_length)
   {
@@ -235,9 +256,21 @@ public:
   }
   bool real_alloc(uint32 arg_length);			// Empties old string
   bool realloc(uint32 arg_length);
-  inline void shrink(uint32 arg_length)		// Shrink buffer
+  bool realloc_with_extra(uint32 arg_length)   /* realloc() plus head-room */
+  {
+    if (extra_alloc < 4096)                    /* Stops growing at >= 4096 */
+      extra_alloc= extra_alloc*2+128;          /* 0, 128, 384, 896, ... */
+    return realloc(arg_length + extra_alloc);
+  }
+  bool realloc_with_extra_if_needed(uint32 arg_length)
   {
     if (arg_length < Alloced_length)
+      return 0;
+    return realloc_with_extra(arg_length);
+  }
+  inline void shrink(uint32 arg_length)		// Shrink buffer
+  {
+    if (ALIGN_SIZE(arg_length+1) < Alloced_length)
     {
       char *new_ptr;
       if (!(new_ptr=(char*) my_realloc(Ptr,arg_length,MYF(0))))
@@ -264,7 +297,6 @@ public:
       DBUG_ASSERT(!s.uses_buffer_owned_by(this));
       free();
       Ptr=s.Ptr ; str_length=s.str_length ; Alloced_length=s.Alloced_length;
-      alloced=0;
     }
     return *this;
   }
@@ -280,6 +312,14 @@ public:
   bool set_or_copy_aligned(const char *s, uint32 arg_length, CHARSET_INFO *cs);
   bool copy(const char*s,uint32 arg_length, CHARSET_INFO *csfrom,
 	    CHARSET_INFO *csto, uint *errors);
+  void move(String &s)                 /* Take over the buffer held by s */
+  {
+    free();                            /* Release our current buffer first */
+    Ptr=s.Ptr ; str_length=s.str_length ; Alloced_length=s.Alloced_length;
+    extra_alloc= s.extra_alloc;        /* NB: str_charset is not copied */
+    alloced= s.alloced;                /* We free the buffer if s used to */
+    s.alloced= 0;                      /* s still points at it, never frees */
+  }
   bool append(const String &s);
   bool append(const char *s);
   bool append(LEX_STRING *ls)
@@ -304,7 +344,7 @@ public:
     }
     else
     {
-      if (realloc(str_length+1))
+      if (realloc_with_extra(str_length + 1))
 	return 1;
       Ptr[str_length++]=chr;
     }
@@ -315,8 +355,9 @@ public:
   friend int sortcmp(const String *a,const String *b, CHARSET_INFO *cs);
   friend int stringcmp(const String *a,const String *b);
   friend String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
+  friend class Field;
   uint32 numchars();
-  int charpos(int i,uint32 offset=0);
+  int charpos(longlong i,uint32 offset=0);
 
   int reserve(uint32 space_needed)
   {
@@ -359,6 +400,10 @@ public:
     int4store(Ptr + position,value);
   }
 
+  void qs_append(const char *str)      /* str must be NUL-terminated */
+  {
+    qs_append(str, (uint32)strlen(str));
+  }
   void qs_append(const char *str, uint32 len);
   void qs_append(double d);
   void qs_append(double *d);
@@ -368,7 +413,15 @@ public:
      str_length++;
   }
   void qs_append(int i);
-  void qs_append(uint i);
+  void qs_append(uint i)
+  {
+    qs_append((ulonglong)i);
+  }
+  void qs_append(ulong i)
+  {
+    qs_append((ulonglong)i);
+  }
+  void qs_append(ulonglong i);
 
   /* Inline (general) functions used by the protocol functions */
 
@@ -418,8 +471,9 @@ public:
   }
 };
 
-static inline bool check_if_only_end_space(CHARSET_INFO *cs, char *str, 
-                                           char *end)
+static inline bool check_if_only_end_space(CHARSET_INFO *cs,
+                                           const char *str, 
+                                           const char *end)
 {
   return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
 }
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 9a68420e5fc..1b0279c9969 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -1,5 +1,6 @@
 /*
-   Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2010, 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -45,6 +46,7 @@
 #include <hash.h>
 #include <myisam.h>
 #include <my_dir.h>
+#include "create_options.h"
 #include "sp_head.h"
 #include "sp.h"
 #include "sql_trigger.h"
@@ -61,23 +63,16 @@ const char *primary_key_name="PRIMARY";
 
 static bool check_if_keyname_exists(const char *name,KEY *start, KEY *end);
 static char *make_unique_key_name(const char *field_name,KEY *start,KEY *end);
-static int copy_data_between_tables(TABLE *from,TABLE *to,
-                                    List<Create_field> &create, bool ignore,
-				    uint order_num, ORDER *order,
-				    ha_rows *copied,ha_rows *deleted,
-                                    enum enum_enable_or_disable keys_onoff,
-                                    bool error_if_not_empty);
+static int copy_data_between_tables(THD *thd, TABLE *,TABLE *,
+                                    List<Create_field> &, bool,
+				    uint, ORDER *, ha_rows *,ha_rows *,
+                                    enum enum_enable_or_disable, bool);
 
 static bool prepare_blob_field(THD *thd, Create_field *sql_field);
 static bool check_engine(THD *, const char *, HA_CREATE_INFO *);
-static int
-mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
-                           Alter_info *alter_info,
-                           bool tmp_table,
-                           uint *db_options,
-                           handler *file, KEY **key_info_buffer,
-                           uint *key_count, int select_field_count);
-
+static int mysql_prepare_create_table(THD *, HA_CREATE_INFO *, Alter_info *,
+                                      bool, uint *, handler *, KEY **, uint *,
+                                      int);
 
 /**
   @brief Helper function for explain_filename
@@ -2278,7 +2273,7 @@ err:
   {
     if (!foreign_key_error)
       my_printf_error(ER_BAD_TABLE_ERROR, ER(ER_BAD_TABLE_ERROR), MYF(0),
-                      wrong_tables.c_ptr());
+                      wrong_tables.c_ptr_safe());
     else
       my_message(ER_ROW_IS_REFERENCED, ER(ER_ROW_IS_REFERENCED), MYF(0));
     error= 1;
@@ -2708,7 +2703,12 @@ int prepare_create_field(Create_field *sql_field,
                           (sql_field->decimals << FIELDFLAG_DEC_SHIFT));
     break;
   }
-  if (!(sql_field->flags & NOT_NULL_FLAG))
+  if (sql_field->flags & NOT_NULL_FLAG)
+    DBUG_PRINT("info", ("field is declared NOT NULL"));
+  if (sql_field->vcol_info)
+    DBUG_PRINT("info", ("field is a virtual column"));
+  if (!(sql_field->flags & NOT_NULL_FLAG) ||
+      (sql_field->vcol_info))  /* Make virtual columns allow NULL values */
     sql_field->pack_flag|= FIELDFLAG_MAYBE_NULL;
   if (sql_field->flags & NO_DEFAULT_VALUE_FLAG)
     sql_field->pack_flag|= FIELDFLAG_NO_DEFAULT;
@@ -3061,6 +3061,8 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
             null_fields--;
 	  sql_field->flags=		dup_field->flags;
           sql_field->interval=          dup_field->interval;
+          sql_field->vcol_info=         dup_field->vcol_info;
+          sql_field->stored_in_db=      dup_field->stored_in_db;
 	  it2.remove();			// Remove first (create) definition
 	  select_field_pos--;
 	  break;
@@ -3070,7 +3072,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
     /* Don't pack rows in old tables if the user has requested this */
     if ((sql_field->flags & BLOB_FLAG) ||
 	(sql_field->sql_type == MYSQL_TYPE_VARCHAR &&
-	create_info->row_type != ROW_TYPE_FIXED))
+         create_info->row_type != ROW_TYPE_FIXED))
       (*db_options)|= HA_OPTION_PACK_RECORD;
     it2.rewind();
   }
@@ -3093,7 +3095,28 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
     sql_field->offset= record_offset;
     if (MTYP_TYPENR(sql_field->unireg_check) == Field::NEXT_NUMBER)
       auto_increment++;
-    record_offset+= sql_field->pack_length;
+    if (parse_option_list(thd, &sql_field->option_struct,
+                          sql_field->option_list,
+                          create_info->db_type->field_options, FALSE,
+                          thd->mem_root))
+      DBUG_RETURN(TRUE);
+    /*
+      For now skip fields that are not physically stored in the database
+      (virtual fields) and update their offset later 
+      (see the next loop).
+    */
+    if (sql_field->stored_in_db)
+      record_offset+= sql_field->pack_length;
+  }
+  /* Update virtual fields' offset*/
+  it.rewind();
+  while ((sql_field=it++))
+  {
+    if (!sql_field->stored_in_db)
+    {
+      sql_field->offset= record_offset;
+      record_offset+= sql_field->pack_length;
+    }
   }
   if (timestamps_with_niladic > 1)
   {
@@ -3142,6 +3165,8 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
     if (key->type == Key::FOREIGN_KEY)
     {
       fk_key_count++;
+      if (((Foreign_key *)key)->validate(alter_info->create_list))
+        DBUG_RETURN(TRUE);
       Foreign_key *fk_key= (Foreign_key*) key;
       if (fk_key->ref_columns.elements &&
 	  fk_key->ref_columns.elements != fk_key->columns.elements)
@@ -3271,6 +3296,12 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
     key_info->key_part=key_part_info;
     key_info->usable_key_parts= key_number;
     key_info->algorithm= key->key_create_info.algorithm;
+    key_info->option_list= key->option_list;
+    if (parse_option_list(thd, &key_info->option_struct,
+                          key_info->option_list,
+                          create_info->db_type->index_options, FALSE,
+                          thd->mem_root))
+      DBUG_RETURN(TRUE);
 
     if (key->type == Key::FULLTEXT)
     {
@@ -3435,6 +3466,17 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
 	  }
 	}
 #endif
+        if (!sql_field->stored_in_db)
+        {
+          /* Key fields must always be physically stored. */
+          my_error(ER_KEY_BASED_ON_GENERATED_VIRTUAL_COLUMN, MYF(0));
+          DBUG_RETURN(TRUE);
+        }
+        if (key->type == Key::PRIMARY && sql_field->vcol_info)
+        {
+          my_error(ER_PRIMARY_KEY_BASED_ON_VIRTUAL_COLUMN, MYF(0));
+          DBUG_RETURN(TRUE);
+        }
 	if (!(sql_field->flags & NOT_NULL_FLAG))
 	{
 	  if (key->type == Key::PRIMARY)
@@ -3519,7 +3561,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
 	else if (!(file->ha_table_flags() & HA_NO_PREFIX_CHAR_KEYS))
 	  length=column->length;
       }
-      else if (length == 0)
+      else if (length == 0 && (sql_field->flags & NOT_NULL_FLAG))
       {
 	my_error(ER_WRONG_KEY_COLUMN, MYF(0), column->field_name.str);
 	  DBUG_RETURN(TRUE);
@@ -3687,6 +3729,12 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
     }
   }
 
+  if (parse_option_list(thd, &create_info->option_struct,
+                          create_info->option_list,
+                          file->partition_ht()->table_options, FALSE,
+                          thd->mem_root))
+      DBUG_RETURN(TRUE);
+
   DBUG_RETURN(FALSE);
 }
 
@@ -3830,6 +3878,7 @@ void sp_prepare_create_field(THD *thd, Create_field *sql_field)
 }
 
 
+#ifdef WITH_PARTITION_STORAGE_ENGINE
 /**
   Auxiliary function which allows to check if freshly created .FRM
   file for table can be opened.
@@ -3867,6 +3916,69 @@ static bool check_if_created_table_can_be_opened(THD *thd,
   (void) file->ha_create_handler_files(path, NULL, CHF_DELETE_FLAG, create_info);
   return result;
 }
+#endif
+
+
+/**
+  Check whether an frm file exists for the table (new or 5.0 name encoding)
+
+  @param old_path        5.0-encoded frm path to ignore (same table), or NULL
+  @param path            path to the frm file in new encoding
+  @param db              database name
+  @param table_name      table name
+  @param alias           table name for error message (for new encoding)
+  @param issue_error     should we issue error messages
+
+  @retval FALSE there is no frm file
+  @retval TRUE  there is frm file
+*/
+
+bool check_table_file_presence(char *old_path,
+                               char *path,
+                               const char *db,
+                               const char *table_name,
+                               const char *alias,
+                               bool issue_error)
+{
+  if (!access(path,F_OK))
+  {
+    if (issue_error)
+      my_error(ER_TABLE_EXISTS_ERROR,MYF(0),alias);
+    return TRUE;
+  }
+  {
+    /*
+      Check if file of the table in 5.0 file name encoding exists, except
+      when it is the same table. Copies into tbl50 are length-bounded.
+    */
+    char tbl50[FN_REFLEN];
+#ifdef _WIN32
+    if (check_if_legal_tablename(table_name) != 0)
+    {
+      /*
+       Check for reserved device names for which access() returns 0
+       (CON, AUX etc).
+      */
+      return FALSE;
+    }
+#endif
+    strxnmov(tbl50, sizeof(tbl50) - 1, mysql_data_home, "/", db, "/",
+             table_name, NullS);
+    fn_format(tbl50, tbl50, "", reg_ext, MY_UNPACK_FILENAME);
+    if (!access(tbl50, F_OK) &&
+        (old_path == NULL || strcmp(old_path, tbl50) != 0))
+    {
+      if (issue_error)
+      {
+        strxnmov(tbl50, sizeof(tbl50) - 1, MYSQL50_TABLE_NAME_PREFIX,
+                 table_name, NullS);
+        my_error(ER_TABLE_EXISTS_ERROR, MYF(0), tbl50);
+      }
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
 
 
 /*
@@ -3937,8 +4049,9 @@ bool mysql_create_table_no_lock(THD *thd,
   set_table_default_charset(thd, create_info, (char*) db);
 
   db_options= create_info->table_options;
-  if (create_info->row_type == ROW_TYPE_DYNAMIC)
-    db_options|=HA_OPTION_PACK_RECORD;
+  if (create_info->row_type != ROW_TYPE_FIXED &&
+      create_info->row_type != ROW_TYPE_DEFAULT)
+    db_options|= HA_OPTION_PACK_RECORD;
   alias= table_case_name(create_info, table_name);
   if (!(file= get_new_handler((TABLE_SHARE*) 0, thd->mem_root,
                               create_info->db_type)))
@@ -4132,24 +4245,31 @@ bool mysql_create_table_no_lock(THD *thd,
       find_temporary_table(thd, db, table_name))
   {
     if (create_info->options & HA_LEX_CREATE_IF_NOT_EXISTS)
-    {
-      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
-                          ER_TABLE_EXISTS_ERROR, ER(ER_TABLE_EXISTS_ERROR),
-                          alias);
-      error= 0;
-      goto err;
-    }
+      goto warn;
     my_error(ER_TABLE_EXISTS_ERROR, MYF(0), alias);
     goto err;
   }
 
+  /* Give warnings for not supported table options */
+#if defined(WITH_ARIA_STORAGE_ENGINE)
+  extern handlerton *maria_hton;
+  if (file->ht != maria_hton)
+#endif
+    if (create_info->transactional)
+      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_ILLEGAL_HA_CREATE_OPTION,
+                          ER(ER_ILLEGAL_HA_CREATE_OPTION),
+                          file->engine_name()->str,
+                          "TRANSACTIONAL=1");
+
   if (!internal_tmp_table && !(create_info->options & HA_LEX_CREATE_TMP_TABLE))
   {
-    if (!access(path,F_OK))
+    if (check_table_file_presence(NULL, path, db, table_name, table_name,
+                                  !(create_info->options &
+                                    HA_LEX_CREATE_IF_NOT_EXISTS)))
     {
       if (create_info->options & HA_LEX_CREATE_IF_NOT_EXISTS)
         goto warn;
-      my_error(ER_TABLE_EXISTS_ERROR,MYF(0),table_name);
       goto err;
     }
     /*
@@ -4197,7 +4317,6 @@ bool mysql_create_table_no_lock(THD *thd,
           goto warn;
         my_error(ER_TABLE_EXISTS_ERROR,MYF(0),table_name);
         goto err;
-        break;
       default:
         DBUG_PRINT("info", ("error: %u from storage engine", retcode));
         my_error(retcode, MYF(0),table_name);
@@ -4459,7 +4578,7 @@ mysql_rename_table(handlerton *base, const char *old_db,
   char from[FN_REFLEN + 1], to[FN_REFLEN + 1],
     lc_from[FN_REFLEN + 1], lc_to[FN_REFLEN + 1];
   char *from_base= from, *to_base= to;
-  char tmp_name[NAME_LEN+1];
+  char tmp_name[SAFE_NAME_LEN+1];
   handler *file;
   int error=0;
   DBUG_ENTER("mysql_rename_table");
@@ -4827,8 +4946,8 @@ is_index_maintenance_unique (TABLE *table, Alter_info *alter_info)
     that need to be dropped and/or (re-)created.
 
   RETURN VALUES
-    TRUE   error
-    FALSE  success
+    TRUE   error
+    FALSE  success; the kind of change needed is stored in *need_copy_table
 */
 
 bool
@@ -4850,6 +4969,7 @@ mysql_compare_tables(TABLE *table,
   KEY_PART_INFO *key_part;
   KEY_PART_INFO *end;
   THD *thd= table->in_use;
+  uint i;
   /*
     Remember if the new definition has new VARCHAR column;
     create_info->varchar will be reset in mysql_prepare_create_table.
@@ -4876,6 +4996,9 @@ mysql_compare_tables(TABLE *table,
   Alter_info tmp_alter_info(*alter_info, thd->mem_root);
   uint db_options= 0; /* not used */
 
+  /* Set default value for return value (to ensure it's always set) */
+  *need_copy_table= ALTER_TABLE_DATA_CHANGED;
+
   /* Create the prepared information. */
   if (mysql_prepare_create_table(thd, create_info,
                                  &tmp_alter_info,
@@ -4924,6 +5047,8 @@ mysql_compare_tables(TABLE *table,
       create_info->used_fields & HA_CREATE_USED_CHARSET ||
       create_info->used_fields & HA_CREATE_USED_DEFAULT_CHARSET ||
       (table->s->row_type != create_info->row_type) ||
+      create_info->used_fields & HA_CREATE_USED_PAGE_CHECKSUM ||
+      create_info->used_fields & HA_CREATE_USED_TRANSACTIONAL ||
       create_info->used_fields & HA_CREATE_USED_PACK_KEYS ||
       create_info->used_fields & HA_CREATE_USED_MAX_ROWS ||
       (alter_info->flags & (ALTER_RECREATE | ALTER_FOREIGN_KEY)) ||
@@ -4931,10 +5056,16 @@ mysql_compare_tables(TABLE *table,
       !table->s->mysql_version ||
       (table->s->frm_version < FRM_VER_TRUE_VARCHAR && varchar))
   {
-    *need_copy_table= ALTER_TABLE_DATA_CHANGED;
+    DBUG_PRINT("info", ("Basic checks -> ALTER_TABLE_DATA_CHANGED"));
     DBUG_RETURN(0);
   }
 
+  if ((create_info->fields_option_struct= (ha_field_option_struct**)
+         thd->calloc(sizeof(void*) * table->s->fields)) == NULL ||
+      (create_info->indexes_option_struct= (ha_index_option_struct**)
+         thd->calloc(sizeof(void*) * table->s->keys)) == NULL)
+    DBUG_RETURN(1);
+
   /*
     Use transformed info to evaluate possibility of in-place ALTER TABLE
     but use the preserved field to persist modifications.
@@ -4946,12 +5077,18 @@ mysql_compare_tables(TABLE *table,
     Go through fields and check if the original ones are compatible
     with new table.
   */
-  for (f_ptr= table->field, new_field= new_field_it++,
+  for (i= 0, f_ptr= table->field, new_field= new_field_it++,
        tmp_new_field= tmp_new_field_it++;
        (field= *f_ptr);
-       f_ptr++, new_field= new_field_it++,
+       i++, f_ptr++, new_field= new_field_it++,
        tmp_new_field= tmp_new_field_it++)
   {
+    DBUG_ASSERT(i < table->s->fields);
+    create_info->fields_option_struct[i]= tmp_new_field->option_struct;
+
+    /* reset common markers of how field changed */
+    field->flags&= ~(FIELD_IS_RENAMED | FIELD_IN_ADD_INDEX);
+
     /* Make sure we have at least the default charset in use. */
     if (!new_field->charset)
       new_field->charset= create_info->default_table_charset;
@@ -4960,19 +5097,32 @@ mysql_compare_tables(TABLE *table,
     if ((tmp_new_field->flags & NOT_NULL_FLAG) !=
 	(uint) (field->flags & NOT_NULL_FLAG))
     {
+      DBUG_PRINT("info", ("NULL behaviour difference in field '%s' -> "
+                          "ALTER_TABLE_DATA_CHANGED", new_field->field_name));
+      DBUG_RETURN(0);
+    }
+
+    /*
+      Check if the altered column is computed and either
+      is stored or is used in the partitioning expression.
+      TODO: Mark such a column with an alter flag only if
+      the defining expression has changed.
+    */
+    if (field->vcol_info && 
+        (field->stored_in_db || field->vcol_info->is_in_partitioning_expr()))
+    {
       *need_copy_table= ALTER_TABLE_DATA_CHANGED;
       DBUG_RETURN(0);
     }
 
     /* Don't pack rows in old tables if the user has requested this. */
-    if (create_info->row_type == ROW_TYPE_DYNAMIC ||
-	(tmp_new_field->flags & BLOB_FLAG) ||
-	(tmp_new_field->sql_type == MYSQL_TYPE_VARCHAR &&
-	create_info->row_type != ROW_TYPE_FIXED))
-      create_info->table_options|= HA_OPTION_PACK_RECORD;
+      if (create_info->row_type == ROW_TYPE_DYNAMIC ||
+          (tmp_new_field->flags & BLOB_FLAG) ||
+          (tmp_new_field->sql_type == MYSQL_TYPE_VARCHAR &&
+           create_info->row_type != ROW_TYPE_FIXED))
+        create_info->table_options|= HA_OPTION_PACK_RECORD;
 
     /* Check if field was renamed */
-    field->flags&= ~FIELD_IS_RENAMED;
     if (my_strcasecmp(system_charset_info,
 		      field->field_name,
 		      tmp_new_field->field_name))
@@ -4981,11 +5131,10 @@ mysql_compare_tables(TABLE *table,
     /* Evaluate changes bitmap and send to check_if_incompatible_data() */
     if (!(tmp= field->is_equal(tmp_new_field)))
     {
-      *need_copy_table= ALTER_TABLE_DATA_CHANGED;
+      DBUG_PRINT("info", ("!field_is_equal('%s') -> ALTER_TABLE_DATA_CHANGED",
+                          new_field->field_name));
       DBUG_RETURN(0);
     }
-    // Clear indexed marker
-    field->flags&= ~FIELD_IN_ADD_INDEX;
     changes|= tmp;
   }
 
@@ -5090,7 +5239,9 @@ mysql_compare_tables(TABLE *table,
   for (new_key= *key_info_buffer; new_key < new_key_end; new_key++)
   {
     /* Search an old key with the same name. */
-    for (table_key= table->key_info; table_key < table_key_end; table_key++)
+    for (i= 0, table_key= table->key_info;
+         table_key < table_key_end;
+         i++, table_key++)
     {
       if (! strcmp(table_key->name, new_key->name))
         break;
@@ -5109,21 +5260,31 @@ mysql_compare_tables(TABLE *table,
       }
       DBUG_PRINT("info", ("index added: '%s'", new_key->name));
     }
+    else
+    {
+      DBUG_ASSERT(i < table->s->keys);
+      create_info->indexes_option_struct[i]= new_key->option_struct;
+    }
   }
 
   /* Check if changes are compatible with current handler without a copy */
   if (table->file->check_if_incompatible_data(create_info, changes))
   {
-    *need_copy_table= ALTER_TABLE_DATA_CHANGED;
+    DBUG_PRINT("info", ("check_if_incompatible_data() -> "
+                        "ALTER_TABLE_DATA_CHANGED"));
     DBUG_RETURN(0);
   }
 
   if (*index_drop_count || *index_add_count)
   {
+    DBUG_PRINT("info", ("Index dropped=%u added=%u -> "
+                        "ALTER_TABLE_INDEX_CHANGED",
+                        *index_drop_count, *index_add_count));
     *need_copy_table= ALTER_TABLE_INDEX_CHANGED;
     DBUG_RETURN(0);
   }
 
+  DBUG_PRINT("info", (" -> ALTER_TABLE_METADATA_ONLY"));
   *need_copy_table= ALTER_TABLE_METADATA_ONLY; // Tables are compatible
   DBUG_RETURN(0);
 }
@@ -5242,6 +5403,7 @@ blob_length_by_type(enum_field_types type)
     Sets create_info->varchar if the table has a VARCHAR column.
     Prepares alter_info->create_list and alter_info->key_list with
     columns and keys of the new table.
+
   @retval TRUE   error, out of memory or a semantical error in ALTER
                  TABLE instructions
   @retval FALSE  success
@@ -5268,7 +5430,8 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
   uint used_fields= create_info->used_fields;
   KEY *key_info=table->key_info;
   bool rc= TRUE;
-
+  Create_field *def;
+  Field **f_ptr,*field;
   DBUG_ENTER("mysql_prepare_alter_table");
 
   create_info->varchar= FALSE;
@@ -5289,26 +5452,22 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
   }
   if (!(used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE))
     create_info->key_block_size= table->s->key_block_size;
-
-  if (!create_info->tablespace)
-    create_info->tablespace= table->s->tablespace;
-
-  if (create_info->storage_media == HA_SM_DEFAULT)
-    create_info->storage_media= table->s->default_storage_media;
+  if (!(used_fields & HA_CREATE_USED_TRANSACTIONAL))
+    create_info->transactional= table->s->transactional;
 
   restore_record(table, s->default_values);     // Empty record for DEFAULT
-  Create_field *def;
 
+  create_info->option_list= merge_engine_table_options(table->s->option_list,
+                                        create_info->option_list, thd->mem_root);
   /*
     First collect all fields from table which isn't in drop_list
   */
-  Field **f_ptr,*field;
   for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
   {
-    if (field->type() == MYSQL_TYPE_STRING)
+    Alter_drop *drop;
+    if (field->type() == MYSQL_TYPE_VARCHAR)
       create_info->varchar= TRUE;
     /* Check if field should be dropped */
-    Alter_drop *drop;
     drop_it.rewind();
     while ((drop=drop_it++))
     {
@@ -5347,6 +5506,11 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
     if (def)
     {						// Field is changed
       def->field=field;
+      if (field->stored_in_db != def->stored_in_db)
+      {
+        my_error(ER_UNSUPPORTED_ACTION_ON_VIRTUAL_COLUMN, MYF(0));
+        goto err;
+      }
       if (!def->after)
       {
 	new_create_list.push_back(def);
@@ -5388,7 +5552,8 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
   {
     if (def->change && ! def->field)
     {
-      my_error(ER_BAD_FIELD_ERROR, MYF(0), def->change, table->s->table_name.str);
+      my_error(ER_BAD_FIELD_ERROR, MYF(0), def->change,
+               table->s->table_name.str);
       goto err;
     }
     /*
@@ -5430,7 +5595,8 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
       }
       if (!find)
       {
-	my_error(ER_BAD_FIELD_ERROR, MYF(0), def->after, table->s->table_name.str);
+	my_error(ER_BAD_FIELD_ERROR, MYF(0), def->after,
+                 table->s->table_name.str);
         goto err;
       }
       find_it.after(def);			// Put element after this
@@ -5484,6 +5650,8 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
 	continue;				// Wrong field (from UNIREG)
       const char *key_part_name=key_part->field->field_name;
       Create_field *cfield;
+      uint key_part_length;
+
       field_it.rewind();
       while ((cfield=field_it++))
       {
@@ -5499,7 +5667,7 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
       }
       if (!cfield)
 	continue;				// Field is removed
-      uint key_part_length=key_part->length;
+      key_part_length= key_part->length;
       if (cfield->field)			// Not new field
       {
         /*
@@ -5572,7 +5740,7 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
       key= new Key(key_type, key_name, strlen(key_name),
                    &key_create_info,
                    test(key_info->flags & HA_GENERATED_KEY),
-                   key_parts);
+                   key_parts, key_info->option_list);
       new_key_list.push_back(key);
     }
   }
@@ -5580,6 +5748,9 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
     Key *key;
     while ((key=key_it++))			// Add new keys
     {
+      if (key->type == Key::FOREIGN_KEY &&
+          ((Foreign_key *)key)->validate(new_create_list))
+        goto err;
       if (key->type != Key::FOREIGN_KEY)
         new_key_list.push_back(key);
       if (key->name.str &&
@@ -5651,6 +5822,7 @@ err:
       order_num        How many ORDER BY fields has been specified.
       order            List of fields to ORDER BY.
       ignore           Whether we have ALTER IGNORE TABLE
+      require_online   Give an error if we can't do operation online
 
   DESCRIPTION
     This is a veery long function and is everything but the kitchen sink :)
@@ -5681,13 +5853,15 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
                        HA_CREATE_INFO *create_info,
                        TABLE_LIST *table_list,
                        Alter_info *alter_info,
-                       uint order_num, ORDER *order, bool ignore)
+                       uint order_num, ORDER *order, bool ignore,
+                       bool require_online)
 {
   TABLE *table, *new_table= 0;
   MDL_ticket *mdl_ticket;
   MDL_request target_mdl_request;
   int error= 0;
   char tmp_name[80],old_name[32],new_name_buff[FN_REFLEN + 1];
+  char old_name_buff[FN_REFLEN + 1];
   char new_alias_buff[FN_REFLEN], *table_name, *db, *new_alias, *alias;
   char index_file[FN_REFLEN], data_file[FN_REFLEN];
   char path[FN_REFLEN + 1];
@@ -5709,6 +5883,10 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   uint *index_add_buffer= NULL;
   uint candidate_key_count= 0;
   bool no_pk;
+  ulong explicit_used_fields= 0;
+  enum ha_extra_function extra_func= thd->locked_tables_mode
+                                       ? HA_EXTRA_NOT_USED
+                                       : HA_EXTRA_FORCE_REOPEN;
   DBUG_ENTER("mysql_alter_table");
 
   /*
@@ -5873,10 +6051,12 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
         */
         build_table_filename(new_name_buff, sizeof(new_name_buff) - 1,
                              new_db, new_name_buff, reg_ext, 0);
-        if (!access(new_name_buff, F_OK))
+        build_table_filename(old_name_buff, sizeof(old_name_buff) - 1,
+                             db, table_name, reg_ext, 0);
+        if (check_table_file_presence(old_name_buff, new_name_buff, new_db,
+                                      new_name, new_alias, TRUE))
 	{
 	  /* Table will be closed in do_command() */
-	  my_error(ER_TABLE_EXISTS_ERROR, MYF(0), new_alias);
           goto err;
 	}
       }
@@ -5922,19 +6102,21 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   }
 
   /*
-   If this is an ALTER TABLE and no explicit row type specified reuse
-   the table's row type.
-   Note : this is the same as if the row type was specified explicitly.
+    If this is an ALTER TABLE and no explicit row type specified reuse
+    the table's row type.
+    Note: this is the same as if the row type was specified explicitly and
+    we must thus set HA_CREATE_USED_ROW_FORMAT!
   */
   if (create_info->row_type == ROW_TYPE_NOT_USED)
   {
     /* ALTER TABLE without explicit row type */
     create_info->row_type= table->s->row_type;
-  }
-  else
-  {
-    /* ALTER TABLE with specific row type */
-    create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT;
+    /*
+      We have to mark the row type as used, as otherwise the engine may
+      change the row format in update_create_info().
+    */
+    create_info->used_fields|= HA_CREATE_USED_ROW_FORMAT;
+    explicit_used_fields|= HA_CREATE_USED_ROW_FORMAT;
   }
 
   DBUG_PRINT("info", ("old type: %s  new type: %s",
@@ -5956,13 +6138,13 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
     case LEAVE_AS_IS:
       break;
     case ENABLE:
-      if (wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN))
+      if (wait_while_table_is_used(thd, table, extra_func))
         goto err;
       DBUG_EXECUTE_IF("sleep_alter_enable_indexes", my_sleep(6000000););
       error= table->file->ha_enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE);
       break;
     case DISABLE:
-      if (wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN))
+      if (wait_while_table_is_used(thd, table, extra_func))
         goto err;
       error=table->file->ha_disable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE);
       break;
@@ -5975,8 +6157,8 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
     {
       error= 0;
       push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
-                          ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
-                          table->alias);
+			  ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
+			  table->alias.c_ptr());
     }
 
     if (!error && (new_name != table_name || new_db != db))
@@ -5990,7 +6172,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
         simple rename did nothing and therefore we can safely return
         without additional clean-up.
       */
-      if (wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN))
+      if (wait_while_table_is_used(thd, table, extra_func))
         goto err;
       close_all_tables_for_name(thd, table->s, TRUE);
       /*
@@ -6026,8 +6208,8 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
     {
       error= 0;
       push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
-                          ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
-                          table->alias);
+			  ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
+			  table->alias.c_ptr());
     }
 
     if (!error)
@@ -6088,6 +6270,9 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   if (mysql_prepare_alter_table(thd, table, create_info, alter_info))
     goto err;
   
+  /* Remove markers set for update_create_info */
+  create_info->used_fields&= ~explicit_used_fields;
+
   if (need_copy_table == ALTER_TABLE_METADATA_ONLY)
     need_copy_table= alter_info->change_level;
 
@@ -6244,6 +6429,16 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
           /* Non-primary unique key. */
           needed_inplace_with_read_flags|= HA_INPLACE_ADD_UNIQUE_INDEX_NO_WRITE;
           needed_inplace_flags|= HA_INPLACE_ADD_UNIQUE_INDEX_NO_READ_WRITE;
+          if (ignore)
+          {
+            /*
+              If ignore is used, we have to remove all duplicate rows,
+              which require a full table copy.
+            */
+            need_copy_table= ALTER_TABLE_DATA_CHANGED;
+            pk_changed= 2;                      // Don't change need_copy_table
+            break;
+          }
         }
       }
       else
@@ -6414,10 +6609,23 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
     */
   }
 
+  /* Check if we can do the ALTER TABLE as online */
+  if (require_online)
+  {
+    if (index_add_count || index_drop_count ||
+        (new_table &&
+         !(new_table->file->ha_table_flags() & HA_NO_COPY_ON_ALTER)))
+    {
+      my_error(ER_CANT_DO_ONLINE, MYF(0), "ALTER");
+      goto err_new_table_cleanup;
+    }
+  }
+
   /* Copy the data if necessary. */
   thd->count_cuted_fields= CHECK_FIELD_WARN;	// calc cuted fields
   thd->cuted_fields=0L;
   copied=deleted=0;
+
   /*
     We do not copy data for MERGE tables. Only the children have data.
     MERGE tables have HA_NO_COPY_ON_ALTER set.
@@ -6427,12 +6635,11 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
     /* We don't want update TIMESTAMP fields during ALTER TABLE. */
     new_table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
     new_table->next_number_field=new_table->found_next_number_field;
-    thd_proc_info(thd, "copy to tmp table");
     DBUG_EXECUTE_IF("abort_copy_table", {
         my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0));
         goto err_new_table_cleanup;
       });
-    error= copy_data_between_tables(table, new_table,
+    error= copy_data_between_tables(thd, table, new_table,
                                     alter_info->create_list, ignore,
                                     order_num, order, &copied, &deleted,
                                     alter_info->keys_onoff,
@@ -6448,7 +6655,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
         table->file->indexes_are_disabled())
       need_lock_for_indexes= true;
     if (!table->s->tmp_table && need_lock_for_indexes &&
-        wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN))
+        wait_while_table_is_used(thd, table, extra_func))
       goto err_new_table_cleanup;
     thd_proc_info(thd, "manage keys");
     DEBUG_SYNC(thd, "alter_table_manage_keys");
@@ -6497,6 +6704,9 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
       if ((error= table->file->add_index(table, key_info, index_add_count,
                                          &add)))
       {
+        /* Only report error if handler has not already reported an error */
+        if (!thd->is_error())
+        {
         /*
           Exchange the key_info for the error message. If we exchange
           key number by key name in the message later, we need correct info.
@@ -6505,6 +6715,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
         table->key_info= key_info;
         table->file->print_error(error, MYF(0));
         table->key_info= save_key_info;
+        }
         goto err_new_table_cleanup;
       }
       pending_inplace_add_index= true;
@@ -6568,6 +6779,8 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   }
   /*end of if (! new_table) for add/drop index*/
 
+  DBUG_ASSERT(error == 0);
+
   if (table->s->tmp_table != NO_TMP_TABLE)
   {
     /*
@@ -6788,27 +7001,6 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   if (write_bin_log(thd, TRUE, thd->query(), thd->query_length()))
     DBUG_RETURN(TRUE);
 
-  if (ha_check_storage_engine_flag(old_db_type, HTON_FLUSH_AFTER_RENAME))
-  {
-    /*
-      For the alter table to be properly flushed to the logs, we
-      have to open the new table.  If not, we get a problem on server
-      shutdown. But we do not need to attach MERGE children.
-    */
-    char path[FN_REFLEN];
-    TABLE *t_table;
-    build_table_filename(path + 1, sizeof(path) - 1, new_db, table_name, "", 0);
-    t_table= open_table_uncached(thd, path, new_db, tmp_name, FALSE);
-    if (t_table)
-    {
-      intern_close_table(t_table);
-      my_free(t_table);
-    }
-    else
-      sql_print_warning("Could not open table %s.%s after rename\n",
-                        new_db,table_name);
-    ha_flush_logs(old_db_type);
-  }
   table_list->table=0;				// For query cache
   query_cache_invalidate3(thd, table_list, 0);
 
@@ -6891,7 +7083,9 @@ err_with_mdl:
   thd->mdl_context.release_all_locks_for_name(mdl_ticket);
   DBUG_RETURN(TRUE);
 }
-/* mysql_alter_table */
+
+
+/* Copy all rows from one table to another */
 
 
 
@@ -6942,7 +7136,7 @@ bool mysql_trans_commit_alter_copy_data(THD *thd)
 
 
 static int
-copy_data_between_tables(TABLE *from,TABLE *to,
+copy_data_between_tables(THD *thd, TABLE *from,TABLE *to,
 			 List<Create_field> &create,
                          bool ignore,
 			 uint order_num, ORDER *order,
@@ -6951,10 +7145,9 @@ copy_data_between_tables(TABLE *from,TABLE *to,
                          enum enum_enable_or_disable keys_onoff,
                          bool error_if_not_empty)
 {
-  int error;
-  Copy_field *copy,*copy_end;
-  ulong found_count,delete_count;
-  THD *thd= current_thd;
+  int error= 1, errpos= 0;
+  Copy_field *copy= NULL, *copy_end;
+  ha_rows found_count= 0, delete_count= 0;
   uint length= 0;
   SORT_FIELD *sortorder;
   READ_RECORD info;
@@ -6963,18 +7156,25 @@ copy_data_between_tables(TABLE *from,TABLE *to,
   List<Item>   all_fields;
   ha_rows examined_rows;
   bool auto_increment_field_copied= 0;
-  ulong save_sql_mode;
-  ulonglong prev_insert_id;
+  ulong save_sql_mode= thd->variables.sql_mode;
+  ulonglong prev_insert_id, time_to_report_progress;
+  List_iterator<Create_field> it(create);
+  Create_field *def;
   DBUG_ENTER("copy_data_between_tables");
 
+  /* 2 or 3 stages: sorting (if ordered), copying data, updating indexes */
+  thd_progress_init(thd, 2 + test(order));
+
   if (mysql_trans_prepare_alter_copy_data(thd))
-    DBUG_RETURN(-1);
-  
+    goto err;
+  errpos=1;
+
   if (!(copy= new Copy_field[to->s->fields]))
-    DBUG_RETURN(-1);				/* purecov: inspected */
+    goto err;		/* purecov: inspected */
 
   if (to->file->ha_external_lock(thd, F_WRLCK))
-    DBUG_RETURN(-1);
+    goto err;
+  errpos= 2;
 
   /* We need external lock before we can disable/enable keys */
   alter_table_manage_keys(to, from->file->indexes_are_disabled(), keys_onoff);
@@ -6986,11 +7186,8 @@ copy_data_between_tables(TABLE *from,TABLE *to,
 
   from->file->info(HA_STATUS_VARIABLE);
   to->file->ha_start_bulk_insert(from->file->stats.records);
+  errpos= 3;
 
-  save_sql_mode= thd->variables.sql_mode;
-
-  List_iterator<Create_field> it(create);
-  Create_field *def;
   copy_end=copy;
   for (Field **ptr=to->field ; *ptr ; ptr++)
   {
@@ -7014,11 +7211,10 @@ copy_data_between_tables(TABLE *from,TABLE *to,
 
   }
 
-  found_count=delete_count=0;
-
   if (order)
   {
-    if (to->s->primary_key != MAX_KEY && to->file->primary_key_is_clustered())
+    if (to->s->primary_key != MAX_KEY &&
+        to->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX)
     {
       char warn_buff[MYSQL_ERRMSG_SIZE];
       my_snprintf(warn_buff, sizeof(warn_buff), 
@@ -7035,8 +7231,8 @@ copy_data_between_tables(TABLE *from,TABLE *to,
       tables.table= from;
       tables.alias= tables.table_name= from->s->table_name.str;
       tables.db= from->s->db.str;
-      error= 1;
 
+      thd_proc_info(thd, "Sorting");
       if (thd->lex->select_lex.setup_ref_array(thd, order_num) ||
           setup_order(thd, thd->lex->select_lex.ref_pointer_array,
                       &tables, fields, all_fields, order) ||
@@ -7047,15 +7243,24 @@ copy_data_between_tables(TABLE *from,TABLE *to,
           HA_POS_ERROR)
         goto err;
     }
-  };
+    thd_progress_next_stage(thd);
+  }
 
+  thd_proc_info(thd, "copy to tmp table");
   /* Tell handler that we have values for all columns in the to table */
   to->use_all_columns();
-  init_read_record(&info, thd, from, (SQL_SELECT *) 0, 1, 1, FALSE);
+  to->mark_virtual_columns_for_write(TRUE);
+  if (init_read_record(&info, thd, from, (SQL_SELECT *) 0, 1, 1, FALSE))
+    goto err;
+  errpos= 4;
   if (ignore)
     to->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
   thd->warning_info->reset_current_row_for_warning();
   restore_record(to, s->default_values);        // Create empty record
+
+  thd->progress.max_counter= from->file->records();
+  time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10;
+
   while (!(error=info.read_record(&info)))
   {
     if (thd->killed)
@@ -7064,6 +7269,14 @@ copy_data_between_tables(TABLE *from,TABLE *to,
       error= 1;
       break;
     }
+    update_virtual_fields(thd, from);
+    if (++thd->progress.counter >= time_to_report_progress)
+    {
+      time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10;
+      thd_progress_report(thd, thd->progress.counter,
+                          thd->progress.max_counter);
+    }
+
     /* Return error if source table isn't empty. */
     if (error_if_not_empty)
     {
@@ -7083,6 +7296,12 @@ copy_data_between_tables(TABLE *from,TABLE *to,
       copy_ptr->do_copy(copy_ptr);
     }
     prev_insert_id= to->file->next_insert_id;
+    update_virtual_fields(thd, to, TRUE);
+    if (thd->is_error())
+    {
+      error= 1;
+      break;
+    }
     error=to->file->ha_write_row(to->record[0]);
     to->auto_increment_field_not_null= FALSE;
     if (error)
@@ -7115,28 +7334,34 @@ copy_data_between_tables(TABLE *from,TABLE *to,
       found_count++;
     thd->warning_info->inc_current_row_for_warning();
   }
-  end_read_record(&info);
+
+err:
+  if (errpos >= 4)
+    end_read_record(&info);
   free_io_cache(from);
-  delete [] copy;				// This is never 0
+  delete [] copy;
+
+  thd_proc_info(thd, "Enabling keys");
+  thd_progress_next_stage(thd);
 
-  if (to->file->ha_end_bulk_insert() && error <= 0)
+  if (error > 0)
+    to->file->extra(HA_EXTRA_PREPARE_FOR_DROP);
+  if (errpos >= 3 && to->file->ha_end_bulk_insert() && error <= 0)
   {
     to->file->print_error(my_errno,MYF(0));
     error= 1;
   }
   to->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
 
-  if (mysql_trans_commit_alter_copy_data(thd))
+  if (errpos >= 1 && mysql_trans_commit_alter_copy_data(thd))
     error= 1;
 
- err:
   thd->variables.sql_mode= save_sql_mode;
   thd->abort_on_warning= 0;
-  free_io_cache(from);
   *copied= found_count;
   *deleted=delete_count;
   to->file->ha_release_auto_increment();
-  if (to->file->ha_external_lock(thd,F_UNLCK))
+  if (errpos >= 2 && to->file->ha_external_lock(thd,F_UNLCK))
     error=1;
   if (error < 0 && to->file->extra(HA_EXTRA_PREPARE_FOR_RENAME))
     error= 1;
@@ -7181,7 +7406,7 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list)
   alter_info.flags= (ALTER_CHANGE_COLUMN | ALTER_RECREATE);
   DBUG_RETURN(mysql_alter_table(thd, NullS, NullS, &create_info,
                                 table_list, &alter_info, 0,
-                                (ORDER *) 0, 0));
+                                (ORDER *) 0, 0, 0));
 }
 
 
@@ -7206,7 +7431,7 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables,
   /* Open one table after the other to keep lock time as short as possible. */
   for (table= tables; table; table= table->next_local)
   {
-    char table_name[NAME_LEN*2+2];
+    char table_name[SAFE_NAME_LEN*2+2];
     TABLE *t;
 
     strxmov(table_name, table->db ,".", table->table_name, NullS);
@@ -7225,11 +7450,12 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables,
     }
     else
     {
-      if (t->file->ha_table_flags() & HA_HAS_CHECKSUM &&
-	  !(check_opt->flags & T_EXTEND))
+      /* Call ->checksum() if the table checksum matches 'old_mode' settings */
+      if (!(check_opt->flags & T_EXTEND) &&
+          (((t->file->ha_table_flags() & HA_HAS_OLD_CHECKSUM) && thd->variables.old_mode) ||
+           ((t->file->ha_table_flags() & HA_HAS_NEW_CHECKSUM) && !thd->variables.old_mode)))
 	protocol->store((ulonglong)t->file->checksum());
-      else if (!(t->file->ha_table_flags() & HA_HAS_CHECKSUM) &&
-	       (check_opt->flags & T_QUICK))
+      else if (check_opt->flags & T_QUICK)
 	protocol->store_null();
       else
       {
@@ -7256,7 +7482,7 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables,
               goto err;
             }
 	    ha_checksum row_crc= 0;
-            int error= t->file->rnd_next(t->record[0]);
+            int error= t->file->ha_rnd_next(t->record[0]);
             if (unlikely(error))
             {
               if (error == HA_ERR_RECORD_DELETED)
@@ -7277,6 +7503,8 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables,
 	    {
 	      Field *f= t->field[i];
 
+              if (! thd->variables.old_mode && f->is_real_null(0))
+                continue;
              /*
                BLOB and VARCHAR have pointers in their field, we must convert
                to string; GEOMETRY is implemented on top of BLOB.
@@ -7352,7 +7580,7 @@ static bool check_engine(THD *thd, const char *table_name,
     if (create_info->used_fields & HA_CREATE_USED_ENGINE)
     {
       my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0),
-               ha_resolve_storage_engine_name(*new_engine), "TEMPORARY");
+               hton_name(*new_engine)->str, "TEMPORARY");
       *new_engine= 0;
       return TRUE;
     }
diff --git a/sql/sql_table.h b/sql/sql_table.h
index 043a8085452..00de6ed1b8d 100644
--- a/sql/sql_table.h
+++ b/sql/sql_table.h
@@ -30,7 +30,6 @@ typedef struct st_ha_create_information HA_CREATE_INFO;
 typedef struct st_key KEY;
 typedef struct st_key_cache KEY_CACHE;
 typedef struct st_lock_param_type ALTER_PARTITION_PARAM_TYPE;
-typedef struct st_mysql_lex_string LEX_STRING;
 typedef struct st_order ORDER;
 class Alter_table_change_level;
 
@@ -108,9 +107,6 @@ enum enum_explain_filename_mode
 /* depends on errmsg.txt Database `db`, Table `t` ... */
 #define EXPLAIN_FILENAME_MAX_EXTRA_LENGTH 63
 
-#define MYSQL50_TABLE_NAME_PREFIX         "#mysql50#"
-#define MYSQL50_TABLE_NAME_PREFIX_LENGTH  9
-
 #define WFRM_WRITE_SHADOW 1
 #define WFRM_INSTALL_SHADOW 2
 #define WFRM_PACK_FRM 4
@@ -135,6 +131,9 @@ uint build_table_filename(char *buff, size_t bufflen, const char *db,
                           const char *table, const char *ext, uint flags);
 uint build_table_shadow_filename(char *buff, size_t bufflen,
                                  ALTER_PARTITION_PARAM_TYPE *lpt);
+bool check_table_file_presence(char *old_path, char *path, const char *db,
+                               const char *table_name, const char *alias,
+                               bool issue_error);
 bool mysql_create_table(THD *thd, TABLE_LIST *create_table,
                         HA_CREATE_INFO *create_info,
                         Alter_info *alter_info);
@@ -153,7 +152,8 @@ bool mysql_alter_table(THD *thd, char *new_db, char *new_name,
                        HA_CREATE_INFO *create_info,
                        TABLE_LIST *table_list,
                        Alter_info *alter_info,
-                       uint order_num, ORDER *order, bool ignore);
+                       uint order_num, ORDER *order, bool ignore,
+                       bool require_online);
 bool mysql_compare_tables(TABLE *table,
                           Alter_info *alter_info,
                           HA_CREATE_INFO *create_info,
diff --git a/sql/sql_tablespace.cc b/sql/sql_tablespace.cc
index b2a63f7cbb2..3f6daf7a9ec 100644
--- a/sql/sql_tablespace.cc
+++ b/sql/sql_tablespace.cc
@@ -38,7 +38,7 @@ int mysql_alter_tablespace(THD *thd, st_alter_tablespace *ts_info)
       push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                           ER_WARN_USING_OTHER_HANDLER,
                           ER(ER_WARN_USING_OTHER_HANDLER),
-                          ha_resolve_storage_engine_name(hton),
+                          hton_name(hton)->str,
                           ts_info->tablespace_name ? ts_info->tablespace_name
                                                 : ts_info->logfile_group_name);
   }
@@ -47,13 +47,14 @@ int mysql_alter_tablespace(THD *thd, st_alter_tablespace *ts_info)
   {
     if ((error= hton->alter_tablespace(hton, thd, ts_info)))
     {
-      if (error == HA_ADMIN_NOT_IMPLEMENTED)
+      if (error == 1)
       {
-        my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "");
+        DBUG_RETURN(1);
       }
-      else if (error == 1)
+
+      if (error == HA_ADMIN_NOT_IMPLEMENTED)
       {
-        DBUG_RETURN(1);
+        my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "");
       }
       else
       {
@@ -67,7 +68,7 @@ int mysql_alter_tablespace(THD *thd, st_alter_tablespace *ts_info)
     push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                         ER_ILLEGAL_HA_CREATE_OPTION,
                         ER(ER_ILLEGAL_HA_CREATE_OPTION),
-                        ha_resolve_storage_engine_name(hton),
+                        hton_name(hton)->str,
                         "TABLESPACE or LOGFILE GROUP");
   }
   error= write_bin_log(thd, FALSE, thd->query(), thd->query_length());
diff --git a/sql/sql_test.cc b/sql/sql_test.cc
index 15a1bad8471..46f27f184fa 100644
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@@ -59,19 +59,20 @@ static const char *lock_descriptions[] =
 void
 print_where(COND *cond,const char *info, enum_query_type query_type)
 {
-  char buff[256];
+  char buff[1024];
   String str(buff,(uint32) sizeof(buff), system_charset_info);
   str.length(0);
+  str.extra_allocation(1024);
   if (cond)
     cond->print(&str, query_type);
-  str.append('\0');
 
   DBUG_LOCK_FILE;
   (void) fprintf(DBUG_FILE,"\nWHERE:(%s) %p ", info, cond);
-  (void) fputs(str.ptr(),DBUG_FILE);
+  (void) fputs(str.c_ptr_safe(),DBUG_FILE);
   (void) fputc('\n',DBUG_FILE);
   DBUG_UNLOCK_FILE;
 }
+
 	/* This is for debugging purposes */
 
 
@@ -168,10 +169,9 @@ void TEST_filesort(SORT_FIELD *sortorder,uint s_length)
       out.append(str);
     }
   }
-  out.append('\0');				// Purify doesn't like c_ptr()
   DBUG_LOCK_FILE;
   (void) fputs("\nInfo about FILESORT\n",DBUG_FILE);
-  fprintf(DBUG_FILE,"Sortorder: %s\n",out.ptr());
+  fprintf(DBUG_FILE,"Sortorder: %s\n",out.c_ptr_safe());
   DBUG_UNLOCK_FILE;
   DBUG_VOID_RETURN;
 }
@@ -180,64 +180,114 @@ void TEST_filesort(SORT_FIELD *sortorder,uint s_length)
 void
 TEST_join(JOIN *join)
 {
-  uint i,ref;
+  uint ref;
+  int i;
+  List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges);
+  JOIN_TAB_RANGE *jt_range;
   DBUG_ENTER("TEST_join");
 
-  /*
-    Assemble results of all the calls to full_name() first,
-    in order not to garble the tabular output below.
-  */
-  String ref_key_parts[MAX_TABLES];
-  for (i= 0; i < join->tables; i++)
-  {
-    JOIN_TAB *tab= join->join_tab + i;
-    for (ref= 0; ref < tab->ref.key_parts; ref++)
-    {
-      ref_key_parts[i].append(tab->ref.items[ref]->full_name());
-      ref_key_parts[i].append("  ");
-    }
-  }
-
   DBUG_LOCK_FILE;
   (void) fputs("\nInfo about JOIN\n",DBUG_FILE);
-  for (i=0 ; i < join->tables ; i++)
+  while ((jt_range= it++))
   {
-    JOIN_TAB *tab=join->join_tab+i;
-    TABLE *form=tab->table;
-    char key_map_buff[128];
-    fprintf(DBUG_FILE,"%-16.16s  type: %-7s  q_keys: %s  refs: %d  key: %d  len: %d\n",
-	    form->alias,
-	    join_type_str[tab->type],
-	    tab->keys.print(key_map_buff),
-	    tab->ref.key_parts,
-	    tab->ref.key,
-	    tab->ref.key_length);
-    if (tab->select)
+    /*
+      Assemble results of all the calls to full_name() first,
+      in order not to garble the tabular output below.
+    */
+    String ref_key_parts[MAX_TABLES];
+    int tables_in_range= jt_range->end - jt_range->start;
+    for (i= 0; i < tables_in_range; i++)
     {
-      char buf[MAX_KEY/8+1];
-      if (tab->use_quick == 2)
-	fprintf(DBUG_FILE,
-		"                  quick select checked for each record (keys: %s)\n",
-		tab->select->quick_keys.print(buf));
-      else if (tab->select->quick)
+      JOIN_TAB *tab= jt_range->start + i;
+      for (ref= 0; ref < tab->ref.key_parts; ref++)
       {
-	fprintf(DBUG_FILE, "                  quick select used:\n");
-        tab->select->quick->dbug_dump(18, FALSE);
+        ref_key_parts[i].append(tab->ref.items[ref]->full_name());
+        ref_key_parts[i].append("  ");
       }
-      else
-	(void) fputs("                  select used\n",DBUG_FILE);
     }
-    if (tab->ref.key_parts)
+
+    for (i= 0; i < tables_in_range; i++)
     {
-      fprintf(DBUG_FILE,
-              "                  refs:  %s\n", ref_key_parts[i].ptr());
+      JOIN_TAB *tab= jt_range->start + i;
+      TABLE *form=tab->table;
+      char key_map_buff[128];
+      fprintf(DBUG_FILE,"%-16.16s  type: %-7s  q_keys: %s  refs: %d  key: %d  len: %d\n",
+	    form->alias.c_ptr(),
+              join_type_str[tab->type],
+              tab->keys.print(key_map_buff),
+              tab->ref.key_parts,
+              tab->ref.key,
+              tab->ref.key_length);
+      if (tab->select)
+      {
+        char buf[MAX_KEY/8+1];
+        if (tab->use_quick == 2)
+          fprintf(DBUG_FILE,
+                  "                  quick select checked for each record (keys: %s)\n",
+                  tab->select->quick_keys.print(buf));
+        else if (tab->select->quick)
+        {
+          fprintf(DBUG_FILE, "                  quick select used:\n");
+          tab->select->quick->dbug_dump(18, FALSE);
+        }
+        else
+          (void)fputs("                  select used\n",DBUG_FILE);
+      }
+      if (tab->ref.key_parts)
+      {
+        fprintf(DBUG_FILE,
+              "                  refs:  %s\n", ref_key_parts[i].c_ptr_safe());
+      }
     }
+    (void)fputs("\n",DBUG_FILE);
   }
   DBUG_UNLOCK_FILE;
   DBUG_VOID_RETURN;
 }
 
 
+#define FT_KEYPART   (MAX_REF_PARTS+10)
+
+/* Write a one-line description of a single KEYUSE to the debug trace file */
+void print_keyuse(KEYUSE *keyuse)
+{
+  char buff[256];
+  char buf2[64];
+  const char *fieldname;
+  JOIN_TAB *join_tab= keyuse->table->reginfo.join_tab;
+  KEY *key_info= join_tab->get_keyinfo_by_key_no(keyuse->key);
+  String str(buff,(uint32) sizeof(buff), system_charset_info);
+  str.length(0);
+  keyuse->val->print(&str, QT_ORDINARY);
+  if (keyuse->is_for_hash_join())
+    fieldname= keyuse->table->field[keyuse->keypart]->field_name;
+  else if (keyuse->keypart == FT_KEYPART)
+    fieldname= "FT_KEYPART";
+  else
+    fieldname= key_info->key_part[keyuse->keypart].field->field_name;
+  ll2str(keyuse->used_tables, buf2, 16, 0);
+  DBUG_LOCK_FILE;
+  fprintf(DBUG_FILE, "KEYUSE: %s.%s=%s  optimize: %u  used_tables: %s "
+          "ref_table_rows: %lu  keypart_map: %0lx\n",
+          keyuse->table->alias.c_ptr(), fieldname, str.c_ptr_safe(),
+          (uint) keyuse->optimize, buf2, (ulong) keyuse->ref_table_rows,
+          (ulong) keyuse->keypart_map);
+  DBUG_UNLOCK_FILE;
+  //key_part_map keypart_map; --?? there can be several?
+}
+
+/* purecov: begin inspected */
+/* Print the element count, then each KEYUSE in the array via print_keyuse() */
+void print_keyuse_array(DYNAMIC_ARRAY *keyuse_array)
+{
+  DBUG_LOCK_FILE;
+  fprintf(DBUG_FILE, "KEYUSE array (%u elements)\n", (uint) keyuse_array->elements);
+  DBUG_UNLOCK_FILE;
+  for(uint i=0; i < keyuse_array->elements; i++)
+    print_keyuse((KEYUSE*)dynamic_array_ptr(keyuse_array, i));
+}
+
+
 /* 
   Print the current state during query optimization.
 
@@ -338,12 +388,28 @@ print_plan(JOIN* join, uint idx, double record_count, double read_time,
   DBUG_UNLOCK_FILE;
 }
 
-#endif
 
-C_MODE_START
-static int dl_compare(const void *p1, const void *p2);
-static int print_key_cache_status(const char *name, KEY_CACHE *key_cache);
-C_MODE_END
+void print_sjm(SJ_MATERIALIZATION_INFO *sjm)
+{
+  DBUG_LOCK_FILE;
+  fprintf(DBUG_FILE, "\nsemi-join nest{\n");
+  fprintf(DBUG_FILE, "  tables { \n");
+  for (uint i= 0;i < sjm->tables; i++)
+  {
+    fprintf(DBUG_FILE, "    %s%s\n", 
+            sjm->positions[i].table->table->alias.c_ptr(),
+            (i == sjm->tables -1)? "": ",");
+  }
+  fprintf(DBUG_FILE, "  }\n");
+  fprintf(DBUG_FILE, "  materialize_cost= %g\n",
+          sjm->materialization_cost.total_cost());
+  fprintf(DBUG_FILE, "  rows= %g\n", sjm->rows);
+  fprintf(DBUG_FILE, "}\n");
+  DBUG_UNLOCK_FILE;
+}
+/* purecov: end */
+
+#endif
 
 typedef struct st_debug_lock
 {
@@ -354,6 +420,7 @@ typedef struct st_debug_lock
   enum thr_lock_type type;
 } TABLE_LOCK_INFO;
 
+C_MODE_START
 static int dl_compare(const void *p1, const void *p2)
 {
   TABLE_LOCK_INFO *a, *b;
@@ -371,6 +438,7 @@ static int dl_compare(const void *p1, const void *p2)
     return -1;
   return 1;
 }
+C_MODE_END
 
 
 static void push_locks_into_array(DYNAMIC_ARRAY *ar, THR_LOCK_DATA *data,
@@ -457,8 +525,9 @@ end:
   delete_dynamic(&saved_table_locks);
 }
 
-
-static int print_key_cache_status(const char *name, KEY_CACHE *key_cache)
+C_MODE_START
+static int print_key_cache_status(const char *name, KEY_CACHE *key_cache,
+                                  void *unused __attribute__((unused)))
 {
   char llbuff1[22];
   char llbuff2[22];
@@ -471,11 +540,15 @@ static int print_key_cache_status(const char *name, KEY_CACHE *key_cache)
   }
   else
   {
+    KEY_CACHE_STATISTICS stats;
+    get_key_cache_statistics(key_cache, 0, &stats);
+
     printf("%s\n\
 Buffer_size:    %10lu\n\
 Block_size:     %10lu\n\
 Division_limit: %10lu\n\
-Age_limit:      %10lu\n\
+Age_threshold:  %10lu\n\
+Partitions:     %10lu\n\
 blocks used:    %10lu\n\
 not flushed:    %10lu\n\
 w_requests:     %10s\n\
@@ -483,18 +556,21 @@ writes:         %10s\n\
 r_requests:     %10s\n\
 reads:          %10s\n\n",
 	   name,
-	   (ulong) key_cache->param_buff_size,
+	   (ulong)key_cache->param_buff_size,
            (ulong)key_cache->param_block_size,
 	   (ulong)key_cache->param_division_limit,
            (ulong)key_cache->param_age_threshold,
-	   key_cache->blocks_used,key_cache->global_blocks_changed,
-	   llstr(key_cache->global_cache_w_requests,llbuff1),
-           llstr(key_cache->global_cache_write,llbuff2),
-	   llstr(key_cache->global_cache_r_requests,llbuff3),
-           llstr(key_cache->global_cache_read,llbuff4));
+           (ulong)key_cache->param_partitions,
+	   (ulong)stats.blocks_used,
+           (ulong)stats.blocks_changed,
+	   llstr(stats.write_requests,llbuff1),
+           llstr(stats.writes,llbuff2),
+	   llstr(stats.read_requests,llbuff3),
+           llstr(stats.reads,llbuff4));
   }
   return 0;
 }
+C_MODE_END
 
 
 void mysql_print_status()
@@ -514,7 +590,7 @@ void mysql_print_status()
 #endif
   /* Print key cache status */
   puts("\nKey caches:");
-  process_key_caches(print_key_cache_status);
+  process_key_caches(print_key_cache_status, 0);
   mysql_mutex_lock(&LOCK_status);
   printf("\nhandler status:\n\
 read_key:   %10lu\n\
diff --git a/sql/sql_test.h b/sql/sql_test.h
index 3f897b8594a..3c1ee188eeb 100644
--- a/sql/sql_test.h
+++ b/sql/sql_test.h
@@ -30,6 +30,8 @@ void TEST_filesort(SORT_FIELD *sortorder,uint s_length);
 void TEST_join(JOIN *join);
 void print_plan(JOIN* join,uint idx, double record_count, double read_time,
                 double current_read_time, const char *info);
+void print_keyuse_array(DYNAMIC_ARRAY *keyuse_array);
+void print_sjm(SJ_MATERIALIZATION_INFO *sjm);
 void dump_TABLE_LIST_graph(SELECT_LEX *select_lex, TABLE_LIST* tl);
 #endif
 void mysql_print_status();
diff --git a/sql/sql_time.cc b/sql/sql_time.cc
index 329f6f8a98d..e30c74a148e 100644
--- a/sql/sql_time.cc
+++ b/sql/sql_time.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -252,8 +253,9 @@ to_ascii(CHARSET_INFO *cs,
 
 
 /* Character set-aware version of str_to_time() */
-bool str_to_time(CHARSET_INFO *cs, const char *str,uint length,
-                 MYSQL_TIME *l_time, int *warning)
+timestamp_type
+str_to_time(CHARSET_INFO *cs, const char *str,uint length,
+                 MYSQL_TIME *l_time, ulong fuzzydate, int *warning)
 {
   char cnv[32];
   if ((cs->state & MY_CS_NONASCII) != 0)
@@ -261,7 +263,7 @@ bool str_to_time(CHARSET_INFO *cs, const char *str,uint length,
     length= to_ascii(cs, str, length, cnv, sizeof(cnv));
     str= cnv;
   }
-  return str_to_time(str, length, l_time, warning);
+  return str_to_time(str, length, l_time, fuzzydate, warning);
 }
 
 
@@ -291,76 +293,132 @@ timestamp_type str_to_datetime(CHARSET_INFO *cs,
 timestamp_type
 str_to_datetime_with_warn(CHARSET_INFO *cs,
                           const char *str, uint length, MYSQL_TIME *l_time,
-                          uint flags)
+                          ulong flags)
 {
   int was_cut;
   THD *thd= current_thd;
   timestamp_type ts_type;
   
   ts_type= str_to_datetime(cs, str, length, l_time,
-                           (flags | (thd->variables.sql_mode &
-                                     (MODE_INVALID_DATES |
-                                      MODE_NO_ZERO_DATE))),
+                           (flags | (sql_mode_for_dates(thd))),
                            &was_cut);
   if (was_cut || ts_type <= MYSQL_TIMESTAMP_ERROR)
-    make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                                 str, length, ts_type,  NullS);
+    make_truncated_value_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                                 str, length, flags & TIME_TIME_ONLY ?
+                                 MYSQL_TIMESTAMP_TIME : ts_type, NullS);
   return ts_type;
 }
 
 
-/*
-  Convert a datetime from broken-down MYSQL_TIME representation to corresponding 
-  TIMESTAMP value.
+/**
+  converts a pair of numbers (integer part, microseconds) to MYSQL_TIME
 
-  SYNOPSIS
-    TIME_to_timestamp()
-      thd             - current thread
-      t               - datetime in broken-down representation, 
-      in_dst_time_gap - pointer to bool which is set to true if t represents
-                        value which doesn't exists (falls into the spring 
-                        time-gap) or to false otherwise.
-   
-  RETURN
-     Number seconds in UTC since start of Unix Epoch corresponding to t.
-     0 - t contains datetime value which is out of TIMESTAMP range.
-     
+  @param neg           sign of the time value
+  @param nr            integer part of the number to convert
+  @param sec_part      microsecond part of the number
+  @param ltime         converted value will be written here
+  @param fuzzydate     conversion flags (TIME_FUZZY_DATE, etc)
+  @param str           original number, as an ErrConv. For the warning
+  @param field_name    field name or NULL if not a field. For the warning
+  
+  @returns 0 for success, 1 for a failure
 */
-my_time_t TIME_to_timestamp(THD *thd, const MYSQL_TIME *t, my_bool *in_dst_time_gap)
+static bool number_to_time_with_warn(bool neg, ulonglong nr, ulong sec_part,
+                                     MYSQL_TIME *ltime, ulong fuzzydate,
+                                     const ErrConv *str,
+                                     const char *field_name)
 {
-  my_time_t timestamp;
+  int was_cut;
+  longlong res;
+  enum_field_types f_type;
 
-  *in_dst_time_gap= 0;
-  thd->time_zone_used= 1;
+  if (fuzzydate & TIME_TIME_ONLY)
+  {
+    f_type= MYSQL_TYPE_TIME;
+    res= number_to_time(neg, nr, sec_part, ltime, &was_cut);
+  }
+  else
+  {
+    f_type= MYSQL_TYPE_DATETIME;
+    res= neg ? -1 : number_to_datetime(nr, sec_part, ltime, fuzzydate, &was_cut);
+  }
 
-  timestamp= thd->variables.time_zone->TIME_to_gmt_sec(t, in_dst_time_gap);
-  if (timestamp)
+  if (res < 0 || (was_cut && !(fuzzydate & TIME_FUZZY_DATE)))
   {
-    return timestamp;
+    make_truncated_value_warning(current_thd,
+                                 MYSQL_ERROR::WARN_LEVEL_WARN, str,
+                                 res < 0 ? MYSQL_TIMESTAMP_ERROR
+                                         : mysql_type_to_time_type(f_type),
+                                 field_name);
   }
+  return res < 0;
+}
+
 
-  /* If we are here we have range error. */
-  return(0);
+bool double_to_datetime_with_warn(double value, MYSQL_TIME *ltime,
+                                  ulong fuzzydate, const char *field_name)
+{
+  /* 'str' keeps the original value for the warning text */
+  const ErrConvDouble str(value);
+  bool neg= value < 0;
+  if (neg)
+    value= -value;
+
+  /* Clamp so that the conversion to ulonglong below stays in range */
+  if (value > LONGLONG_MAX)
+    value= static_cast<double>(LONGLONG_MAX);
+
+  ulonglong nr= static_cast<ulonglong>(floor(value));
+  ulong sec_part= static_cast<ulong>((value - floor(value))*TIME_SECOND_PART_FACTOR);
+  return number_to_time_with_warn(neg, nr, sec_part, ltime, fuzzydate, &str,
+                                  field_name);
+}
+
+/* Convert a my_decimal to MYSQL_TIME through number_to_time_with_warn() */
+bool decimal_to_datetime_with_warn(const my_decimal *value, MYSQL_TIME *ltime,
+                                   ulong fuzzydate, const char *field_name)
+{
+  const ErrConvDecimal str(value);
+  ulonglong nr;
+  ulong sec_part;
+  bool neg= my_decimal2seconds(value, &nr, &sec_part);
+  return number_to_time_with_warn(neg, nr, sec_part, ltime, fuzzydate, &str,
+                                  field_name);
+}
+
+/* Convert an integer to MYSQL_TIME through number_to_time_with_warn() */
+bool int_to_datetime_with_warn(longlong value, MYSQL_TIME *ltime,
+                               ulong fuzzydate, const char *field_name)
+{
+  const ErrConvInteger str(value);
+  bool neg= value < 0;  /* negated as unsigned: -LONGLONG_MIN would overflow */
+  return number_to_time_with_warn(neg, neg ? 0 - (ulonglong) value : value,
+                                  0, ltime, fuzzydate, &str, field_name);
+}
 
 
 /*
-  Convert a time string to a MYSQL_TIME struct and produce a warning
-  if string was cut during conversion.
+  Convert a datetime from broken-down MYSQL_TIME representation to
+  corresponding TIMESTAMP value.
 
-  NOTE
-    See str_to_time() for more info.
+  SYNOPSIS
+    TIME_to_timestamp()
+      thd             - current thread
+      t               - datetime in broken-down representation, 
+      error_code      - 0, if the conversion was successful;
+                        ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value
+                           which is out of TIMESTAMP range;
+                        ER_WARN_INVALID_TIMESTAMP, if t represents value which
+                           doesn't exist (falls into the spring time-gap).
+   
+  RETURN
+     Number seconds in UTC since start of Unix Epoch corresponding to t.
+     0 - in case of ER_WARN_DATA_OUT_OF_RANGE
 */
-bool
-str_to_time_with_warn(CHARSET_INFO *cs,
-                      const char *str, uint length, MYSQL_TIME *l_time)
+
+my_time_t TIME_to_timestamp(THD *thd, const MYSQL_TIME *t, uint *error_code)
 {
-  int warning;
-  bool ret_val= str_to_time(str, length, l_time, &warning);
-  if (ret_val || warning)
-    make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                                 str, length, MYSQL_TIMESTAMP_TIME, NullS);
-  return ret_val;
+  thd->time_zone_used= 1;
+  return thd->variables.time_zone->TIME_to_gmt_sec(t, error_code);
 }
 
 
@@ -630,7 +688,7 @@ bool parse_date_time_format(timestamp_type format_type,
       return 0;
     break;
   default:
-    DBUG_ASSERT(1);
+    DBUG_ASSERT(0);
     break;
   }
   return 1;					// Error
@@ -725,11 +783,6 @@ KNOWN_DATE_TIME_FORMAT known_date_time_formats[6]=
 };
 
 
-/*
-   Return format string according format name.
-   If name is unknown, result is NULL
-*/
-
 const char *get_date_time_format_str(KNOWN_DATE_TIME_FORMAT *format,
 				     timestamp_type type)
 {
@@ -746,60 +799,15 @@ const char *get_date_time_format_str(KNOWN_DATE_TIME_FORMAT *format,
   }
 }
 
-/****************************************************************************
-  Functions to create default time/date/datetime strings
- 
-  NOTE:
-    For the moment the DATE_TIME_FORMAT argument is ignored becasue
-    MySQL doesn't support comparing of date/time/datetime strings that
-    are not in arbutary order as dates are compared as strings in some
-    context)
-    This functions don't check that given MYSQL_TIME structure members are
-    in valid range. If they are not, return value won't reflect any 
-    valid date either. Additionally, make_time doesn't take into
-    account time->day member: it's assumed that days have been converted
-    to hours already.
-****************************************************************************/
-
-void make_time(const DATE_TIME_FORMAT *format __attribute__((unused)),
-               const MYSQL_TIME *l_time, String *str)
-{
-  uint length= (uint) my_time_to_str(l_time, (char*) str->ptr());
-  str->length(length);
-  str->set_charset(&my_charset_numeric);
-}
-
-
-void make_date(const DATE_TIME_FORMAT *format __attribute__((unused)),
-               const MYSQL_TIME *l_time, String *str)
-{
-  uint length= (uint) my_date_to_str(l_time, (char*) str->ptr());
-  str->length(length);
-  str->set_charset(&my_charset_numeric);
-}
-
-
-void make_datetime(const DATE_TIME_FORMAT *format __attribute__((unused)),
-                   const MYSQL_TIME *l_time, String *str)
-{
-  uint length= (uint) my_datetime_to_str(l_time, (char*) str->ptr());
-  str->length(length);
-  str->set_charset(&my_charset_numeric);
-}
-
-
-void make_truncated_value_warning(THD *thd, MYSQL_ERROR::enum_warning_level level,
-                                  const char *str_val,
-				  uint str_length, timestamp_type time_type,
+void make_truncated_value_warning(THD *thd,
+                                  MYSQL_ERROR::enum_warning_level level,
+                                  const ErrConv *sval,
+				  timestamp_type time_type,
                                   const char *field_name)
 {
   char warn_buff[MYSQL_ERRMSG_SIZE];
   const char *type_str;
   CHARSET_INFO *cs= &my_charset_latin1;
-  char buff[128];
-  String str(buff,(uint32) sizeof(buff), system_charset_info);
-  str.copy(str_val, str_length, system_charset_info);
-  str[str_length]= 0;               // Ensure we have end 0 for snprintf
 
   switch (time_type) {
     case MYSQL_TIMESTAMP_DATE: 
@@ -816,32 +824,37 @@ void make_truncated_value_warning(THD *thd, MYSQL_ERROR::enum_warning_level leve
   if (field_name)
     cs->cset->snprintf(cs, warn_buff, sizeof(warn_buff),
                        ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
-                       type_str, str.c_ptr(), field_name,
+                       type_str, sval->ptr(), field_name,
                        (ulong) thd->warning_info->current_row_for_warning());
   else
   {
     if (time_type > MYSQL_TIMESTAMP_ERROR)
       cs->cset->snprintf(cs, warn_buff, sizeof(warn_buff),
                          ER(ER_TRUNCATED_WRONG_VALUE),
-                         type_str, str.c_ptr());
+                         type_str, sval->ptr());
     else
       cs->cset->snprintf(cs, warn_buff, sizeof(warn_buff),
-                         ER(ER_WRONG_VALUE), type_str, str.c_ptr());
+                         ER(ER_WRONG_VALUE), type_str, sval->ptr());
   }
   push_warning(thd, level,
                ER_TRUNCATED_WRONG_VALUE, warn_buff);
 }
 
+
 /* Daynumber from year 0 to 9999-12-31 */
 #define MAX_DAY_NUMBER 3652424L
-
-bool date_add_interval(MYSQL_TIME *ltime, interval_type int_type, INTERVAL interval)
+#define COMBINE(X)                                                      \
+               (((((X)->day * 24LL + (X)->hour) * 60LL +                \
+                   (X)->minute) * 60LL + (X)->second)*1000000LL +       \
+                   (X)->second_part)
+#define GET_PART(X, N) X % N ## LL; X/= N ## LL
+
+bool date_add_interval(MYSQL_TIME *ltime, interval_type int_type,
+                       INTERVAL interval)
 {
   long period, sign;
 
-  ltime->neg= 0;
-
-  sign= (interval.neg ? -1 : 1);
+  sign= (interval.neg == ltime->neg ? 1 : -1);
 
   switch (int_type) {
   case INTERVAL_SECOND:
@@ -858,35 +871,43 @@ bool date_add_interval(MYSQL_TIME *ltime, interval_type int_type, INTERVAL inter
   case INTERVAL_DAY_SECOND:
   case INTERVAL_DAY_MINUTE:
   case INTERVAL_DAY_HOUR:
+  case INTERVAL_DAY:
   {
-    longlong sec, days, daynr, microseconds, extra_sec;
-    ltime->time_type= MYSQL_TIMESTAMP_DATETIME; // Return full date
-    microseconds= ltime->second_part + sign*interval.second_part;
-    extra_sec= microseconds/1000000L;
-    microseconds= microseconds%1000000L;
-
-    sec=((ltime->day-1)*3600*24L+ltime->hour*3600+ltime->minute*60+
-	 ltime->second +
-	 sign* (longlong) (interval.day*3600*24L +
-                           interval.hour*LL(3600)+interval.minute*LL(60)+
-                           interval.second))+ extra_sec;
-    if (microseconds < 0)
+    longlong usec, daynr;
+    my_bool neg= 0;
+    enum enum_mysql_timestamp_type time_type= ltime->time_type;
+
+    if (time_type != MYSQL_TIMESTAMP_TIME)
+      ltime->day+= calc_daynr(ltime->year, ltime->month, 1) - 1;
+
+    usec= COMBINE(ltime) + sign*COMBINE(&interval);
+
+    if (usec < 0)
     {
-      microseconds+= LL(1000000);
-      sec--;
+      neg= 1;
+      usec= -usec;
     }
-    days= sec/(3600*LL(24));
-    sec-= days*3600*LL(24);
-    if (sec < 0)
+
+    ltime->second_part= GET_PART(usec, 1000000);
+    ltime->second= GET_PART(usec, 60);
+    ltime->minute= GET_PART(usec, 60);
+    ltime->neg^= neg;
+
+    if (time_type == MYSQL_TIMESTAMP_TIME)
     {
-      days--;
-      sec+= 3600*LL(24);
+      if (usec > TIME_MAX_HOUR)
+        goto invalid_date;
+      ltime->hour= static_cast<uint>(usec);
+      ltime->day= 0;
+      return 0;
     }
-    ltime->second_part= (uint) microseconds;
-    ltime->second= (uint) (sec % 60);
-    ltime->minute= (uint) (sec/60 % 60);
-    ltime->hour=   (uint) (sec/3600);
-    daynr= calc_daynr(ltime->year,ltime->month,1) + days;
+
+    if (int_type != INTERVAL_DAY)
+      ltime->time_type= MYSQL_TIMESTAMP_DATETIME; // Return full date
+
+    ltime->hour= GET_PART(usec, 24);
+    daynr= usec;
+
     /* Day number from year 0 to 9999-12-31 */
     if ((ulonglong) daynr > MAX_DAY_NUMBER)
       goto invalid_date;
@@ -894,7 +915,6 @@ bool date_add_interval(MYSQL_TIME *ltime, interval_type int_type, INTERVAL inter
                         &ltime->day);
     break;
   }
-  case INTERVAL_DAY:
   case INTERVAL_WEEK:
     period= (calc_daynr(ltime->year,ltime->month,ltime->day) +
              sign * (long) interval.day);
@@ -932,13 +952,15 @@ bool date_add_interval(MYSQL_TIME *ltime, interval_type int_type, INTERVAL inter
     goto null_date;
   }
 
-  return 0;					// Ok
+  if (ltime->time_type != MYSQL_TIMESTAMP_TIME)
+    return 0;                                   // Ok
 
 invalid_date:
   push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                       ER_DATETIME_FUNCTION_OVERFLOW,
                       ER(ER_DATETIME_FUNCTION_OVERFLOW),
-                      "datetime");
+                      ltime->time_type == MYSQL_TIMESTAMP_TIME ?
+                      "time" : "datetime");
 null_date:
   return 1;
 }
@@ -1038,19 +1060,14 @@ calc_time_diff(MYSQL_TIME *l_time1, MYSQL_TIME *l_time2, int l_sign, longlong *s
 
 int my_time_compare(MYSQL_TIME *a, MYSQL_TIME *b)
 {
-  ulonglong a_t= TIME_to_ulonglong_datetime(a);
-  ulonglong b_t= TIME_to_ulonglong_datetime(b);
+  ulonglong a_t= pack_time(a);
+  ulonglong b_t= pack_time(b);
 
   if (a_t < b_t)
     return -1;
   if (a_t > b_t)
     return 1;
 
-  if (a->second_part < b->second_part)
-    return -1;
-  if (a->second_part > b->second_part)
-    return 1;
-
   return 0;
 }
 
diff --git a/sql/sql_time.h b/sql/sql_time.h
index 0418a0e9621..7d0d9610872 100644
--- a/sql/sql_time.h
+++ b/sql/sql_time.h
@@ -34,17 +34,36 @@ typedef struct st_known_date_time_format KNOWN_DATE_TIME_FORMAT;
 ulong convert_period_to_month(ulong period);
 ulong convert_month_to_period(ulong month);
 void get_date_from_daynr(long daynr,uint *year, uint *month, uint *day);
-my_time_t TIME_to_timestamp(THD *thd, const MYSQL_TIME *t, my_bool *not_exist);
+my_time_t TIME_to_timestamp(THD *thd, const MYSQL_TIME *t, uint *error_code);
 bool str_to_time_with_warn(CHARSET_INFO *cs, const char *str, uint length,
-                           MYSQL_TIME *l_time);
+                           MYSQL_TIME *l_time, ulong fuzzydate);
 timestamp_type str_to_datetime_with_warn(CHARSET_INFO *cs, const char *str,
                                          uint length, MYSQL_TIME *l_time,
-                                         uint flags);
-void make_truncated_value_warning(THD *thd,
-                                  MYSQL_ERROR::enum_warning_level level,
-                                  const char *str_val, uint str_length,
+                                         ulong flags);
+bool double_to_datetime_with_warn(double value, MYSQL_TIME *ltime,
+                                  ulong fuzzydate,
+                                  const char *name);
+bool decimal_to_datetime_with_warn(const my_decimal *value, MYSQL_TIME *ltime,
+                                   ulong fuzzydate,
+                                   const char *name);
+bool int_to_datetime_with_warn(longlong value, MYSQL_TIME *ltime,
+                               ulong fuzzydate,
+                               const char *name);
+
+void make_truncated_value_warning(THD *thd, MYSQL_ERROR::enum_warning_level level,
+                                  const ErrConv *str_val,
                                   timestamp_type time_type,
                                   const char *field_name);
+
+static inline void make_truncated_value_warning(THD *thd,
+                MYSQL_ERROR::enum_warning_level level, const char *str_val,
+                uint str_length, timestamp_type time_type,
+                const char *field_name)
+{
+  const ErrConvString str(str_val, str_length, &my_charset_bin);
+  make_truncated_value_warning(thd, level, &str, time_type, field_name);
+}
+
 extern DATE_TIME_FORMAT *date_time_format_make(timestamp_type format_type,
 					       const char *format_str,
 					       uint format_length);
@@ -52,13 +71,6 @@ extern DATE_TIME_FORMAT *date_time_format_copy(THD *thd,
 					       DATE_TIME_FORMAT *format);
 const char *get_date_time_format_str(KNOWN_DATE_TIME_FORMAT *format,
 				     timestamp_type type);
-void make_date(const DATE_TIME_FORMAT *format, const MYSQL_TIME *l_time,
-               String *str);
-void make_time(const DATE_TIME_FORMAT *format, const MYSQL_TIME *l_time,
-               String *str);
-void make_datetime(const DATE_TIME_FORMAT *format, const MYSQL_TIME *l_time,
-                   String *str);
-
 /* MYSQL_TIME operations */
 bool date_add_interval(MYSQL_TIME *ltime, interval_type int_type,
                        INTERVAL interval);
@@ -74,8 +86,8 @@ bool parse_date_time_format(timestamp_type format_type,
                             const char *format, uint format_length,
                             DATE_TIME_FORMAT *date_time_format);
 /* Character set-aware version of str_to_time() */
-bool str_to_time(CHARSET_INFO *cs, const char *str,uint length,
-                 MYSQL_TIME *l_time, int *warning);
+timestamp_type str_to_time(CHARSET_INFO *cs, const char *str,uint length,
+                 MYSQL_TIME *l_time, ulong fuzzydate, int *warning);
 /* Character set-aware version of str_to_datetime() */
 timestamp_type str_to_datetime(CHARSET_INFO *cs,
                                const char *str, uint length,
diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc
index 4269c468982..30da842e0c5 100644
--- a/sql/sql_trigger.cc
+++ b/sql/sql_trigger.cc
@@ -567,7 +567,8 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create)
     Ignore the return value for now. It's better to
     keep master/slave in consistent state.
   */
-  thd->locked_tables_list.reopen_tables(thd);
+  if (thd->locked_tables_list.reopen_tables(thd))
+    thd->clear_error();
 
   /*
     Invalidate SP-cache. That's needed because triggers may change list of
@@ -874,7 +875,7 @@ bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST *tables,
 
   stmt_query->append(stmt_definition.str, stmt_definition.length);
 
-  trg_def->str= stmt_query->c_ptr();
+  trg_def->str= stmt_query->c_ptr_safe();
   trg_def->length= stmt_query->length();
 
   /* Create trigger definition file. */
@@ -1111,10 +1112,7 @@ void Table_triggers_list::set_table(TABLE *new_table)
 {
   trigger_table= new_table;
   for (Field **field= new_table->triggers->record1_field ; *field ; field++)
-  {
-    (*field)->table= (*field)->orig_table= new_table;
-    (*field)->table_name= &new_table->alias;
-  }
+    (*field)->init(new_table);
 }
 
 
@@ -1508,7 +1506,7 @@ bool Table_triggers_list::check_n_load(THD *thd, const char *db,
           To remove this prefix we use check_n_cut_mysql50_prefix().
         */
 
-        char fname[NAME_LEN + 1];
+        char fname[SAFE_NAME_LEN + 1];
         DBUG_ASSERT((!my_strcasecmp(table_alias_charset, lex.query_tables->db, db) ||
                      (check_n_cut_mysql50_prefix(db, fname, sizeof(fname)) &&
                       !my_strcasecmp(table_alias_charset, lex.query_tables->db, fname))));
@@ -2038,7 +2036,7 @@ bool Table_triggers_list::change_table_name(THD *thd, const char *db,
     */
     if (my_strcasecmp(table_alias_charset, db, new_db))
     {
-      char dbname[NAME_LEN + 1];
+      char dbname[SAFE_NAME_LEN + 1];
       if (check_n_cut_mysql50_prefix(db, dbname, sizeof(dbname)) && 
           !my_strcasecmp(table_alias_charset, dbname, new_db))
       {
diff --git a/sql/sql_trigger.h b/sql/sql_trigger.h
index 092b48170d8..84637b79818 100644
--- a/sql/sql_trigger.h
+++ b/sql/sql_trigger.h
@@ -53,7 +53,7 @@ enum trg_action_time_type
 /**
   This class holds all information about triggers of table.
 
-  QQ: Will it be merged into TABLE in the future ?
+  TODO: Will it be merged into TABLE in the future ?
 */
 
 class Table_triggers_list: public Sql_alloc
diff --git a/sql/sql_udf.cc b/sql/sql_udf.cc
index bf2de4b39bb..7eb0cd7ebf7 100644
--- a/sql/sql_udf.cc
+++ b/sql/sql_udf.cc
@@ -178,7 +178,13 @@ void udf_init()
   }
 
   table= tables.table;
-  init_read_record(&read_record_info, new_thd, table, NULL,1,0,FALSE);
+  if (init_read_record(&read_record_info, new_thd, table, NULL,1,0,FALSE))
+  {
+    sql_print_error("Could not initialize init_read_record; udf's not "
+                    "loaded");
+    goto end;
+  }
+
   table->use_all_columns();
   while (!(error= read_record_info.read_record(&read_record_info)))
   {
@@ -232,7 +238,7 @@ void udf_init()
     }
     tmp->dlhandle = dl;
     {
-      char buf[NAME_LEN+16], *missing;
+      char buf[SAFE_NAME_LEN+16], *missing;
       if ((missing= init_syms(tmp, buf)))
       {
         sql_print_error(ER(ER_CANT_FIND_DL_ENTRY), missing);
@@ -481,7 +487,7 @@ int mysql_create_function(THD *thd,udf_func *udf)
   }
   udf->dlhandle=dl;
   {
-    char buf[NAME_LEN+16], *missing;
+    char buf[SAFE_NAME_LEN+16], *missing;
     if ((missing= init_syms(udf, buf)))
     {
       my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), missing);
@@ -598,10 +604,10 @@ int mysql_drop_function(THD *thd,const LEX_STRING *udf_name)
     goto err;
   table->use_all_columns();
   table->field[0]->store(exact_name_str, exact_name_len, &my_charset_bin);
-  if (!table->file->index_read_idx_map(table->record[0], 0,
-                                       (uchar*) table->field[0]->ptr,
-                                       HA_WHOLE_KEY,
-                                       HA_READ_KEY_EXACT))
+  if (!table->file->ha_index_read_idx_map(table->record[0], 0,
+                                          (uchar*) table->field[0]->ptr,
+                                          HA_WHOLE_KEY,
+                                          HA_READ_KEY_EXACT))
   {
     int error;
     if ((error = table->file->ha_delete_row(table->record[0])))
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index b0a25c2df97..3f3d140dad8 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -52,23 +52,33 @@ int select_union::prepare(List<Item> &list, SELECT_LEX_UNIT *u)
 }
 
 
-bool select_union::send_data(List<Item> &values)
+int select_union::send_data(List<Item> &values)
 {
-  int error= 0;
   if (unit->offset_limit_cnt)
   {						// using limit offset,count
     unit->offset_limit_cnt--;
     return 0;
   }
-  fill_record(thd, table->field, values, 1);
+  fill_record(thd, table->field, values, TRUE, FALSE);
   if (thd->is_error())
     return 1;
 
-  if ((error= table->file->ha_write_row(table->record[0])))
+  if ((write_err= table->file->ha_write_tmp_row(table->record[0])))
   {
-    /* create_myisam_from_heap will generate error if needed */
-    if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
-        create_myisam_from_heap(thd, table, &tmp_table_param, error, 1))
+    if (write_err == HA_ERR_FOUND_DUPP_KEY)
+    {
+      /*
+        Inform upper level that we found a duplicate key, that should not
+        be counted as part of limit
+      */
+      return -1;
+    }
+    /* create_internal_tmp_table_from_heap will generate error if needed */
+    if (table->file->is_fatal_error(write_err, HA_CHECK_DUP) &&
+        create_internal_tmp_table_from_heap(thd, table,
+                                            tmp_table_param.start_recinfo, 
+                                            &tmp_table_param.recinfo,
+                                            write_err, 1))
       return 1;
   }
   return 0;
@@ -103,6 +113,9 @@ bool select_union::flush()
       is_union_distinct  if set, the temporary table will eliminate
                          duplicates on insert
       options            create options
+      alias              name of the temporary table
+      bit_fields_as_long convert bit fields to ulonglong
+      create_table       whether to physically create result table
 
   DESCRIPTION
     Create a temporary table that is used to store the result of a UNION,
@@ -116,22 +129,49 @@ bool select_union::flush()
 bool
 select_union::create_result_table(THD *thd_arg, List<Item> *column_types,
                                   bool is_union_distinct, ulonglong options,
-                                  const char *alias)
+                                  const char *alias,
+                                  bool bit_fields_as_long, bool create_table)
 {
   DBUG_ASSERT(table == 0);
   tmp_table_param.init();
   tmp_table_param.field_count= column_types->elements;
+  tmp_table_param.bit_fields_as_long= bit_fields_as_long;
 
   if (! (table= create_tmp_table(thd_arg, &tmp_table_param, *column_types,
                                  (ORDER*) 0, is_union_distinct, 1,
-                                 options, HA_POS_ERROR, alias)))
+                                 options, HA_POS_ERROR, alias,
+                                 !create_table)))
     return TRUE;
-  table->file->extra(HA_EXTRA_WRITE_CACHE);
-  table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+
+  table->keys_in_use_for_query.clear_all();
+  for (uint i=0; i < table->s->fields; i++)
+    table->field[i]->flags &= ~PART_KEY_FLAG;
+
+  if (create_table)
+  {
+    table->file->extra(HA_EXTRA_WRITE_CACHE);
+    table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+  }
   return FALSE;
 }
 
 
+/**
+  Reset and empty the temporary table that stores the materialized query result.
+
+  @note The cleanup performed here is exactly the same as for the two temp
+  tables of JOIN - exec_tmp_table_[1 | 2].
+*/
+
+void select_union::cleanup()
+{
+  table->file->extra(HA_EXTRA_RESET_STATE);
+  table->file->ha_delete_all_rows();
+  free_io_cache(table);
+  filesort_free_buffers(table,0);
+}
+
+
 /*
   initialization procedures before fake_select_lex preparation()
 
@@ -165,6 +205,8 @@ st_select_lex_unit::init_prepare_fake_select_lex(THD *thd_arg)
   {
     (*order->item)->walk(&Item::change_context_processor, 0,
                          (uchar*) &fake_select_lex->context);
+    (*order->item)->walk(&Item::set_fake_select_as_master_processor, 0,
+                         (uchar*) fake_select_lex);
   }
 }
 
@@ -249,6 +291,18 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
 
     can_skip_order_by= is_union_select && !(sl->braces && sl->explicit_limit);
 
+    /*
+      Remove all references from the select_lex_units to the subqueries that
+      are inside the ORDER BY clause.
+    */
+    if (can_skip_order_by)
+    {
+      for (ORDER *ord= (ORDER *)sl->order_list.first; ord; ord= ord->next)
+      {
+        (*ord->item)->walk(&Item::eliminate_subselect_processor, FALSE, NULL);
+      }
+    }
+
     saved_error= join->prepare(&sl->ref_pointer_array,
                                sl->table_list.first,
                                sl->with_wild,
@@ -263,6 +317,7 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
                                (is_union_select ? NULL :
                                 thd_arg->lex->proc_list.first),
                                sl, this);
+
     /* There are no * in the statement anymore (for PS) */
     sl->with_wild= 0;
     last_procedure= join->procedure;
@@ -317,6 +372,9 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
     List_iterator_fast<Item> tp(types);
     Item *type;
     ulonglong create_options;
+    uint save_tablenr= 0;
+    table_map save_map= 0;
+    uint save_maybe_null= 0;
 
     while ((type= tp++))
     {
@@ -369,12 +427,24 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
       create_options= create_options | TMP_TABLE_FORCE_MYISAM;
 
     if (union_result->create_result_table(thd, &types, test(union_distinct),
-                                          create_options, ""))
+                                          create_options, "", FALSE, TRUE))
       goto err;
+    if (fake_select_lex && !fake_select_lex->first_cond_optimization)
+    {
+      save_tablenr= result_table_list.tablenr_exec;
+      save_map= result_table_list.map_exec;
+      save_maybe_null= result_table_list.maybe_null_exec;
+    }
     bzero((char*) &result_table_list, sizeof(result_table_list));
     result_table_list.db= (char*) "";
     result_table_list.table_name= result_table_list.alias= (char*) "union";
     result_table_list.table= table= union_result->table;
+    if (fake_select_lex && !fake_select_lex->first_cond_optimization)
+    {
+      result_table_list.tablenr_exec= save_tablenr;
+      result_table_list.map_exec= save_map;
+      result_table_list.maybe_null_exec= save_maybe_null;
+    }
 
     thd_arg->lex->current_select= lex_select_save;
     if (!item_list.elements)
@@ -451,18 +521,21 @@ err:
 }
 
 
-bool st_select_lex_unit::exec()
+/**
+  Run optimization phase.
+
+  @return FALSE unit successfully passed optimization phase.
+  @return TRUE an error occurred.
+*/
+bool st_select_lex_unit::optimize()
 {
   SELECT_LEX *lex_select_save= thd->lex->current_select;
   SELECT_LEX *select_cursor=first_select();
-  ulonglong add_rows=0;
-  ha_rows examined_rows= 0;
-  DBUG_ENTER("st_select_lex_unit::exec");
+  DBUG_ENTER("st_select_lex_unit::optimize");
 
-  if (executed && !uncacheable && !describe)
+  if (optimized && !uncacheable && !describe)
     DBUG_RETURN(FALSE);
-  executed= 1;
-  
+
   if (uncacheable || !item || !item->assigned() || describe)
   {
     if (item)
@@ -483,7 +556,6 @@ bool st_select_lex_unit::exec()
     }
     for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
     {
-      ha_rows records_at_start= 0;
       thd->lex->current_select= sl;
 
       if (optimized)
@@ -510,6 +582,66 @@ bool st_select_lex_unit::exec()
         sl->join->select_options= 
           (select_limit_cnt == HA_POS_ERROR || sl->braces) ?
           sl->options & ~OPTION_FOUND_ROWS : sl->options | found_rows_for_union;
+
+	saved_error= sl->join->optimize();
+      }
+
+      if (saved_error)
+      {
+	thd->lex->current_select= lex_select_save;
+	DBUG_RETURN(saved_error);
+      }
+    }
+  }
+  optimized= 1;
+
+  thd->lex->current_select= lex_select_save;
+  DBUG_RETURN(saved_error);
+}
+
+
+bool st_select_lex_unit::exec()
+{
+  SELECT_LEX *lex_select_save= thd->lex->current_select;
+  SELECT_LEX *select_cursor=first_select();
+  ulonglong add_rows=0;
+  ha_rows examined_rows= 0;
+  DBUG_ENTER("st_select_lex_unit::exec");
+
+  if (executed && !uncacheable && !describe)
+    DBUG_RETURN(FALSE);
+  executed= 1;
+  
+  saved_error= optimize();
+
+  if (uncacheable || !item || !item->assigned() || describe)
+  {
+    for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
+    {
+      ha_rows records_at_start= 0;
+      thd->lex->current_select= sl;
+
+      {
+        set_limit(sl);
+	if (sl == global_parameters || describe)
+	{
+	  offset_limit_cnt= 0;
+	  /*
+	    We can't use LIMIT at this stage if we are using ORDER BY for the
+	    whole query
+	  */
+	  if (sl->order_list.first || describe)
+	    select_limit_cnt= HA_POS_ERROR;
+        }
+
+        /*
+          When using braces, SQL_CALC_FOUND_ROWS affects the whole query:
+          we don't calculate found_rows() per union part.
+          Otherwise, SQL_CALC_FOUND_ROWS should be done on all sub parts.
+        */
+        sl->join->select_options= 
+          (select_limit_cnt == HA_POS_ERROR || sl->braces) ?
+          sl->options & ~OPTION_FOUND_ROWS : sl->options | found_rows_for_union;
 	saved_error= sl->join->optimize();
       }
       if (!saved_error)
@@ -562,7 +694,6 @@ bool st_select_lex_unit::exec()
       }
     }
   }
-  optimized= 1;
 
   /* Send result to 'result' */
   saved_error= TRUE;
@@ -694,7 +825,8 @@ bool st_select_lex_unit::cleanup()
     if ((join= fake_select_lex->join))
     {
       join->tables_list= 0;
-      join->tables= 0;
+      join->table_count= 0;
+      join->top_join_tab_count= 0;
     }
     error|= fake_select_lex->cleanup();
     /*
@@ -756,8 +888,8 @@ void st_select_lex_unit::reinit_exec_mechanism()
     TRUE  - error
 */
 
-bool st_select_lex_unit::change_result(select_subselect *new_result,
-                                       select_subselect *old_result)
+bool st_select_lex_unit::change_result(select_result_interceptor *new_result,
+                                       select_result_interceptor *old_result)
 {
   bool res= FALSE;
   for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
@@ -847,3 +979,27 @@ void st_select_lex::cleanup_all_joins(bool full)
     for (sl= unit->first_select(); sl; sl= sl->next_select())
       sl->cleanup_all_joins(full);
 }
+
+
+/**
+  Set exclude_from_table_unique_test for selects of this unit and all
+  underlying selects.
+
+  @note used to exclude materialized derived tables (views) from unique
+  table check.
+*/
+
+void st_select_lex_unit::set_unique_exclude()
+{
+  for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
+  {
+    sl->exclude_from_table_unique_test= TRUE;
+    for (SELECT_LEX_UNIT *unit= sl->first_inner_unit();
+         unit;
+         unit= unit->next_unit())
+    {
+      unit->set_unique_exclude();
+    }
+  }
+}
+
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 754170e4c55..9ab01736f66 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -45,7 +46,7 @@
 
 /**
    True if the table's input and output record buffers are comparable using
-   compare_records(TABLE*).
+   compare_record(TABLE*).
  */
 bool records_are_comparable(const TABLE *table) {
   return ((table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ) == 0) ||
@@ -55,18 +56,13 @@ bool records_are_comparable(const TABLE *table) {
 
 /**
    Compares the input and outbut record buffers of the table to see if a row
-   has changed. The algorithm iterates over updated columns and if they are
-   nullable compares NULL bits in the buffer before comparing actual
-   data. Special care must be taken to compare only the relevant NULL bits and
-   mask out all others as they may be undefined. The storage engine will not
-   and should not touch them.
-
-   @param table The table to evaluate.
+   has changed.
 
    @return true if row has changed.
    @return false otherwise.
 */
-bool compare_records(const TABLE *table)
+
+bool compare_record(const TABLE *table)
 {
   DBUG_ASSERT(records_are_comparable(table));
 
@@ -102,13 +98,12 @@ bool compare_records(const TABLE *table)
      including those not in the write_set. This is cheaper than the field-by-field
      comparison done above.
   */ 
-  if (table->s->blob_fields + table->s->varchar_fields == 0)
-    // Fixed-size record: do bitwise comparison of the records 
+  if (table->s->can_cmp_whole_record)
     return cmp_record(table,record[1]);
   /* Compare null bits */
   if (memcmp(table->null_flags,
 	     table->null_flags+table->s->rec_buff_length,
-	     table->s->null_bytes))
+	     table->s->null_bytes_for_compare))
     return TRUE;				// Diff in NULL value
   /* Compare updated fields */
   for (Field **ptr= table->field ; *ptr ; ptr++)
@@ -217,7 +212,7 @@ static void prepare_record_for_error_message(int error, TABLE *table)
   /* Tell the engine about the new set. */
   table->file->column_bitmaps_signal();
   /* Read record that is identified by table->file->ref. */
-  (void) table->file->rnd_pos(table->record[1], table->file->ref);
+  (void) table->file->ha_rnd_pos(table->record[1], table->file->ref);
   /* Copy the newly read columns into the new record. */
   for (field_p= table->field; (field= *field_p); field_p++)
     if (bitmap_is_set(&unique_map, field->field_index))
@@ -261,6 +256,7 @@ int mysql_update(THD *thd,
   bool		using_limit= limit != HA_POS_ERROR;
   bool		safe_update= test(thd->variables.option_bits & OPTION_SAFE_UPDATES);
   bool          used_key_is_modified= FALSE, transactional_table, will_batch;
+  bool		can_compare_record;
   int           res;
   int		error, loc_error;
   uint          used_index, dup_key_found;
@@ -284,7 +280,11 @@ int mysql_update(THD *thd,
   if (open_tables(thd, &table_list, &table_count, 0))
     DBUG_RETURN(1);
 
-  if (table_list->multitable_view)
+  //Prepare views so they are handled correctly.
+  if (mysql_handle_derived(thd->lex, DT_INIT))
+    DBUG_RETURN(1);
+
+  if (table_list->is_multitable())
   {
     DBUG_ASSERT(table_list->view != 0);
     DBUG_PRINT("info", ("Switch to multi-update"));
@@ -296,20 +296,19 @@ int mysql_update(THD *thd,
   if (lock_tables(thd, table_list, table_count, 0))
     DBUG_RETURN(1);
 
-  if (mysql_handle_derived(thd->lex, &mysql_derived_prepare))
+  if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT))
     DBUG_RETURN(1);
-
-  if (thd->fill_derived_tables() &&
-      mysql_handle_derived(thd->lex, &mysql_derived_filling))
-  {
-    mysql_handle_derived(thd->lex, &mysql_derived_cleanup);
+  if (table_list->handle_derived(thd->lex, DT_PREPARE))
     DBUG_RETURN(1);
-  }
-  mysql_handle_derived(thd->lex, &mysql_derived_cleanup);
 
   thd_proc_info(thd, "init");
   table= table_list->table;
 
+  if (!table_list->updatable)
+  {
+    my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "UPDATE");
+    DBUG_RETURN(1);
+  }
   /* Calculate "table->covering_keys" based on the WHERE */
   table->covering_keys= table->s->keys_in_use;
   table->quick_keys.clear_all();
@@ -328,13 +327,17 @@ int mysql_update(THD *thd,
   table_list->grant.want_privilege= table->grant.want_privilege= want_privilege;
   table_list->register_want_access(want_privilege);
 #endif
+  /* 'Unfix' fields to allow correct marking by the setup_fields function. */
+  if (table_list->is_view())
+    unfix_fields(fields);
+
   if (setup_fields_with_no_wrap(thd, 0, fields, MARK_COLUMNS_WRITE, 0, 0))
     DBUG_RETURN(1);                     /* purecov: inspected */
   if (table_list->view && check_fields(thd, fields))
   {
     DBUG_RETURN(1);
   }
-  if (!table_list->updatable || check_key_in_view(thd, table_list))
+  if (check_key_in_view(thd, table_list))
   {
     my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "UPDATE");
     DBUG_RETURN(1);
@@ -347,8 +350,7 @@ int mysql_update(THD *thd,
       table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
     else
     {
-      if (table->timestamp_field_type == TIMESTAMP_AUTO_SET_ON_UPDATE ||
-          table->timestamp_field_type == TIMESTAMP_AUTO_SET_ON_BOTH)
+      if (((uint) table->timestamp_field_type) & TIMESTAMP_AUTO_SET_ON_UPDATE)
         bitmap_set_bit(table->write_set,
                        table->timestamp_field->field_index);
     }
@@ -365,6 +367,10 @@ int mysql_update(THD *thd,
     DBUG_RETURN(1);				/* purecov: inspected */
   }
 
+  /* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */
+  if (select_lex->optimize_unflattened_subqueries())
+    DBUG_RETURN(TRUE);
+
   if (select_lex->inner_refs_list.elements &&
     fix_inner_refs(thd, all_fields, select_lex, select_lex->ref_pointer_array))
     DBUG_RETURN(1);
@@ -382,10 +388,8 @@ int mysql_update(THD *thd,
     update force the table handler to retrieve write-only fields to be able
     to compare records and detect data change.
   */
-  if (table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ &&
-      table->timestamp_field &&
-      (table->timestamp_field_type == TIMESTAMP_AUTO_SET_ON_UPDATE ||
-       table->timestamp_field_type == TIMESTAMP_AUTO_SET_ON_BOTH))
+  if ((table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ) &&
+      (((uint) table->timestamp_field_type) & TIMESTAMP_AUTO_SET_ON_UPDATE))
     bitmap_union(table->read_set, table->write_set);
   // Don't count on usage of 'only index' when calculating which key to use
   table->covering_keys.clear_all();
@@ -440,23 +444,37 @@ int mysql_update(THD *thd,
   table->update_const_key_parts(conds);
   order= simple_remove_const(order, conds);
         
-  used_index= get_index_for_order(order, table, select, limit,
-                                  &need_sort, &reverse);
-  if (need_sort)
-  { // Assign table scan index to check below for modified key fields:
-    used_index= table->file->key_used_on_scan;
+  if (select && select->quick && select->quick->unique_key_range())
+  { // Single row select (always "ordered"): Ok to use with key field UPDATE
+    need_sort= FALSE;
+    used_index= MAX_KEY;
+    used_key_is_modified= FALSE;
   }
-  if (used_index != MAX_KEY)
-  { // Check if we are modifying a key that we are used to search with:
-    used_key_is_modified= is_key_used(table, used_index, table->write_set);
+  else
+  {
+    used_index= get_index_for_order(order, table, select, limit,
+                                    &need_sort, &reverse);
+    if (select && select->quick)
+    {
+      DBUG_ASSERT(need_sort || used_index == select->quick->index);
+      used_key_is_modified= (!select->quick->unique_key_range() &&
+                             select->quick->is_keys_used(table->write_set));
+    }
+    else
+    {
+      if (need_sort)
+      { // Assign table scan index to check below for modified key fields:
+        used_index= table->file->key_used_on_scan;
+      }
+      if (used_index != MAX_KEY)
+      { // Check if we are modifying a key that we are used to search with:
+        used_key_is_modified= is_key_used(table, used_index, table->write_set);
+      }
+    }
   }
 
-#ifdef WITH_PARTITION_STORAGE_ENGINE
   if (used_key_is_modified || order ||
       partition_key_modified(table, table->write_set))
-#else
-  if (used_key_is_modified || order)
-#endif
   {
     /*
       We can't update table directly;  We must first search after all
@@ -529,7 +547,10 @@ int mysql_update(THD *thd,
       */
 
       if (used_index == MAX_KEY || (select && select->quick))
-        init_read_record(&info, thd, table, select, 0, 1, FALSE);
+      {
+        if (init_read_record(&info, thd, table, select, 0, 1, FALSE))
+          goto err;
+      }
       else
         init_read_record_idx(&info, thd, table, 1, used_index, reverse);
 
@@ -538,15 +559,9 @@ int mysql_update(THD *thd,
 
       while (!(error=info.read_record(&info)) && !thd->killed)
       {
+        update_virtual_fields(thd, table);
         thd->examined_row_count++;
-        bool skip_record= FALSE;
-        if (select && select->skip_record(thd, &skip_record))
-        {
-          error= 1;
-          table->file->unlock_row();
-          break;
-        }
-        if (!skip_record)
+	if (!select || (error= select->skip_record(thd)) > 0)
 	{
           if (table->file->was_semi_consistent_read())
 	    continue;  /* repeat the read of the same row if it still exists */
@@ -565,7 +580,15 @@ int mysql_update(THD *thd,
 	  }
 	}
 	else
+        {
 	  table->file->unlock_row();
+          if (error < 0)
+          {
+            /* Fatal error from select->skip_record() */
+            error= 1;
+            break;
+          }
+        }
       }
       if (thd->killed && !error)
 	error= 1;				// Aborted
@@ -603,7 +626,8 @@ int mysql_update(THD *thd,
   if (select && select->quick && select->quick->reset())
     goto err;
   table->file->try_semi_consistent_read(1);
-  init_read_record(&info, thd, table, select, 0, 1, FALSE);
+  if (init_read_record(&info, thd, table, select, 0, 1, FALSE))
+    goto err;
 
   updated= found= 0;
   /*
@@ -641,11 +665,18 @@ int mysql_update(THD *thd,
   if (table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ)
     table->prepare_for_position();
 
+  /*
+    We can use compare_record() to optimize away updates if
+    the table handler is returning all columns OR if
+    all updated columns are read
+  */
+  can_compare_record= records_are_comparable(table);
+
   while (!(error=info.read_record(&info)) && !thd->killed)
   {
+    update_virtual_fields(thd, table);
     thd->examined_row_count++;
-    bool skip_record;
-    if (!select || (!select->skip_record(thd, &skip_record) && !skip_record))
+    if (!select || select->skip_record(thd) > 0)
     {
       if (table->file->was_semi_consistent_read())
         continue;  /* repeat the read of the same row if it still exists */
@@ -658,7 +689,7 @@ int mysql_update(THD *thd,
 
       found++;
 
-      if (!records_are_comparable(table) || compare_records(table))
+      if (!can_compare_record || compare_record(table))
       {
         if ((res= table_list->view_check_option(thd, ignore)) !=
             VIEW_CHECK_OK)
@@ -906,6 +937,11 @@ int mysql_update(THD *thd,
   }
   thd->count_cuted_fields= CHECK_FIELD_IGNORE;		/* calc cuted fields */
   thd->abort_on_warning= 0;
+  if (thd->lex->current_select->first_cond_optimization)
+  {
+    thd->lex->current_select->save_leaf_tables(thd);
+    thd->lex->current_select->first_cond_optimization= 0;
+  }
   *found_return= found;
   *updated_return= updated;
   DBUG_RETURN((error >= 0 || thd->is_error()) ? 1 : 0);
@@ -913,7 +949,7 @@ int mysql_update(THD *thd,
 err:
   delete select;
   free_underlaid_joins(thd, select_lex);
-  table->set_keyread(FALSE);
+  table->disable_keyread();
   thd->abort_on_warning= 0;
   DBUG_RETURN(1);
 }
@@ -955,8 +991,8 @@ bool mysql_prepare_update(THD *thd, TABLE_LIST *table_list,
   if (setup_tables_and_check_access(thd, &select_lex->context, 
                                     &select_lex->top_join_list,
                                     table_list,
-                                    &select_lex->leaf_tables,
-                                    FALSE, UPDATE_ACL, SELECT_ACL) ||
+                                    select_lex->leaf_tables,
+                                    FALSE, UPDATE_ACL, SELECT_ACL, TRUE) ||
       setup_conds(thd, table_list, select_lex->leaf_tables, conds) ||
       select_lex->setup_ref_array(thd, order_num) ||
       setup_order(thd, select_lex->ref_pointer_array,
@@ -992,8 +1028,8 @@ static table_map get_table_map(List<Item> *items)
   Item_field *item;
   table_map map= 0;
 
-  while ((item= (Item_field *) item_it++)) 
-    map|= item->used_tables();
+  while ((item= (Item_field *) item_it++))
+    map|= item->all_used_tables();
   DBUG_PRINT("info", ("table_map: 0x%08lx", (long) map));
   return map;
 }
@@ -1028,11 +1064,12 @@ static table_map get_table_map(List<Item> *items)
     false  otherwise.
 */
 static
-bool unsafe_key_update(TABLE_LIST *leaves, table_map tables_for_update)
+bool unsafe_key_update(List<TABLE_LIST> leaves, table_map tables_for_update)
 {
-  TABLE_LIST *tl= leaves;
+  List_iterator_fast<TABLE_LIST> it(leaves), it2(leaves);
+  TABLE_LIST *tl, *tl2;
 
-  for (tl= leaves; tl ; tl= tl->next_leaf)
+  while ((tl= it++))
   {
     if (tl->table->map & tables_for_update)
     {
@@ -1048,14 +1085,16 @@ bool unsafe_key_update(TABLE_LIST *leaves, table_map tables_for_update)
       if (!table_partitioned && !primkey_clustered)
         continue;
 
-      for (TABLE_LIST* tl2= tl->next_leaf; tl2 ; tl2= tl2->next_leaf)
+      it2.rewind();
+      while ((tl2= it2++))
       {
         /*
           Look at "next" tables only since all previous tables have
           already been checked
         */
         TABLE *table2= tl2->table;
-        if (table2->map & tables_for_update && table1->s == table2->s)
+        if (tl2 != tl &&
+            table2->map & tables_for_update && table1->s == table2->s)
         {
           // A table is updated through two aliases
           if (table_partitioned &&
@@ -1117,7 +1156,7 @@ int mysql_multi_update_prepare(THD *thd)
 {
   LEX *lex= thd->lex;
   TABLE_LIST *table_list= lex->query_tables;
-  TABLE_LIST *tl, *leaves;
+  TABLE_LIST *tl;
   List<Item> *fields= &lex->select_lex.item_list;
   table_map tables_for_update;
   bool update_view= 0;
@@ -1145,7 +1184,7 @@ int mysql_multi_update_prepare(THD *thd)
        open_tables(thd, &table_list, &table_count,
                    (thd->stmt_arena->is_stmt_prepare() ?
                     MYSQL_OPEN_FORCE_SHARED_MDL : 0))) ||
-      mysql_handle_derived(lex, &mysql_derived_prepare))
+      mysql_handle_derived(lex, DT_INIT))
     DBUG_RETURN(TRUE);
   /*
     setup_tables() need for VIEWs. JOIN::prepare() will call setup_tables()
@@ -1153,11 +1192,20 @@ int mysql_multi_update_prepare(THD *thd)
     call in setup_tables()).
   */
 
+  /* We need to merge for insert prior to prepare. */
+  if (mysql_handle_derived(lex, DT_MERGE_FOR_INSERT))
+    DBUG_RETURN(TRUE);
+  if (mysql_handle_derived(lex, DT_PREPARE))
+    DBUG_RETURN(TRUE);
+
   if (setup_tables_and_check_access(thd, &lex->select_lex.context,
                                     &lex->select_lex.top_join_list,
                                     table_list,
-                                    &lex->select_lex.leaf_tables, FALSE,
-                                    UPDATE_ACL, SELECT_ACL))
+                                    lex->select_lex.leaf_tables, FALSE,
+                                    UPDATE_ACL, SELECT_ACL, FALSE))
+    DBUG_RETURN(TRUE);
+
+  if (lex->select_lex.handle_derived(thd->lex, DT_MERGE))  
     DBUG_RETURN(TRUE);
 
   if (setup_fields_with_no_wrap(thd, 0, *fields, MARK_COLUMNS_WRITE, 0, 0))
@@ -1179,15 +1227,14 @@ int mysql_multi_update_prepare(THD *thd)
 
   thd->table_map_for_update= tables_for_update= get_table_map(fields);
 
-  leaves= lex->select_lex.leaf_tables;
-
-  if (unsafe_key_update(leaves, tables_for_update))
+  if (unsafe_key_update(lex->select_lex.leaf_tables, tables_for_update))
     DBUG_RETURN(true);
 
   /*
     Setup timestamp handling and locking mode
   */
-  for (tl= leaves; tl; tl= tl->next_leaf)
+  List_iterator<TABLE_LIST> ti(lex->select_lex.leaf_tables);
+  while ((tl= ti++))
   {
     TABLE *table= tl->table;
     /* Only set timestamp column if this is not modified */
@@ -1234,7 +1281,7 @@ int mysql_multi_update_prepare(THD *thd)
   for (tl= table_list; tl; tl= tl->next_local)
   {
     /* Check access privileges for table */
-    if (!tl->derived)
+    if (!tl->is_derived())
     {
       uint want_privilege= tl->updating ? UPDATE_ACL : SELECT_ACL;
       if (check_access(thd, want_privilege, tl->db,
@@ -1249,7 +1296,7 @@ int mysql_multi_update_prepare(THD *thd)
   /* check single table update for view compound from several tables */
   for (tl= table_list; tl; tl= tl->next_local)
   {
-    if (tl->effective_algorithm == VIEW_ALGORITHM_MERGE)
+    if (tl->is_merged_derived())
     {
       TABLE_LIST *for_update= 0;
       if (tl->check_single_table(&for_update, tables_for_update, tl))
@@ -1275,7 +1322,8 @@ int mysql_multi_update_prepare(THD *thd)
   */
   lex->select_lex.exclude_from_table_unique_test= TRUE;
   /* We only need SELECT privilege for columns in the values list */
-  for (tl= leaves; tl; tl= tl->next_leaf)
+  ti.rewind();
+  while ((tl= ti++))
   {
     TABLE *table= tl->table;
     TABLE_LIST *tlist;
@@ -1303,15 +1351,10 @@ int mysql_multi_update_prepare(THD *thd)
     further check in multi_update::prepare whether to use record cache.
   */
   lex->select_lex.exclude_from_table_unique_test= FALSE;
- 
-  if (thd->fill_derived_tables() &&
-      mysql_handle_derived(lex, &mysql_derived_filling))
-  {
-    mysql_handle_derived(lex, &mysql_derived_cleanup);
-    DBUG_RETURN(TRUE);
-  }
-  mysql_handle_derived(lex, &mysql_derived_cleanup);
 
+  if (lex->select_lex.save_prep_leaf_tables(thd))
+    DBUG_RETURN(TRUE);
+ 
   DBUG_RETURN (FALSE);
 }
 
@@ -1336,7 +1379,7 @@ bool mysql_multi_update(THD *thd,
   DBUG_ENTER("mysql_multi_update");
 
   if (!(*result= new multi_update(table_list,
-				 thd->lex->select_lex.leaf_tables,
+				 &thd->lex->select_lex.leaf_tables,
 				 fields, values,
 				 handle_duplicates, ignore)))
   {
@@ -1372,7 +1415,7 @@ bool mysql_multi_update(THD *thd,
 
 
 multi_update::multi_update(TABLE_LIST *table_list,
-			   TABLE_LIST *leaves_list,
+                           List<TABLE_LIST> *leaves_list,
 			   List<Item> *field_list, List<Item> *value_list,
 			   enum enum_duplicates handle_duplicates_arg,
                            bool ignore_arg)
@@ -1390,6 +1433,7 @@ multi_update::multi_update(TABLE_LIST *table_list,
 
 int multi_update::prepare(List<Item> &not_used_values,
 			  SELECT_LEX_UNIT *lex_unit)
+
 {
   TABLE_LIST *table_ref;
   SQL_I_List<TABLE_LIST> update;
@@ -1399,6 +1443,7 @@ int multi_update::prepare(List<Item> &not_used_values,
   List_iterator_fast<Item> value_it(*values);
   uint i, max_fields;
   uint leaf_table_count= 0;
+  List_iterator<TABLE_LIST> ti(*leaves);
   DBUG_ENTER("multi_update::prepare");
 
   thd->count_cuted_fields= CHECK_FIELD_WARN;
@@ -1418,7 +1463,7 @@ int multi_update::prepare(List<Item> &not_used_values,
     TABLE::tmp_set by pointing TABLE::read_set to it and then restore it after
     setup_fields().
   */
-  for (table_ref= leaves; table_ref; table_ref= table_ref->next_leaf)
+  while ((table_ref= ti++))
   {
     TABLE *table= table_ref->table;
     if (tables_to_update & table->map)
@@ -1436,7 +1481,8 @@ int multi_update::prepare(List<Item> &not_used_values,
 
   int error= setup_fields(thd, 0, *values, MARK_COLUMNS_READ, 0, 0);
 
-  for (table_ref= leaves; table_ref; table_ref= table_ref->next_leaf)
+  ti.rewind();
+  while ((table_ref= ti++))
   {
     TABLE *table= table_ref->table;
     if (tables_to_update & table->map)
@@ -1448,10 +1494,9 @@ int multi_update::prepare(List<Item> &not_used_values,
         update force the table handler to retrieve write-only fields to be able
         to compare records and detect data change.
         */
-      if (table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ &&
-          table->timestamp_field &&
-          (table->timestamp_field_type == TIMESTAMP_AUTO_SET_ON_UPDATE ||
-           table->timestamp_field_type == TIMESTAMP_AUTO_SET_ON_BOTH))
+      if ((table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ) &&
+          (((uint) table->timestamp_field_type) &
+           TIMESTAMP_AUTO_SET_ON_UPDATE))
         bitmap_union(table->read_set, table->write_set);
     }
   }
@@ -1466,7 +1511,8 @@ int multi_update::prepare(List<Item> &not_used_values,
   */
 
   update.empty();
-  for (table_ref= leaves; table_ref; table_ref= table_ref->next_leaf)
+  ti.rewind();
+  while ((table_ref= ti++))
   {
     /* TODO: add support of view of join support */
     TABLE *table=table_ref->table;
@@ -1692,9 +1738,9 @@ loop_end:
     {
       table_map unupdated_tables= table_ref->check_option->used_tables() &
                                   ~first_table_for_update->map;
-      for (TABLE_LIST *tbl_ref =leaves;
-           unupdated_tables && tbl_ref;
-           tbl_ref= tbl_ref->next_leaf)
+      List_iterator<TABLE_LIST> ti(*leaves);
+      TABLE_LIST *tbl_ref;
+      while ((tbl_ref= ti++) && unupdated_tables)
       {
         if (unupdated_tables & tbl_ref->table->map)
           unupdated_tables&= ~tbl_ref->table->map;
@@ -1720,7 +1766,8 @@ loop_end:
     do
     {
       Field_string *field= new Field_string(tbl->file->ref_length, 0,
-                                            tbl->alias, &my_charset_bin);
+                                            tbl->alias.c_ptr(),
+                                            &my_charset_bin);
       if (!field)
         DBUG_RETURN(1);
       field->init(tbl);
@@ -1792,7 +1839,7 @@ multi_update::~multi_update()
 }
 
 
-bool multi_update::send_data(List<Item> &not_used_values)
+int multi_update::send_data(List<Item> &not_used_values)
 {
   TABLE_LIST *cur_table;
   DBUG_ENTER("multi_update::send_data");
@@ -1818,6 +1865,14 @@ bool multi_update::send_data(List<Item> &not_used_values)
 
     if (table == table_to_update)
     {
+      /*
+        We can use compare_record() to optimize away updates if
+        the table handler is returning all columns OR
+        if all updated columns are read
+      */
+      bool can_compare_record;
+      can_compare_record= records_are_comparable(table);
+
       table->status|= STATUS_UPDATED;
       store_record(table,record[1]);
       if (fill_record_n_invoke_before_triggers(thd, *fields_for_table[offset],
@@ -1832,7 +1887,7 @@ bool multi_update::send_data(List<Item> &not_used_values)
       */
       table->auto_increment_field_not_null= FALSE;
       found++;
-      if (!records_are_comparable(table) || compare_records(table))
+      if (!can_compare_record || compare_record(table))
       {
 	int error;
         if ((error= cur_table->view_check_option(thd, ignore)) !=
@@ -1926,15 +1981,17 @@ bool multi_update::send_data(List<Item> &not_used_values)
       /* Store regular updated fields in the row. */
       fill_record(thd,
                   tmp_table->field + 1 + unupdated_check_opt_tables.elements,
-                  *values_for_table[offset], 1);
+                  *values_for_table[offset], TRUE, FALSE);
 
       /* Write row, ignoring duplicated updates to a row */
-      error= tmp_table->file->ha_write_row(tmp_table->record[0]);
+      error= tmp_table->file->ha_write_tmp_row(tmp_table->record[0]);
       if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE)
       {
         if (error &&
-            create_myisam_from_heap(thd, tmp_table,
-                                    tmp_table_param + offset, error, 1))
+            create_internal_tmp_table_from_heap(thd, tmp_table,
+                                         tmp_table_param[offset].start_recinfo,
+                                         &tmp_table_param[offset].recinfo,
+                                         error, 1))
         {
           do_update= 0;
           DBUG_RETURN(1);			// Not a table_is_full error
@@ -2012,7 +2069,7 @@ int multi_update::do_updates()
   TABLE_LIST *cur_table;
   int local_error= 0;
   ha_rows org_updated;
-  TABLE *table, *tmp_table;
+  TABLE *table, *tmp_table, *err_table;
   List_iterator_fast<TABLE> check_opt_it(unupdated_check_opt_tables);
   DBUG_ENTER("multi_update::do_updates");
 
@@ -2021,6 +2078,7 @@ int multi_update::do_updates()
     DBUG_RETURN(0);
   for (cur_table= update_tables; cur_table; cur_table= cur_table->next_local)
   {
+    bool can_compare_record;
     uint offset= cur_table->shared;
 
     table = cur_table->table;
@@ -2029,14 +2087,21 @@ int multi_update::do_updates()
     org_updated= updated;
     tmp_table= tmp_tables[cur_table->shared];
     tmp_table->file->extra(HA_EXTRA_CACHE);	// Change to read cache
-    (void) table->file->ha_rnd_init(0);
+    if ((local_error= table->file->ha_rnd_init(0)))
+    {
+      err_table= table;
+      goto err;
+    }
     table->file->extra(HA_EXTRA_NO_CACHE);
 
     check_opt_it.rewind();
     while(TABLE *tbl= check_opt_it++)
     {
-      if (tbl->file->ha_rnd_init(1))
+      if ((local_error= tbl->file->ha_rnd_init(1)))
+      {
+        err_table= tbl;
         goto err;
+      }
       tbl->file->extra(HA_EXTRA_CACHE);
     }
 
@@ -2044,9 +2109,11 @@ int multi_update::do_updates()
       Setup copy functions to copy fields from temporary table
     */
     List_iterator_fast<Item> field_it(*fields_for_table[offset]);
-    Field **field= tmp_table->field + 
-                   1 + unupdated_check_opt_tables.elements; // Skip row pointers
+    Field **field;
     Copy_field *copy_field_ptr= copy_field, *copy_field_end;
+
+    /* Skip row pointers */
+    field= tmp_table->field + 1 + unupdated_check_opt_tables.elements;
     for ( ; *field ; field++)
     {
       Item_field *item= (Item_field* ) field_it++;
@@ -2054,19 +2121,28 @@ int multi_update::do_updates()
     }
     copy_field_end=copy_field_ptr;
 
-    if ((local_error = tmp_table->file->ha_rnd_init(1)))
+    if ((local_error= tmp_table->file->ha_rnd_init(1)))
+    {
+      err_table= tmp_table;
       goto err;
+    }
+
+    can_compare_record= records_are_comparable(table);
 
     for (;;)
     {
       if (thd->killed && trans_safe)
-	goto err;
-      if ((local_error=tmp_table->file->rnd_next(tmp_table->record[0])))
+      {
+        thd->fatal_error();
+	goto err2;
+      }
+      if ((local_error= tmp_table->file->ha_rnd_next(tmp_table->record[0])))
       {
 	if (local_error == HA_ERR_END_OF_FILE)
 	  break;
 	if (local_error == HA_ERR_RECORD_DELETED)
 	  continue;				// May happen on dup key
+        err_table= tmp_table;
 	goto err;
       }
 
@@ -2076,12 +2152,15 @@ int multi_update::do_updates()
       uint field_num= 0;
       do
       {
-        if((local_error=
-              tbl->file->rnd_pos(tbl->record[0],
-                                (uchar *) tmp_table->field[field_num]->ptr)))
+        if ((local_error=
+             tbl->file->ha_rnd_pos(tbl->record[0],
+                                   (uchar *) tmp_table->field[field_num]->ptr)))
+        {
+          err_table= tbl;
           goto err;
+        }
         field_num++;
-      } while((tbl= check_opt_it++));
+      } while ((tbl= check_opt_it++));
 
       table->status|= STATUS_UPDATED;
       store_record(table,record[1]);
@@ -2097,7 +2176,7 @@ int multi_update::do_updates()
                                             TRG_ACTION_BEFORE, TRUE))
         goto err2;
 
-      if (!records_are_comparable(table) || compare_records(table))
+      if (!can_compare_record || compare_record(table))
       {
         int error;
         if ((error= cur_table->view_check_option(thd, ignore)) !=
@@ -2106,7 +2185,10 @@ int multi_update::do_updates()
           if (error == VIEW_CHECK_SKIP)
             continue;
           else if (error == VIEW_CHECK_ERROR)
-            goto err;
+          {
+            thd->fatal_error();
+            goto err2;
+          }
         }
 	if ((local_error=table->file->ha_update_row(table->record[1],
 						    table->record[0])) &&
@@ -2114,7 +2196,10 @@ int multi_update::do_updates()
 	{
 	  if (!ignore ||
               table->file->is_fatal_error(local_error, HA_CHECK_DUP_KEY))
+          {
+            err_table= table;
 	    goto err;
+          }
 	}
         if (local_error != HA_ERR_RECORD_IS_THE_SAME)
           updated++;
@@ -2149,8 +2234,8 @@ int multi_update::do_updates()
 
 err:
   {
-    prepare_record_for_error_message(local_error, table);
-    table->file->print_error(local_error,MYF(ME_FATALERROR));
+    prepare_record_for_error_message(local_error, err_table);
+    err_table->file->print_error(local_error,MYF(ME_FATALERROR));
   }
 
 err2:
diff --git a/sql/sql_update.h b/sql/sql_update.h
index ee412f80896..64029c5d634 100644
--- a/sql/sql_update.h
+++ b/sql/sql_update.h
@@ -39,6 +39,6 @@ bool mysql_multi_update(THD *thd, TABLE_LIST *table_list,
                         SELECT_LEX_UNIT *unit, SELECT_LEX *select_lex,
                         multi_update **result);
 bool records_are_comparable(const TABLE *table);
-bool compare_records(const TABLE *table);
+bool compare_record(const TABLE *table);
 
 #endif /* SQL_UPDATE_INCLUDED */
diff --git a/sql/sql_view.cc b/sql/sql_view.cc
index 6051aa028c7..cb1c57ea0ba 100644
--- a/sql/sql_view.cc
+++ b/sql/sql_view.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2004, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -228,7 +229,7 @@ fill_defined_view_parts (THD *thd, TABLE_LIST *view)
     view->definer.user= decoy.definer.user;
     lex->definer= &view->definer;
   }
-  if (lex->create_view_algorithm == VIEW_ALGORITHM_UNDEFINED)
+  if (lex->create_view_algorithm == DTYPE_ALGORITHM_UNDEFINED)
     lex->create_view_algorithm= (uint8) decoy.algorithm;
   if (lex->create_view_suid == VIEW_SUID_DEFAULT)
     lex->create_view_suid= decoy.view_suid ? 
@@ -840,14 +841,13 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view,
     ulong sql_mode= thd->variables.sql_mode & MODE_ANSI_QUOTES;
     thd->variables.sql_mode&= ~MODE_ANSI_QUOTES;
 
-    lex->unit.print(&view_query, QT_ORDINARY);
+    lex->unit.print(&view_query, QT_VIEW_INTERNAL);
     lex->unit.print(&is_query,
                     enum_query_type(QT_TO_SYSTEM_CHARSET | QT_WITHOUT_INTRODUCERS));
 
     thd->variables.sql_mode|= sql_mode;
   }
-  DBUG_PRINT("info",
-             ("View: %*.s", (int) view_query.length(), view_query.ptr()));
+  DBUG_PRINT("info", ("View: %s", view_query.c_ptr_safe()));
 
   /* fill structure */
   view->source= thd->lex->create_view_select;
@@ -875,7 +875,7 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view,
   {
     push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_VIEW_MERGE,
                  ER(ER_WARN_VIEW_MERGE));
-    lex->create_view_algorithm= VIEW_ALGORITHM_UNDEFINED;
+    lex->create_view_algorithm= DTYPE_ALGORITHM_UNDEFINED;
   }
   view->algorithm= lex->create_view_algorithm;
   view->definer.user= lex->definer->user;
@@ -1062,10 +1062,12 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table,
   bool parse_status;
   bool result, view_is_mergeable;
   TABLE_LIST *UNINIT_VAR(view_main_select_tables);
-
   DBUG_ENTER("mysql_make_view");
   DBUG_PRINT("info", ("table: 0x%lx (%s)", (ulong) table, table->table_name));
 
+  LINT_INIT(parse_status);
+  LINT_INIT(view_select);
+
   if (table->view)
   {
     /*
@@ -1203,7 +1205,7 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table,
   table->view= lex= thd->lex= (LEX*) new(thd->mem_root) st_lex_local;
 
   {
-    char old_db_buf[NAME_LEN+1];
+    char old_db_buf[SAFE_NAME_LEN+1];
     LEX_STRING old_db= { old_db_buf, sizeof(old_db_buf) };
     bool dbchanged;
     Parser_state parser_state;
@@ -1229,7 +1231,7 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table,
       + MODE_PIPES_AS_CONCAT          affect expression parsing
       + MODE_ANSI_QUOTES              affect expression parsing
       + MODE_IGNORE_SPACE             affect expression parsing
-      - MODE_NOT_USED                 not used :)
+      - MODE_IGNORE_BAD_TABLE_OPTIONS affect only CREATE/ALTER TABLE parsing
       * MODE_ONLY_FULL_GROUP_BY       affect execution
       * MODE_NO_UNSIGNED_SUBTRACTION  affect execution
       - MODE_NO_DIR_IN_CREATE         affect table creation only
@@ -1479,7 +1481,7 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table,
 
       List_iterator_fast<TABLE_LIST> ti(view_select->top_join_list);
 
-      table->effective_algorithm= VIEW_ALGORITHM_MERGE;
+      table->derived_type= VIEW_ALGORITHM_MERGE;
       DBUG_PRINT("info", ("algorithm: MERGE"));
       table->updatable= (table->updatable_view != 0);
       table->effective_with_check=
@@ -1493,74 +1495,31 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table,
       /* prepare view context */
       lex->select_lex.context.resolve_in_table_list_only(view_main_select_tables);
       lex->select_lex.context.outer_context= 0;
-      lex->select_lex.context.select_lex= table->select_lex;
       lex->select_lex.select_n_having_items+=
         table->select_lex->select_n_having_items;
 
-      /*
-        Tables of the main select of the view should be marked as belonging
-        to the same select as original view (again we can use LEX::select_lex
-        for this purprose because we don't support MERGE algorithm for views
-        with unions).
-      */
-      for (tbl= lex->select_lex.get_table_list(); tbl; tbl= tbl->next_local)
-        tbl->select_lex= table->select_lex;
-
-      {
-        if (view_main_select_tables->next_local)
-        {
-          table->multitable_view= TRUE;
-          if (table->belong_to_view)
-           table->belong_to_view->multitable_view= TRUE;
-        }
-        /* make nested join structure for view tables */
-        NESTED_JOIN *nested_join;
-        if (!(nested_join= table->nested_join=
-              (NESTED_JOIN *) thd->calloc(sizeof(NESTED_JOIN))))
-          goto err;
-        nested_join->join_list= view_select->top_join_list;
-
-        /* re-nest tables of VIEW */
-        ti.rewind();
-        while ((tbl= ti++))
-        {
-          tbl->join_list= &nested_join->join_list;
-          tbl->embedding= table;
-        }
-      }
-
-      /* Store WHERE clause for post-processing in setup_underlying */
       table->where= view_select->where;
-      /*
-        Add subqueries units to SELECT into which we merging current view.
-        unit(->next)* chain starts with subqueries that are used by this
-        view and continues with subqueries that are used by other views.
-        We must not add any subquery twice (otherwise we'll form a loop),
-        to do this we remember in end_unit the first subquery that has
-        been already added.
-
-        NOTE: we do not support UNION here, so we take only one select
-      */
-      SELECT_LEX_NODE *end_unit= table->select_lex->slave;
-      SELECT_LEX_UNIT *next_unit;
-      for (SELECT_LEX_UNIT *unit= lex->select_lex.first_inner_unit();
-           unit;
-           unit= next_unit)
-      {
-        if (unit == end_unit)
-          break;
-        SELECT_LEX_NODE *save_slave= unit->slave;
-        next_unit= unit->next_unit();
-        unit->include_down(table->select_lex);
-        unit->slave= save_slave; // fix include_down initialisation
-      }
 
       /* 
         We can safely ignore the VIEW's ORDER BY if we merge into union 
         branch, as order is not important there.
       */
-      if (!table->select_lex->master_unit()->is_union())
+      if (!table->select_lex->master_unit()->is_union() &&
+          table->select_lex->order_list.elements == 0)
         table->select_lex->order_list.push_back(&lex->select_lex.order_list);
+      else
+      {
+        if (old_lex->sql_command == SQLCOM_SELECT &&
+            (old_lex->describe & DESCRIBE_EXTENDED) &&
+            lex->select_lex.order_list.elements &&
+            !table->select_lex->master_unit()->is_union())
+        {
+          push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
+                              ER_VIEW_ORDERBY_IGNORED,
+                              ER(ER_VIEW_ORDERBY_IGNORED),
+                              table->db, table->table_name);
+        }
+      }
       /*
 	This SELECT_LEX will be linked in global SELECT_LEX list
 	to make it processed by mysql_handle_derived(),
@@ -1570,23 +1529,22 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table,
       goto ok;
     }
 
-    table->effective_algorithm= VIEW_ALGORITHM_TMPTABLE;
+    table->derived_type= VIEW_ALGORITHM_TMPTABLE;
     DBUG_PRINT("info", ("algorithm: TEMPORARY TABLE"));
     view_select->linkage= DERIVED_TABLE_TYPE;
     table->updatable= 0;
     table->effective_with_check= VIEW_CHECK_NONE;
     old_lex->subqueries= TRUE;
 
-    /* SELECT tree link */
-    lex->unit.include_down(table->select_lex);
-    lex->unit.slave= view_select; // fix include_down initialisation
-
     table->derived= &lex->unit;
   }
   else
     goto err;
 
 ok:
+  /* SELECT tree link */
+  lex->unit.include_down(table->select_lex);
+  lex->unit.slave= view_select; // fix include_down initialisation
   /* global SELECT list linking */
   end= view_select;	// primary SELECT_LEX is always last
   end->link_next= old_lex->all_selects_list;
@@ -1716,7 +1674,7 @@ bool mysql_drop_view(THD *thd, TABLE_LIST *views, enum_drop_mode drop_mode)
   }
   if (non_existant_views.length())
   {
-    my_error(ER_BAD_TABLE_ERROR, MYF(0), non_existant_views.c_ptr());
+    my_error(ER_BAD_TABLE_ERROR, MYF(0), non_existant_views.c_ptr_safe());
   }
 
   something_wrong= error || wrong_object_name || non_existant_views.length();
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 145c6c86714..d82936c87a9 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -1,5 +1,6 @@
 /*
-   Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2010, 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -36,6 +37,7 @@
 #define YYINITDEPTH 100
 #define YYMAXDEPTH 3200                        /* Because of 64K stack */
 #define Lex (YYTHD->lex)
+
 #define Select Lex->current_select
 #include "sql_priv.h"
 #include "unireg.h"                    // REQUIRED: for other includes
@@ -58,6 +60,7 @@
 #include "sql_partition_admin.h"               // Alter_table_*_partition_stmt
 #include "sql_signal.h"
 #include "event_parse_data.h"
+#include "create_options.h"
 #include <myisam.h>
 #include <myisammrg.h>
 #include "keycaches.h"
@@ -149,7 +152,7 @@ void my_parse_error(const char *s)
     yytext= "";
 
   /* Push an error into the error stack */
-  ErrConvString err(yytext, thd->variables.character_set_client);
+  ErrConvString err(yytext, strlen(yytext), thd->variables.character_set_client);
   my_printf_error(ER_PARSE_ERROR,  ER(ER_PARSE_ERROR), MYF(0), s,
                   err.ptr(), lip->yylineno);
 }
@@ -708,6 +711,7 @@ static bool add_create_index_prepare (LEX *lex, Table_ident *table)
   lex->alter_info.flags= ALTER_ADD_INDEX;
   lex->col_list.empty();
   lex->change= NullS;
+  lex->option_list= NULL;
   return FALSE;
 }
 
@@ -717,7 +721,7 @@ static bool add_create_index (LEX *lex, Key::Keytype type,
 {
   Key *key;
   key= new Key(type, name, info ? info : &lex->key_create_info, generated, 
-               lex->col_list);
+               lex->col_list, lex->option_list);
   if (key == NULL)
     return TRUE;
 
@@ -756,6 +760,7 @@ static bool add_create_index (LEX *lex, Key::Keytype type,
   enum enum_tx_isolation tx_isolation;
   enum Cast_target cast_type;
   enum Item_udftype udf_type;
+  enum ha_choice choice;
   CHARSET_INFO *charset;
   thr_lock_type lock_type;
   interval_type interval, interval_time_st;
@@ -773,6 +778,8 @@ static bool add_create_index (LEX *lex, Key::Keytype type,
   enum Foreign_key::fk_option m_fk_option;
   enum enum_yes_no_unknown m_yes_no_unk;
   Diag_condition_item_name diag_condition_item_name;
+  DYNCALL_CREATE_DEF *dyncol_def;
+  List<DYNCALL_CREATE_DEF> *dyncol_def_list;
 }
 
 %{
@@ -781,10 +788,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 
 %pure_parser                                    /* We have threads */
 /*
-  Currently there are 168 shift/reduce conflicts.
+  Currently there are 171 shift/reduce conflicts.
   We should not introduce new conflicts any more.
 */
-%expect 168
+%expect 171
 
 /*
    Comments for TOKENS.
@@ -814,6 +821,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  ALGORITHM_SYM
 %token  ALL                           /* SQL-2003-R */
 %token  ALTER                         /* SQL-2003-R */
+%token  ALWAYS_SYM
 %token  ANALYZE_SYM
 %token  AND_AND_SYM                   /* OPERATOR */
 %token  AND_SYM                       /* SQL-2003-R */
@@ -860,17 +868,25 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  CHANGED
 %token  CHARSET
 %token  CHAR_SYM                      /* SQL-2003-R */
+%token  CHECKPOINT_SYM
 %token  CHECKSUM_SYM
 %token  CHECK_SYM                     /* SQL-2003-R */
 %token  CIPHER_SYM
 %token  CLASS_ORIGIN_SYM              /* SQL-2003-N */
 %token  CLIENT_SYM
+%token  CLIENT_STATS_SYM
 %token  CLOSE_SYM                     /* SQL-2003-R */
 %token  COALESCE                      /* SQL-2003-N */
 %token  CODE_SYM
 %token  COLLATE_SYM                   /* SQL-2003-R */
 %token  COLLATION_SYM                 /* SQL-2003-N */
 %token  COLUMNS
+%token  COLUMN_ADD_SYM
+%token  COLUMN_CREATE_SYM
+%token  COLUMN_DELETE_SYM
+%token  COLUMN_EXISTS_SYM
+%token  COLUMN_GET_SYM
+%token  COLUMN_LIST_SYM
 %token  COLUMN_SYM                    /* SQL-2003-R */
 %token  COLUMN_NAME_SYM               /* SQL-2003-N */
 %token  COMMENT_SYM
@@ -988,6 +1004,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  FUNCTION_SYM                  /* SQL-2003-R */
 %token  GE
 %token  GENERAL
+%token  GENERATED_SYM
 %token  GEOMETRYCOLLECTION
 %token  GEOMETRY_SYM
 %token  GET_FORMAT                    /* MYSQL-FUNC */
@@ -1018,6 +1035,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  IMPORT
 %token  INDEXES
 %token  INDEX_SYM
+%token	INDEX_STATS_SYM
 %token  INFILE
 %token  INITIAL_SIZE_SYM
 %token  INNER_SYM                     /* SQL-2003-R */
@@ -1150,6 +1168,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  ON                            /* SQL-2003-R */
 %token  ONE_SHOT_SYM
 %token  ONE_SYM
+%token  ONLINE_SYM
 %token  OPEN_SYM                      /* SQL-2003-R */
 %token  OPTIMIZE
 %token  OPTIONS_SYM
@@ -1165,13 +1184,16 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  OWNER_SYM
 %token  PACK_KEYS_SYM
 %token  PAGE_SYM
+%token  PAGE_CHECKSUM_SYM
 %token  PARAM_MARKER
 %token  PARSER_SYM
+%token  PARSE_VCOL_EXPR_SYM
 %token  PARTIAL                       /* SQL-2003-N */
 %token  PARTITIONING_SYM
 %token  PARTITIONS_SYM
 %token  PARTITION_SYM                 /* SQL-2003-R */
 %token  PASSWORD
+%token  PERSISTENT_SYM
 %token  PHASE_SYM
 %token  PLUGINS_SYM
 %token  PLUGIN_SYM
@@ -1312,6 +1334,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  TABLES
 %token  TABLESPACE
 %token  TABLE_REF_PRIORITY
+%token  TABLE_STATS_SYM
 %token  TABLE_SYM                     /* SQL-2003-R */
 %token  TABLE_CHECKSUM_SYM
 %token  TABLE_NAME_SYM                /* SQL-2003-N */
@@ -1332,6 +1355,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  TO_SYM                        /* SQL-2003-R */
 %token  TRAILING                      /* SQL-2003-R */
 %token  TRANSACTION_SYM
+%token  TRANSACTIONAL_SYM
 %token  TRIGGERS_SYM
 %token  TRIGGER_SYM                   /* SQL-2003-R */
 %token  TRIM                          /* SQL-2003-N */
@@ -1359,6 +1383,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  UPGRADE_SYM
 %token  USAGE                         /* SQL-2003-N */
 %token  USER                          /* SQL-2003-R */
+%token  USER_STATS_SYM
 %token  USE_FRM
 %token  USE_SYM
 %token  USING                         /* SQL-2003-R */
@@ -1373,7 +1398,9 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  VARIANCE_SYM
 %token  VARYING                       /* SQL-2003-R */
 %token  VAR_SAMP_SYM
+%token  VIA_SYM
 %token  VIEW_SYM                      /* SQL-2003-N */
+%token  VIRTUAL_SYM
 %token  WAIT_SYM
 %token  WARNINGS
 %token  WEEK_SYM
@@ -1394,6 +1421,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  YEAR_SYM                      /* SQL-2003-R */
 %token  ZEROFILL
 
+%token IMPOSSIBLE_ACTION		/* To avoid warning for yyerrlab1 */
+
 %left   JOIN_SYM INNER_SYM STRAIGHT_JOIN CROSS LEFT RIGHT
 /* A dummy token to force the priority of table_ref production in a join. */
 %left   TABLE_REF_PRIORITY
@@ -1441,11 +1470,13 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
         table_option opt_if_not_exists opt_no_write_to_binlog
         opt_temporary all_or_any opt_distinct
         opt_ignore_leaves fulltext_options spatial_type union_option
-        start_transaction_opts
+        start_transaction_opts field_def
         union_opt select_derived_init option_type2
         opt_natural_language_mode opt_query_expansion
         opt_ev_status opt_ev_on_completion ev_on_completion opt_ev_comment
         ev_alter_on_schedule_completion opt_ev_rename_to opt_ev_sql_stmt
+        optional_flush_tables_arguments opt_dyncol_type dyncol_type
+        opt_time_precision
 
 %type <m_yes_no_unk>
         opt_chain opt_release
@@ -1459,6 +1490,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %type <ulonglong_number>
         ulonglong_num real_ulonglong_num size_number
 
+%type <choice> choice
+
 %type <lock_type>
         replace_lock_option opt_low_priority insert_lock_option load_data_lock
 
@@ -1547,6 +1580,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 
 %type <boolfunc2creator> comp_op
 
+%type <dyncol_def> dyncall_create_element
+
+%type <dyncol_def_list> dyncall_create_list
+
 %type <NONE>
         query verb_clause create change select do drop insert replace insert2
         insert_values update delete truncate rename
@@ -1597,10 +1634,13 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
         init_key_options normal_key_options normal_key_opts all_key_opt 
         spatial_key_options fulltext_key_options normal_key_opt 
         fulltext_key_opt spatial_key_opt fulltext_key_opts spatial_key_opts
+	keep_gcc_happy
         key_using_alg
         part_column_list
         server_def server_options_list server_option
         definer_opt no_definer definer
+        parse_vcol_expr vcol_opt_specifier vcol_opt_attribute
+        vcol_opt_attribute_list vcol_attribute
 END_OF_INPUT
 
 %type <NONE> call sp_proc_stmts sp_proc_stmts1 sp_proc_stmt
@@ -1730,11 +1770,13 @@ statement:
         | help
         | insert
         | install
+	| keep_gcc_happy
+        | keycache
         | kill
         | load
         | lock
         | optimize
-        | keycache
+        | parse_vcol_expr
         | partition_entry
         | preload
         | prepare
@@ -1860,15 +1902,7 @@ help:
 change:
           CHANGE MASTER_SYM TO_SYM
           {
-            LEX *lex = Lex;
-            lex->sql_command = SQLCOM_CHANGE_MASTER;
-            bzero((char*) &lex->mi, sizeof(lex->mi));
-            /*
-              resetting flags that can left from the previous CHANGE MASTER
-            */
-            lex->mi.repl_ignore_server_ids_opt= LEX_MASTER_INFO::LEX_MI_UNCHANGED;
-            my_init_dynamic_array(&Lex->mi.repl_ignore_server_ids,
-                                  sizeof(::server_id), 16, 16);
+            Lex->sql_command = SQLCOM_CHANGE_MASTER;
           }
           master_defs
           {}
@@ -2054,7 +2088,7 @@ create:
               push_warning_printf(YYTHD, MYSQL_ERROR::WARN_LEVEL_WARN,
                                   ER_WARN_USING_OTHER_HANDLER,
                                   ER(ER_WARN_USING_OTHER_HANDLER),
-                                  ha_resolve_storage_engine_name(lex->create_info.db_type),
+                                  hton_name(lex->create_info.db_type)->str,
                                   $5->table.str);
             }
             create_table_set_open_action_and_adjust_tables(lex);
@@ -2106,7 +2140,7 @@ create:
         | CREATE
           {
             Lex->create_view_mode= VIEW_CREATE_NEW;
-            Lex->create_view_algorithm= VIEW_ALGORITHM_UNDEFINED;
+            Lex->create_view_algorithm= DTYPE_ALGORITHM_UNDEFINED;
             Lex->create_view_suid= TRUE;
           }
           view_or_trigger_or_sp_or_event
@@ -2248,7 +2282,7 @@ opt_ev_status:
 ev_starts:
           /* empty */
           {
-            Item *item= new (YYTHD->mem_root) Item_func_now_local();
+            Item *item= new (YYTHD->mem_root) Item_func_now_local(0);
             if (item == NULL)
               MYSQL_YYABORT;
             Lex->event_parse_data->item_starts= item;
@@ -2529,6 +2563,7 @@ sp_init_param:
 
             lex->interval_list.empty();
             lex->uint_geom_type= 0;
+            lex->vcol_info= 0;
           }
         ;
 
@@ -4249,7 +4284,11 @@ create2:
         ;
 
 create2a:
-          create_field_list ')' opt_create_table_options
+          create_field_list ')'
+          {
+            Lex->create_info.option_list= NULL;
+          }
+          opt_create_table_options
           opt_create_partitioning
           create3 {}
         |  opt_create_partitioning
@@ -4929,6 +4968,12 @@ opt_part_option:
             part_info->curr_part_elem->engine_type= $4;
             part_info->default_engine_type= $4;
           }
+        | CONNECTION_SYM opt_equal TEXT_STRING_sys
+          {
+            LEX *lex= Lex;
+            lex->part_info->curr_part_elem->connect_string.str= $3.str;
+            lex->part_info->curr_part_elem->connect_string.length= $3.length;
+          }
         | NODEGROUP_SYM opt_equal real_ulong_num
           { Lex->part_info->curr_part_elem->nodegroup_id= (uint16) $3; }
         | MAX_ROWS opt_equal real_ulonglong_num
@@ -5099,6 +5144,11 @@ create_table_option:
              Lex->create_info.table_options|= $3 ? HA_OPTION_CHECKSUM : HA_OPTION_NO_CHECKSUM;
              Lex->create_info.used_fields|= HA_CREATE_USED_CHECKSUM;
           }
+        | PAGE_CHECKSUM_SYM opt_equal choice
+          {
+            Lex->create_info.used_fields|= HA_CREATE_USED_PAGE_CHECKSUM;
+            Lex->create_info.page_checksum= $3;
+          }
         | DELAY_KEY_WRITE_SYM opt_equal ulong_num
           {
             Lex->create_info.table_options|= $3 ? HA_OPTION_DELAY_KEY_WRITE : HA_OPTION_NO_DELAY_KEY_WRITE;
@@ -5169,6 +5219,35 @@ create_table_option:
             Lex->create_info.used_fields|= HA_CREATE_USED_KEY_BLOCK_SIZE;
             Lex->create_info.key_block_size= $3;
           }
+        | TRANSACTIONAL_SYM opt_equal choice
+          {
+	    Lex->create_info.used_fields|= HA_CREATE_USED_TRANSACTIONAL;
+            Lex->create_info.transactional= $3;
+          }
+        | IDENT_sys equal TEXT_STRING_sys
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, true, &Lex->create_info.option_list,
+                                  &Lex->option_list_last);
+          }
+        | IDENT_sys equal ident
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, false, &Lex->create_info.option_list,
+                                  &Lex->option_list_last);
+          }
+        | IDENT_sys equal real_ulonglong_num
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, &Lex->create_info.option_list,
+                                  &Lex->option_list_last, YYTHD->mem_root);
+          }
+        | IDENT_sys equal DEFAULT
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, &Lex->create_info.option_list,
+                                  &Lex->option_list_last);
+          }
         ;
 
 default_charset:
@@ -5250,6 +5329,7 @@ row_types:
         | COMPRESSED_SYM { $$= ROW_TYPE_COMPRESSED; }
         | REDUNDANT_SYM  { $$= ROW_TYPE_REDUNDANT; }
         | COMPACT_SYM    { $$= ROW_TYPE_COMPACT; }
+        | PAGE_SYM       { $$= ROW_TYPE_PAGE; }
         ;
 
 merge_insert_types:
@@ -5297,25 +5377,33 @@ column_def:
         ;
 
 key_def:
-          normal_key_type opt_ident key_alg '(' key_list ')' normal_key_options
+          normal_key_type opt_ident key_alg '(' key_list ')'
+          { Lex->option_list= NULL; }
+          normal_key_options
           {
             if (add_create_index (Lex, $1, $2))
               MYSQL_YYABORT;
           }
         | fulltext opt_key_or_index opt_ident init_key_options 
-            '(' key_list ')' fulltext_key_options
+            '(' key_list ')'
+          { Lex->option_list= NULL; }
+            fulltext_key_options
           {
             if (add_create_index (Lex, $1, $3))
               MYSQL_YYABORT;
           }
         | spatial opt_key_or_index opt_ident init_key_options 
-            '(' key_list ')' spatial_key_options
+            '(' key_list ')'
+          { Lex->option_list= NULL; }
+            spatial_key_options
           {
             if (add_create_index (Lex, $1, $3))
               MYSQL_YYABORT;
           }
         | opt_constraint constraint_key_type opt_ident key_alg
-          '(' key_list ')' normal_key_options
+          '(' key_list ')'
+          { Lex->option_list= NULL; }
+          normal_key_options
           {
             if (add_create_index (Lex, $2, $3.str ? $3 : $1))
               MYSQL_YYABORT;
@@ -5332,6 +5420,7 @@ key_def:
             if (key == NULL)
               MYSQL_YYABORT;
             lex->alter_info.key_list.push_back(key);
+            lex->option_list= NULL;
             if (add_create_index (lex, Key::MULTIPLE, $1.str ? $1 : $4,
                                   &default_key_create_info, 1))
               MYSQL_YYABORT;
@@ -5371,8 +5460,10 @@ field_spec:
             lex->default_value= lex->on_update_value= 0;
             lex->comment=null_lex_str;
             lex->charset=NULL;
+	    lex->vcol_info= 0;
+            lex->option_list= NULL;
           }
-          type opt_attribute
+          field_def
           {
             LEX *lex=Lex;
             if (add_field_to_list(lex->thd, &$1, (enum enum_field_types) $3,
@@ -5380,8 +5471,98 @@ field_spec:
                                   lex->default_value, lex->on_update_value, 
                                   &lex->comment,
                                   lex->change,&lex->interval_list,lex->charset,
-                                  lex->uint_geom_type))
+                                  lex->uint_geom_type,
+                                  lex->vcol_info, lex->option_list))
+              MYSQL_YYABORT;
+          }
+        ;
+
+field_def:  /* $$ is the field type code read by the referencing rule */
+          type opt_attribute { $$= $1; } /* explicit; {} left $$ to yyval pre-init */
+        | type opt_generated_always AS '(' virtual_column_func ')'
+          vcol_opt_specifier
+          vcol_opt_attribute
+          {
+            $$= (enum enum_field_types)MYSQL_TYPE_VIRTUAL; /* reported type */
+            Lex->vcol_info->set_field_type((enum enum_field_types) $1); /* declared type */
+          }
+        ;
+
+opt_generated_always:
+          /* empty: GENERATED ALWAYS is optional; neither form has an action */
+        | GENERATED_SYM ALWAYS_SYM {}
+        ;
+
+vcol_opt_specifier:
+          /* empty: treated like VIRTUAL (stored_in_db flag set to FALSE) */
+          {
+            Lex->vcol_info->set_stored_in_db_flag(FALSE);
+          }
+        | VIRTUAL_SYM
+          {
+            Lex->vcol_info->set_stored_in_db_flag(FALSE);
+          }
+        | PERSISTENT_SYM
+          {
+            Lex->vcol_info->set_stored_in_db_flag(TRUE);
+          }
+        ;
+
+vcol_opt_attribute:
+          /* empty: no attributes follow the virtual column expression */ {}
+        | vcol_opt_attribute_list {}
+        ;
+
+vcol_opt_attribute_list:
+          vcol_opt_attribute_list vcol_attribute {} /* one or more vcol_attribute */
+        | vcol_attribute
+        ;
+
+vcol_attribute:
+          UNIQUE_SYM
+          {
+            /* bare UNIQUE: mark the ALTER as adding an index, flag the field */
+            Lex->alter_info.flags|= ALTER_ADD_INDEX;
+            Lex->type|= UNIQUE_FLAG;
+          }
+        | UNIQUE_SYM KEY_SYM
+          {
+            /* UNIQUE KEY: same bookkeeping, recorded with UNIQUE_KEY_FLAG */
+            Lex->alter_info.flags|= ALTER_ADD_INDEX;
+            Lex->type|= UNIQUE_KEY_FLAG;
+          }
+        | COMMENT_SYM TEXT_STRING_sys { Lex->comment= $2; } /* field comment */
+        ;
+
+parse_vcol_expr:
+          PARSE_VCOL_EXPR_SYM '(' virtual_column_func ')'
+          {
+            /*
+              "PARSE_VCOL_EXPR" can only be used by the SQL server
+              when reading a '*.frm' file.
+              Prevent the end user from invoking this command.
+            */
+            if (!Lex->parse_vcol_expr)
+            {
+              my_message(ER_SYNTAX_ERROR, ER(ER_SYNTAX_ERROR), MYF(0));
               MYSQL_YYABORT;
+            }
+          }
+        ;
+
+virtual_column_func:
+          remember_name expr remember_end
+          {
+            Lex->vcol_info= new Virtual_column_info();
+            if (!Lex->vcol_info)
+            {
+              mem_alloc_error(sizeof(Virtual_column_info));
+              MYSQL_YYABORT;
+            }
+            uint expr_len= (uint)($3 - $1) - 1;
+            Lex->vcol_info->expr_str.str= (char* ) sql_memdup($1 + 1, expr_len);
+            Lex->vcol_info->expr_str.length= expr_len;
+            Lex->vcol_info->expr_item= $2;
           }
         ;
 
@@ -5457,9 +5638,9 @@ type:
           { $$=MYSQL_TYPE_YEAR; }
         | DATE_SYM
           { $$=MYSQL_TYPE_DATE; }
-        | TIME_SYM
+        | TIME_SYM opt_field_length
           { $$=MYSQL_TYPE_TIME; }
-        | TIMESTAMP
+        | TIMESTAMP opt_field_length
           {
             if (YYTHD->variables.sql_mode & MODE_MAXDB)
               $$=MYSQL_TYPE_DATETIME;
@@ -5472,7 +5653,7 @@ type:
               $$=MYSQL_TYPE_TIMESTAMP;
             }
           }
-        | DATETIME
+        | DATETIME opt_field_length
           { $$=MYSQL_TYPE_DATETIME; }
         | TINYBLOB
           {
@@ -5666,9 +5847,9 @@ attribute:
           NULL_SYM { Lex->type&= ~ NOT_NULL_FLAG; }
         | not NULL_SYM { Lex->type|= NOT_NULL_FLAG; }
         | DEFAULT now_or_signed_literal { Lex->default_value=$2; }
-        | ON UPDATE_SYM NOW_SYM optional_braces
+        | ON UPDATE_SYM NOW_SYM opt_time_precision
           {
-            Item *item= new (YYTHD->mem_root) Item_func_now_local();
+            Item *item= new (YYTHD->mem_root) Item_func_now_local($4);
             if (item == NULL)
               MYSQL_YYABORT;
             Lex->on_update_value= item;
@@ -5712,6 +5893,29 @@ attribute:
               Lex->charset=$2;
             }
           }
+        | IDENT_sys equal TEXT_STRING_sys
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, true, &Lex->option_list,
+                                  &Lex->option_list_last);
+          }
+        | IDENT_sys equal ident
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, false, &Lex->option_list,
+                                  &Lex->option_list_last);
+          }
+        | IDENT_sys equal real_ulonglong_num
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, &Lex->option_list,
+                                  &Lex->option_list_last, YYTHD->mem_root);
+          }
+        | IDENT_sys equal DEFAULT
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, &Lex->option_list, &Lex->option_list_last);
+          }
         ;
 
 
@@ -5737,9 +5941,9 @@ type_with_opt_collate:
 
 
 now_or_signed_literal:
-          NOW_SYM optional_braces
+          NOW_SYM opt_time_precision
           {
-            $$= new (YYTHD->mem_root) Item_func_now_local();
+            $$= new (YYTHD->mem_root) Item_func_now_local($2);
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
@@ -6081,6 +6285,29 @@ all_key_opt:
           KEY_BLOCK_SIZE opt_equal ulong_num
           { Lex->key_create_info.block_size= $3; }
 	| COMMENT_SYM TEXT_STRING_sys { Lex->key_create_info.comment= $2; }
+        | IDENT_sys equal TEXT_STRING_sys
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, true, &Lex->option_list,
+                                  &Lex->option_list_last);
+          }
+        | IDENT_sys equal ident
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, false, &Lex->option_list,
+                                  &Lex->option_list_last);
+          }
+        | IDENT_sys equal real_ulonglong_num
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, &Lex->option_list,
+                                  &Lex->option_list_last, YYTHD->mem_root);
+          }
+        | IDENT_sys equal DEFAULT
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, &Lex->option_list, &Lex->option_list_last);
+          }
         ;
 
 normal_key_opt:
@@ -6156,7 +6383,7 @@ string_list:
 */
 
 alter:
-          ALTER opt_ignore TABLE_SYM table_ident
+          ALTER alter_options TABLE_SYM table_ident
           {
             THD *thd= YYTHD;
             LEX *lex= thd->lex;
@@ -6282,7 +6509,7 @@ alter:
               my_error(ER_SP_BADSTATEMENT, MYF(0), "ALTER VIEW");
               MYSQL_YYABORT;
             }
-            lex->create_view_algorithm= VIEW_ALGORITHM_UNDEFINED;
+            lex->create_view_algorithm= DTYPE_ALGORITHM_UNDEFINED;
             lex->create_view_mode= VIEW_ALTER;
           }
           view_tail
@@ -6612,6 +6839,7 @@ alter_list_item:
             LEX *lex=Lex;
             lex->change= $3.str;
             lex->alter_info.flags|= ALTER_CHANGE_COLUMN;
+            lex->option_list= NULL;
           }
           field_spec opt_place
           {
@@ -6625,8 +6853,10 @@ alter_list_item:
             lex->comment=null_lex_str;
             lex->charset= NULL;
             lex->alter_info.flags|= ALTER_CHANGE_COLUMN;
+	    lex->vcol_info= 0;
+            lex->option_list= NULL;
           }
-          type opt_attribute
+          field_def
           {
             LEX *lex=Lex;
             if (add_field_to_list(lex->thd,&$3,
@@ -6635,7 +6865,8 @@ alter_list_item:
                                   lex->default_value, lex->on_update_value,
                                   &lex->comment,
                                   $3.str, &lex->interval_list, lex->charset,
-                                  lex->uint_geom_type))
+                                  lex->uint_geom_type,
+                                  lex->vcol_info, lex->option_list))
               MYSQL_YYABORT;
           }
           opt_place
@@ -6774,6 +7005,25 @@ opt_ignore:
         | IGNORE_SYM { Lex->ignore= 1;}
         ;
 
+alter_options:
+          { Lex->online= 0; Lex->ignore= 0; } alter_options_part2 /* reset, then parse */
+        ;
+	
+alter_options_part2:
+          /* empty: no modifier given, flags keep what alter_options reset */
+        | alter_option_list
+        ;
+
+alter_option_list:
+        alter_option_list alter_option  /* one or more modifiers, in any order */
+        | alter_option
+        ;
+
+alter_option:
+          IGNORE_SYM { Lex->ignore= 1; }  /* ALTER IGNORE ... */
+        | ONLINE_SYM { Lex->online= 1; }  /* ALTER ONLINE ... */
+        ;
+
 opt_restrict:
           /* empty */ { Lex->drop_mode= DROP_DEFAULT; }
         | RESTRICT    { Lex->drop_mode= DROP_RESTRICT; }
@@ -6803,8 +7053,6 @@ slave:
             LEX *lex=Lex;
             lex->sql_command = SQLCOM_SLAVE_START;
             lex->type = 0;
-            /* We'll use mi structure for UNTIL options */
-            bzero((char*) &lex->mi, sizeof(lex->mi));
             /* If you change this code don't forget to update SLAVE START too */
           }
           slave_until
@@ -6821,8 +7069,6 @@ slave:
             LEX *lex=Lex;
             lex->sql_command = SQLCOM_SLAVE_START;
             lex->type = 0;
-            /* We'll use mi structure for UNTIL options */
-            bzero((char*) &lex->mi, sizeof(lex->mi));
           }
           slave_until
           {}
@@ -6874,7 +7120,7 @@ slave_until:
           {
             LEX *lex=Lex;
             if (((lex->mi.log_file_name || lex->mi.pos) &&
-                (lex->mi.relay_log_name || lex->mi.relay_log_pos)) ||
+                 (lex->mi.relay_log_name || lex->mi.relay_log_pos)) ||
                 !((lex->mi.log_file_name && lex->mi.pos) ||
                   (lex->mi.relay_log_name && lex->mi.relay_log_pos)))
             {
@@ -7195,8 +7441,6 @@ cache_keys_spec:
           {
             Lex->select_lex.alloc_index_hints(YYTHD);
             Select->set_index_hint_type(INDEX_HINT_USE, 
-                                        old_mode ? 
-                                        INDEX_HINT_MASK_JOIN : 
                                         INDEX_HINT_MASK_ALL);
           }
           cache_key_list_or_empty
@@ -7473,6 +7717,12 @@ select_alias:
         | TEXT_STRING_sys { $$=$1; }
         ;
 
+opt_time_precision:  /* $$ is the precision argument; 0 when none is written */
+          /* empty */             { $$= 0;  }
+        | '(' ')'                 { $$= 0;  }
+        | '(' real_ulong_num ')'  { $$= $2; }
+        ;
+
 optional_braces:
           /* empty */ {}
         | '(' ')' {}
@@ -7534,7 +7784,7 @@ expr:
         | expr XOR expr %prec XOR
           {
             /* XOR is a proprietary extension */
-            $$ = new (YYTHD->mem_root) Item_cond_xor($1, $3);
+            $$ = new (YYTHD->mem_root) Item_func_xor($1, $3);
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
@@ -7888,6 +8138,133 @@ all_or_any:
         | ANY_SYM { $$ = 0; }
         ;
 
+opt_dyncol_type:
+          /* empty: no AS clause was written */
+          {
+            $$= DYN_COL_NULL; /* automatic type */
+            Lex->charset= NULL;
+            Lex->length= 0;
+            Lex->dec= 0;
+          }
+        | AS dyncol_type { $$= $2; }
+        ;
+
+dyncol_type:
+          INT_SYM
+          {
+            $$= DYN_COL_INT;
+            Lex->charset= NULL;
+            Lex->length= 0;
+            Lex->dec= 0;
+          }
+        | UNSIGNED INT_SYM
+          {
+            $$= DYN_COL_UINT;
+            Lex->charset= NULL;
+            Lex->length= 0;
+            Lex->dec= 0;
+          }
+        | DOUBLE_SYM
+          {
+            $$= DYN_COL_DOUBLE;
+            Lex->charset= NULL;
+            Lex->length= 0;
+            Lex->dec= 0;
+          }
+        | REAL
+          {
+            $$= DYN_COL_DOUBLE;
+            Lex->charset= NULL;
+            Lex->length= 0;
+            Lex->dec= 0;
+          }
+        | FLOAT_SYM
+          {
+            $$= DYN_COL_DOUBLE;
+            Lex->charset= NULL;
+            Lex->length= 0;
+            Lex->dec= 0;
+          }
+        | DECIMAL_SYM float_options
+          {
+            Lex->charset= NULL; /* length/dec are left as float_options set them */
+            $$= DYN_COL_DECIMAL;
+          }
+        | char opt_binary
+          {
+            $$= DYN_COL_STRING; /* charset is not touched in this action */
+            Lex->length= 0;
+            Lex->dec= 0;
+          }
+        | nchar
+          {
+            $$= DYN_COL_STRING;
+            Lex->charset= national_charset_info;
+            Lex->length= 0;
+            Lex->dec= 0;
+          }
+        | DATE_SYM
+          {
+            $$= DYN_COL_DATE;
+            Lex->charset= NULL;
+            Lex->length= 0;
+            Lex->dec= 0;
+          }
+        | TIME_SYM opt_field_length
+          {
+            /* the value parsed into length is moved to dec; length is cleared */
+            $$= DYN_COL_TIME;
+            Lex->charset= NULL;
+            Lex->dec= Lex->length;
+            Lex->length= 0;
+          }
+        | DATETIME opt_field_length
+          {
+            /* the value parsed into length is moved to dec; length is cleared */
+            $$= DYN_COL_DATETIME;
+            Lex->charset= NULL;
+            Lex->dec= Lex->length;
+            Lex->length= 0;
+          }
+        ;
+
+dyncall_create_element:
+   /* One "num, value [AS type]" item; yields a mem_root DYNCALL_CREATE_DEF. */
+   expr ',' expr opt_dyncol_type
+   {
+     LEX *lex= Lex;
+     $$= (DYNCALL_CREATE_DEF *)
+       alloc_root(YYTHD->mem_root, sizeof(DYNCALL_CREATE_DEF));
+     if ($$ == NULL)
+       MYSQL_YYABORT;
+     $$->num= $1;
+     $$->value= $3;
+     $$->type= (DYNAMIC_COLUMN_TYPE)$4;
+     $$->cs= lex->charset;
+     /*
+       lex->length and lex->dec are left by opt_dyncol_type as decimal
+       strings or NULL; NULL means 0. Both len and frac are always set.
+     */
+     $$->len= lex->length ? strtoul(lex->length, NULL, 10) : 0;
+     $$->frac= lex->dec ? strtoul(lex->dec, NULL, 10) : 0;
+   }
+   ;
+
+dyncall_create_list:
+     dyncall_create_element
+       {
+         /* first element: allocate the list on YYTHD->mem_root */
+         if (!($$= new (YYTHD->mem_root) List<DYNCALL_CREATE_DEF>))
+           MYSQL_YYABORT;
+         $$->push_back($1);
+       }
+   | dyncall_create_list ',' dyncall_create_element
+       {
+         $$= $1;
+         $$->push_back($3);
+       }
+   ;
+
 simple_expr:
           simple_ident
         | function_call_keyword
@@ -8151,13 +8528,13 @@ function_call_keyword:
           }
         | TIME_SYM '(' expr ')'
           {
-            $$= new (YYTHD->mem_root) Item_time_typecast($3);
+            $$= new (YYTHD->mem_root) Item_time_typecast($3, AUTO_SEC_PART_DIGITS);
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
         | TIMESTAMP '(' expr ')'
           {
-            $$= new (YYTHD->mem_root) Item_datetime_typecast($3);
+            $$= new (YYTHD->mem_root) Item_datetime_typecast($3, AUTO_SEC_PART_DIGITS);
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
@@ -8264,16 +8641,9 @@ function_call_nonkeyword:
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
           }
-        | CURTIME optional_braces
-          {
-            $$= new (YYTHD->mem_root) Item_func_curtime_local();
-            if ($$ == NULL)
-              MYSQL_YYABORT;
-            Lex->safe_to_cache_query=0;
-          }
-        | CURTIME '(' expr ')'
+        | CURTIME opt_time_precision
           {
-            $$= new (YYTHD->mem_root) Item_func_curtime_local($3);
+            $$= new (YYTHD->mem_root) Item_func_curtime_local($2);
             if ($$ == NULL)
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
@@ -8304,16 +8674,9 @@ function_call_nonkeyword:
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
-        | NOW_SYM optional_braces
-          {
-            $$= new (YYTHD->mem_root) Item_func_now_local();
-            if ($$ == NULL)
-              MYSQL_YYABORT;
-            Lex->safe_to_cache_query=0;
-          }
-        | NOW_SYM '(' expr ')'
+        | NOW_SYM opt_time_precision
           {
-            $$= new (YYTHD->mem_root) Item_func_now_local($3);
+            $$= new (YYTHD->mem_root) Item_func_now_local($2);
             if ($$ == NULL)
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
@@ -8361,7 +8724,7 @@ function_call_nonkeyword:
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
-        | SYSDATE optional_braces
+        | SYSDATE opt_time_precision
           {
             /*
               Unlike other time-related functions, SYSDATE() is
@@ -8372,19 +8735,9 @@ function_call_nonkeyword:
             */
             Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
             if (global_system_variables.sysdate_is_now == 0)
-              $$= new (YYTHD->mem_root) Item_func_sysdate_local();
+              $$= new (YYTHD->mem_root) Item_func_sysdate_local($2);
             else
-              $$= new (YYTHD->mem_root) Item_func_now_local();
-            if ($$ == NULL)
-              MYSQL_YYABORT;
-            Lex->safe_to_cache_query=0;
-          }
-        | SYSDATE '(' expr ')'
-          {
-            if (global_system_variables.sysdate_is_now == 0)
-              $$= new (YYTHD->mem_root) Item_func_sysdate_local($3);
-            else
-              $$= new (YYTHD->mem_root) Item_func_now_local($3);
+              $$= new (YYTHD->mem_root) Item_func_now_local($2);
             if ($$ == NULL)
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
@@ -8408,20 +8761,65 @@ function_call_nonkeyword:
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
           }
-        | UTC_TIME_SYM optional_braces
+        | UTC_TIME_SYM opt_time_precision
           {
-            $$= new (YYTHD->mem_root) Item_func_curtime_utc();
+            $$= new (YYTHD->mem_root) Item_func_curtime_utc($2);
             if ($$ == NULL)
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
           }
-        | UTC_TIMESTAMP_SYM optional_braces
+        | UTC_TIMESTAMP_SYM opt_time_precision
           {
-            $$= new (YYTHD->mem_root) Item_func_now_utc();
+            $$= new (YYTHD->mem_root) Item_func_now_utc($2);
             if ($$ == NULL)
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
           }
+        |
+          COLUMN_ADD_SYM '(' expr ',' dyncall_create_list ')'
+          {
+            $$= create_func_dyncol_add(YYTHD, $3, *$5);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          COLUMN_DELETE_SYM '(' expr ',' expr_list ')'
+          {
+            $$= create_func_dyncol_delete(YYTHD, $3, *$5);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          COLUMN_EXISTS_SYM '(' expr ',' expr ')'
+          {
+            $$= new (YYTHD->mem_root) Item_func_dyncol_exists($3, $5);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          COLUMN_LIST_SYM '(' expr ')'
+          {
+            $$= new (YYTHD->mem_root) Item_func_dyncol_list($3);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          COLUMN_CREATE_SYM '(' dyncall_create_list ')'
+          {
+            $$= create_func_dyncol_create(YYTHD, *$3);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          COLUMN_GET_SYM '(' expr ',' expr AS cast_type ')'
+          {
+            LEX *lex= Lex;
+            $$= create_func_dyncol_get(YYTHD, $3, $5, $7,
+                                        lex->length, lex->dec,
+                                        lex->charset);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
         ;
 
 /*
@@ -8724,7 +9122,7 @@ function_call_generic:
 
             builder= find_qualified_function_builder(thd);
             DBUG_ASSERT(builder);
-            item= builder->create(thd, $1, $3, true, $5);
+            item= builder->create_with_db(thd, $1, $3, true, $5);
 
             if (! ($$= item))
             {
@@ -9038,6 +9436,8 @@ cast_type:
           { $$=ITEM_CAST_CHAR; Lex->dec= 0; }
         | NCHAR_SYM opt_field_length
           { $$=ITEM_CAST_CHAR; Lex->charset= national_charset_info; Lex->dec=0; }
+        | INT_SYM
+          { $$=ITEM_CAST_SIGNED_INT; Lex->charset= NULL; Lex->dec=Lex->length= (char*)0; }
         | SIGNED_SYM
           { $$=ITEM_CAST_SIGNED_INT; Lex->charset= NULL; Lex->dec=Lex->length= (char*)0; }
         | SIGNED_SYM INT_SYM
@@ -9048,13 +9448,24 @@ cast_type:
           { $$=ITEM_CAST_UNSIGNED_INT; Lex->charset= NULL; Lex->dec=Lex->length= (char*)0; }
         | DATE_SYM
           { $$=ITEM_CAST_DATE; Lex->charset= NULL; Lex->dec=Lex->length= (char*)0; }
-        | TIME_SYM
-          { $$=ITEM_CAST_TIME; Lex->charset= NULL; Lex->dec=Lex->length= (char*)0; }
-        | DATETIME
-          { $$=ITEM_CAST_DATETIME; Lex->charset= NULL; Lex->dec=Lex->length= (char*)0; }
+        | TIME_SYM opt_field_length
+          {
+            $$= ITEM_CAST_TIME;          /* the length value is reused as dec */
+            Lex->dec= Lex->length;
+            Lex->length= (char*)0; Lex->charset= NULL;
+          }
+        | DATETIME opt_field_length
+          {
+            $$= ITEM_CAST_DATETIME;      /* the length value is reused as dec */
+            Lex->dec= Lex->length;
+            Lex->length= (char*)0; Lex->charset= NULL;
+          }
         | DECIMAL_SYM float_options
           { $$=ITEM_CAST_DECIMAL; Lex->charset= NULL; }
-        ;
+        | DOUBLE_SYM
+          { Lex->charset= NULL; Lex->length= Lex->dec= 0;}
+          opt_precision
+          { $$=ITEM_CAST_DOUBLE; }
 
 opt_expr_list:
           /* empty */ { $$= NULL; }
@@ -9560,7 +9971,7 @@ opt_outer:
 index_hint_clause:
           /* empty */
           {
-            $$= old_mode ?  INDEX_HINT_MASK_JOIN : INDEX_HINT_MASK_ALL; 
+            $$= YYTHD->variables.old_mode ?  INDEX_HINT_MASK_JOIN : INDEX_HINT_MASK_ALL; 
           }
         | FOR_SYM JOIN_SYM      { $$= INDEX_HINT_MASK_JOIN;  }
         | FOR_SYM ORDER_SYM BY  { $$= INDEX_HINT_MASK_ORDER; }
@@ -9705,7 +10116,7 @@ where_clause:
           expr
           {
             SELECT_LEX *select= Select;
-            select->where= $3;
+            select->where= normalize_cond($3);
             select->parsing_place= NO_MATTER;
             if ($3)
               $3->top_level_item();
@@ -9721,7 +10132,7 @@ having_clause:
           expr
           {
             SELECT_LEX *sel= Select;
-            sel->having= $3;
+            sel->having= normalize_cond($3);
             sel->parsing_place= NO_MATTER;
             if ($3)
               $3->top_level_item();
@@ -10032,6 +10443,7 @@ ulonglong_num:
 real_ulonglong_num:
           NUM           { int error; $$= (ulonglong) my_strtoll10($1.str, (char**) 0, &error); }
         | ULONGLONG_NUM { int error; $$= (ulonglong) my_strtoll10($1.str, (char**) 0, &error); }
+        | HEX_NUM       { $$= strtoull($1.str, (char**) 0, 16); }
         | LONG_NUM      { int error; $$= (ulonglong) my_strtoll10($1.str, (char**) 0, &error); }
         | dec_num_error { MYSQL_YYABORT; }
         ;
@@ -10046,6 +10458,11 @@ dec_num:
         | FLOAT_NUM
         ;
 
+choice:
+	ulong_num { $$= $1 != 0 ? HA_CHOICE_YES : HA_CHOICE_NO; }
+	| DEFAULT { $$= HA_CHOICE_UNDEF; }
+	;
+
 procedure_clause:
           /* empty */
         | PROCEDURE_SYM ident /* Procedure name */
@@ -10449,16 +10866,12 @@ replace:
 insert_lock_option:
           /* empty */
           {
-#ifdef HAVE_QUERY_CACHE
             /*
-              If it is SP we do not allow insert optimisation whan result of
+              If it is SP we do not allow insert optimisation when result of
               insert visible only after the table unlocking but everyone can
               read table.
             */
             $$= (Lex->sphead ? TL_WRITE_DEFAULT : TL_WRITE_CONCURRENT_INSERT);
-#else
-            $$= TL_WRITE_CONCURRENT_INSERT;
-#endif
           }
         | LOW_PRIORITY  { $$= TL_WRITE_LOW_PRIORITY; }
         | DELAYED_SYM
@@ -11090,6 +11503,34 @@ show_param:
           {
             Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT;
           }
+        | CLIENT_STATS_SYM
+          {
+           LEX *lex= Lex;
+           lex->sql_command= SQLCOM_SHOW_CLIENT_STATS;
+           if (prepare_schema_table(YYTHD, lex, 0, SCH_CLIENT_STATS))
+             MYSQL_YYABORT;
+          }
+        | USER_STATS_SYM
+          {
+             LEX *lex= Lex;
+             lex->sql_command= SQLCOM_SHOW_USER_STATS;
+             if (prepare_schema_table(YYTHD, lex, 0, SCH_USER_STATS))
+               MYSQL_YYABORT;
+          }
+        | TABLE_STATS_SYM
+          {
+             LEX *lex= Lex;
+             lex->sql_command= SQLCOM_SHOW_TABLE_STATS;
+             if (prepare_schema_table(YYTHD, lex, 0, SCH_TABLE_STATS))
+               MYSQL_YYABORT;
+          }
+        | INDEX_STATS_SYM
+          {
+             LEX *lex= Lex;
+             lex->sql_command= SQLCOM_SHOW_INDEX_STATS;
+             if (prepare_schema_table(YYTHD, lex, 0, SCH_INDEX_STATS))
+               MYSQL_YYABORT;
+          }
         | CREATE PROCEDURE_SYM sp_name
           {
             LEX *lex= Lex;
@@ -11196,7 +11637,7 @@ wild_and_where:
           }
         | WHERE expr
           {
-            Select->where= $2;
+            Select->where= normalize_cond($2);
             if ($2)
               $2->top_level_item();
           }
@@ -11282,10 +11723,10 @@ flush_options:
 
 opt_with_read_lock:
           /* empty */ {}
-        | WITH READ_SYM LOCK_SYM
+        | WITH READ_SYM LOCK_SYM optional_flush_tables_arguments
           {
             TABLE_LIST *tables= Lex->query_tables;
-            Lex->type|= REFRESH_READ_LOCK;
+            Lex->type|= REFRESH_READ_LOCK | $4;
             for (; tables; tables= tables->next_global)
             {
               tables->mdl_request.set_type(MDL_SHARED_NO_WRITE);
@@ -11329,6 +11770,14 @@ flush_option:
             Lex->type|= REFRESH_SLAVE;
             Lex->reset_slave_info.all= false;
           }
+        | CLIENT_STATS_SYM
+          { Lex->type|= REFRESH_CLIENT_STATS; }
+        | USER_STATS_SYM
+          { Lex->type|= REFRESH_USER_STATS; }
+        | TABLE_STATS_SYM
+          { Lex->type|= REFRESH_TABLE_STATS; }
+        | INDEX_STATS_SYM
+          { Lex->type|= REFRESH_INDEX_STATS; }
         | MASTER_SYM
           { Lex->type|= REFRESH_MASTER; }
         | DES_KEY_FILE
@@ -11342,6 +11791,10 @@ opt_table_list:
         | table_list {}
         ;
 
+optional_flush_tables_arguments: /* value is OR-ed into Lex->type by the user */
+          /* empty */        { $$= 0; }
+        | AND_SYM DISABLE_SYM CHECKPOINT_SYM { $$= REFRESH_CHECKPOINT; }
+
 reset:
           RESET_SYM
           {
@@ -11484,15 +11937,11 @@ load_data_lock:
           /* empty */ { $$= TL_WRITE_DEFAULT; }
         | CONCURRENT
           {
-#ifdef HAVE_QUERY_CACHE
             /*
-              Ignore this option in SP to avoid problem with query cache
+              Ignore this option in SP to avoid problem with query cache and
+              triggers with non default priority locks
             */
-            if (Lex->sphead != 0)
-              $$= TL_WRITE_DEFAULT;
-            else
-#endif
-              $$= TL_WRITE_CONCURRENT_INSERT;
+            $$= (Lex->sphead ? TL_WRITE_DEFAULT : TL_WRITE_CONCURRENT_INSERT);
           }
         | LOW_PRIORITY { $$= TL_WRITE_LOW_PRIORITY; }
         ;
@@ -12398,7 +12847,14 @@ keyword:
         | CACHE_SYM             {}
         | CHARSET               {}
         | CHECKSUM_SYM          {}
+        | CHECKPOINT_SYM        {}
         | CLOSE_SYM             {}
+        | COLUMN_ADD_SYM        {}
+        | COLUMN_CREATE_SYM     {}
+        | COLUMN_DELETE_SYM     {}
+        | COLUMN_EXISTS_SYM     {}
+        | COLUMN_GET_SYM        {}
+        | COLUMN_LIST_SYM       {}
         | COMMENT_SYM           {}
         | COMMIT_SYM            {}
         | CONTAINS_SYM          {}
@@ -12455,6 +12911,7 @@ keyword_sp:
         | AGAINST                  {}
         | AGGREGATE_SYM            {}
         | ALGORITHM_SYM            {}
+        | ALWAYS_SYM               {}
         | ANY_SYM                  {}
         | AT_SYM                   {}
         | AUTHORS_SYM              {}
@@ -12473,6 +12930,7 @@ keyword_sp:
         | CHAIN_SYM                {}
         | CHANGED                  {}
         | CIPHER_SYM               {}
+        | CLIENT_STATS_SYM         {}
         | CLIENT_SYM               {}
         | CLASS_ORIGIN_SYM         {}
         | COALESCE                 {}
@@ -12532,6 +12990,7 @@ keyword_sp:
         | FIRST_SYM                {}
         | FIXED_SYM                {}
         | GENERAL                  {}
+        | GENERATED_SYM            {}
         | GEOMETRY_SYM             {}
         | GEOMETRYCOLLECTION       {}
         | GET_FORMAT               {}
@@ -12542,6 +13001,7 @@ keyword_sp:
         | HOUR_SYM                 {}
         | IDENTIFIED_SYM           {}
         | IGNORE_SERVER_IDS_SYM    {}
+        | INDEX_STATS_SYM          {}
         | INVOKER_SYM              {}
         | IMPORT                   {}
         | INDEXES                  {}
@@ -12615,12 +13075,14 @@ keyword_sp:
         | OLD_PASSWORD             {}
         | ONE_SHOT_SYM             {}
         | ONE_SYM                  {}
+        | ONLINE_SYM               {}
         | PACK_KEYS_SYM            {}
         | PAGE_SYM                 {}
         | PARTIAL                  {}
         | PARTITIONING_SYM         {}
         | PARTITIONS_SYM           {}
         | PASSWORD                 {}
+        | PERSISTENT_SYM           {}
         | PHASE_SYM                {}
         | PLUGIN_SYM               {}
         | PLUGINS_SYM              {}
@@ -12692,6 +13154,7 @@ keyword_sp:
         | SWAPS_SYM                {}
         | SWITCHES_SYM             {}
         | TABLE_NAME_SYM           {}
+        | TABLE_STATS_SYM          {}
         | TABLES                   {}
         | TABLE_CHECKSUM_SYM       {}
         | TABLESPACE               {}
@@ -12700,6 +13163,7 @@ keyword_sp:
         | TEXT_SYM                 {}
         | THAN_SYM                 {}
         | TRANSACTION_SYM          {}
+        | TRANSACTIONAL_SYM        {}
         | TRIGGERS_SYM             {}
         | TIMESTAMP                {}
         | TIMESTAMP_ADD            {}
@@ -12716,9 +13180,11 @@ keyword_sp:
         | UNKNOWN_SYM              {}
         | UNTIL_SYM                {}
         | USER                     {}
+        | USER_STATS_SYM           {}
         | USE_FRM                  {}
         | VARIABLES                {}
         | VIEW_SYM                 {}
+        | VIRTUAL_SYM              {}
         | VALUE_SYM                {}
         | WARNINGS                 {}
         | WAIT_SYM                 {}
@@ -13213,6 +13679,8 @@ table_lock:
             bool lock_for_write= (lock_type >= TL_WRITE_ALLOW_WRITE);
             if (!Select->add_table_to_list(YYTHD, $1, $2, 0, lock_type,
                                            (lock_for_write ?
+                                            lock_type == TL_WRITE_CONCURRENT_INSERT ?
+                                            MDL_SHARED_WRITE :
                                             MDL_SHARED_NO_READ_WRITE :
                                             MDL_SHARED_READ)))
               MYSQL_YYABORT;
@@ -13222,6 +13690,11 @@ table_lock:
 lock_option:
           READ_SYM               { $$= TL_READ_NO_INSERT; }
         | WRITE_SYM              { $$= TL_WRITE_DEFAULT; }
+        | WRITE_SYM CONCURRENT
+          {
+            $$= (Lex->sphead ? TL_WRITE_DEFAULT : TL_WRITE_CONCURRENT_INSERT);
+          }
+
         | LOW_PRIORITY WRITE_SYM { $$= TL_WRITE_LOW_PRIORITY; }
         | READ_SYM LOCAL_SYM     { $$= TL_READ; }
         ;
@@ -13346,13 +13819,13 @@ revoke:
         ;
 
 revoke_command:
-          grant_privileges ON opt_table grant_ident FROM grant_list
+          grant_privileges ON opt_table grant_ident FROM user_list
           {
             LEX *lex= Lex;
             lex->sql_command= SQLCOM_REVOKE;
             lex->type= 0;
           }
-        | grant_privileges ON FUNCTION_SYM grant_ident FROM grant_list
+        | grant_privileges ON FUNCTION_SYM grant_ident FROM user_list
           {
             LEX *lex= Lex;
             if (lex->columns.elements)
@@ -13363,7 +13836,7 @@ revoke_command:
             lex->sql_command= SQLCOM_REVOKE;
             lex->type= TYPE_ENUM_FUNCTION;
           }
-        | grant_privileges ON PROCEDURE_SYM grant_ident FROM grant_list
+        | grant_privileges ON PROCEDURE_SYM grant_ident FROM user_list
           {
             LEX *lex= Lex;
             if (lex->columns.elements)
@@ -13374,11 +13847,11 @@ revoke_command:
             lex->sql_command= SQLCOM_REVOKE;
             lex->type= TYPE_ENUM_PROCEDURE;
           }
-        | ALL opt_privileges ',' GRANT OPTION FROM grant_list
+        | ALL opt_privileges ',' GRANT OPTION FROM user_list
           {
             Lex->sql_command = SQLCOM_REVOKE_ALL;
           }
-        | PROXY_SYM ON user FROM grant_list
+        | PROXY_SYM ON user FROM user_list
           {
             LEX *lex= Lex;
             lex->users_list.push_front ($3);
@@ -13620,12 +14093,13 @@ grant_list:
           }
         ;
 
+via_or_with: VIA_SYM | WITH ;
+using_or_as: USING | AS ;
+
 grant_user:
           user IDENTIFIED_SYM BY TEXT_STRING
           {
             $$=$1; $1->password=$4;
-            if (Lex->sql_command == SQLCOM_REVOKE)
-              MYSQL_YYABORT;
             if ($4.length)
             {
               if (YYTHD->variables.old_passwords)
@@ -13652,23 +14126,17 @@ grant_user:
           }
         | user IDENTIFIED_SYM BY PASSWORD TEXT_STRING
           { 
-            if (Lex->sql_command == SQLCOM_REVOKE)
-              MYSQL_YYABORT;
             $$= $1; 
             $1->password= $5; 
           }
-        | user IDENTIFIED_SYM WITH ident_or_text
+        | user IDENTIFIED_SYM via_or_with ident_or_text
           {
-            if (Lex->sql_command == SQLCOM_REVOKE)
-              MYSQL_YYABORT;
             $$= $1;
             $1->plugin= $4;
             $1->auth= empty_lex_str;
           }
-        | user IDENTIFIED_SYM WITH ident_or_text AS TEXT_STRING_sys
+        | user IDENTIFIED_SYM via_or_with ident_or_text using_or_as TEXT_STRING_sys
           {
-            if (Lex->sql_command == SQLCOM_REVOKE)
-              MYSQL_YYABORT;
             $$= $1;
             $1->plugin= $4;
             $1->auth= $6;
@@ -13703,7 +14171,7 @@ column_list_id:
             while ((point=iter++))
             {
               if (!my_strcasecmp(system_charset_info,
-                                 point->column.ptr(), new_str->ptr()))
+                                 point->column.c_ptr(), new_str->c_ptr()))
                 break;
             }
             lex->grant_tot_col|= lex->which_columns;
@@ -14135,7 +14603,7 @@ view_replace:
 
 view_algorithm:
           ALGORITHM_SYM EQ UNDEFINED_SYM
-          { Lex->create_view_algorithm= VIEW_ALGORITHM_UNDEFINED; }
+          { Lex->create_view_algorithm= DTYPE_ALGORITHM_UNDEFINED; }
         | ALGORITHM_SYM EQ MERGE_SYM
           { Lex->create_view_algorithm= VIEW_ALGORITHM_MERGE; }
         | ALGORITHM_SYM EQ TEMPTABLE_SYM
@@ -14400,6 +14868,7 @@ sf_tail:
             lex->length= lex->dec= NULL;
             lex->interval_list.empty();
             lex->type= 0;
+            lex->vcol_info= 0;
           }
           type_with_opt_collate /* $11 */
           { /* $12 */
@@ -14650,6 +15119,13 @@ uninstall:
           }
         ;
 
+/* Avoid compiler warning from sql_yacc.cc where yyerrlab1 is not used */
+keep_gcc_happy:
+	IMPOSSIBLE_ACTION
+	{
+	  YYERROR;
+	}
+
 /**
   @} (end of group Parser)
 */
diff --git a/sql/strfunc.h b/sql/strfunc.h
index f8ef914a641..57c5427fcd0 100644
--- a/sql/strfunc.h
+++ b/sql/strfunc.h
@@ -18,8 +18,6 @@
 
 #include "my_global.h"                          /* ulonglong, uint */
 
-typedef struct charset_info_st CHARSET_INFO;
-typedef struct st_mysql_lex_string LEX_STRING;
 typedef struct st_typelib TYPELIB;
 
 ulonglong find_set(TYPELIB *lib, const char *x, uint length, CHARSET_INFO *cs,
diff --git a/sql/structs.h b/sql/structs.h
index 8ac855a263c..347cb97e152 100644
--- a/sql/structs.h
+++ b/sql/structs.h
@@ -25,6 +25,7 @@
 #include "my_time.h"                   /* enum_mysql_timestamp_type */
 #include "thr_lock.h"                  /* thr_lock_type */
 #include "my_base.h"                   /* ha_rows, ha_key_alg */
+#include <mysql_com.h>                  /* USERNAME_LENGTH */
 
 struct TABLE;
 class Field;
@@ -65,7 +66,8 @@ typedef struct st_key_part_info {	/* Info about a key part */
   Field *field;
   uint	offset;				/* offset in record (from 0) */
   uint	null_offset;			/* Offset to null_bit in record */
-  uint16 length;                        /* Length of keypart value in bytes */
+  /* Length of key part in bytes, excluding NULL flag and length bytes */
+  uint16 length;
   /* 
     Number of bytes required to store the keypart value. This may be
     different from the "length" field as it also counts
@@ -80,14 +82,16 @@ typedef struct st_key_part_info {	/* Info about a key part */
   uint8 null_bit;			/* Position to null_bit */
 } KEY_PART_INFO ;
 
+class engine_option_value;
+struct ha_index_option_struct;
 
 typedef struct st_key {
   uint	key_length;			/* Tot length of key */
   ulong flags;                          /* dupp key and pack flags */
   uint	key_parts;			/* How many key_parts */
-  uint  extra_length;
   uint	usable_key_parts;		/* Should normally be = key_parts */
   uint  block_size;
+  uint  name_length;
   enum  ha_key_alg algorithm;
   /*
     Note that parser is used when the table is opened for use, and
@@ -100,6 +104,8 @@ typedef struct st_key {
   };
   KEY_PART_INFO *key_part;
   char	*name;				/* Name of key */
+  /* Unique name for cache;  db + \0 + table_name + \0 + key_name + \0 */
+  uchar *cache_name;
   /*
     Array of AVG(#records with the same field value) for 1st ... Nth key part.
     0 means 'not known'.
@@ -111,6 +117,9 @@ typedef struct st_key {
   } handler;
   TABLE *table;
   LEX_STRING comment;
+  /** reference to the list of options or NULL */
+  engine_option_value *option_list;
+  ha_index_option_struct *option_struct;                  /* structure with parsed options */
 } KEY;
 
 
@@ -218,6 +227,111 @@ typedef struct  user_conn {
   USER_RESOURCES user_resources;
 } USER_CONN;
 
+typedef struct st_user_stats
+{
+  char user[max(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
+  // Account name the user is mapped to when this is a user from mapped_user.
+  // Otherwise, the same value as user.
+  char priv_user[max(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
+  uint user_name_length;
+  uint total_connections;
+  uint concurrent_connections;
+  time_t connected_time;  // in seconds
+  double busy_time;       // in seconds
+  double cpu_time;        // in seconds
+  ulonglong bytes_received;
+  ulonglong bytes_sent;
+  ulonglong binlog_bytes_written;
+  ha_rows   rows_read, rows_sent;
+  ha_rows rows_updated, rows_deleted, rows_inserted;
+  ulonglong select_commands, update_commands, other_commands;
+  ulonglong commit_trans, rollback_trans;
+  ulonglong denied_connections, lost_connections;
+  ulonglong access_denied_errors;
+  ulonglong empty_queries;
+} USER_STATS;
+
+/* Lookup function for hash tables with USER_STATS entries */
+extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length,
+                                     my_bool not_used __attribute__((unused)));
+
+/* Free all memory for a hash table with USER_STATS entries */
+extern void free_user_stats(USER_STATS* user_stats);
+
+/* Initialize an instance of USER_STATS */
+extern void
+init_user_stats(USER_STATS *user_stats,
+                const char *user,
+                size_t user_length,
+                const char *priv_user,
+                uint total_connections,
+                uint concurrent_connections,
+                time_t connected_time,
+                double busy_time,
+                double cpu_time,
+                ulonglong bytes_received,
+                ulonglong bytes_sent,
+                ulonglong binlog_bytes_written,
+                ha_rows rows_sent,
+                ha_rows rows_read,
+                ha_rows rows_inserted,
+                ha_rows rows_deleted,
+                ha_rows rows_updated,
+                ulonglong select_commands,
+                ulonglong update_commands,
+                ulonglong other_commands,
+                ulonglong commit_trans,
+                ulonglong rollback_trans,
+                ulonglong denied_connections,
+                ulonglong lost_connections,
+                ulonglong access_denied_errors,
+                ulonglong empty_queries);
+
+/* Increment values of an instance of USER_STATS */
+extern void
+add_user_stats(USER_STATS *user_stats,
+               uint total_connections,
+               uint concurrent_connections,
+               time_t connected_time,
+               double busy_time,
+               double cpu_time,
+               ulonglong bytes_received,
+               ulonglong bytes_sent,
+               ulonglong binlog_bytes_written,
+               ha_rows rows_sent,
+               ha_rows rows_read,
+               ha_rows rows_inserted,
+               ha_rows rows_deleted,
+               ha_rows rows_updated,
+               ulonglong select_commands,
+               ulonglong update_commands,
+               ulonglong other_commands,
+               ulonglong commit_trans,
+               ulonglong rollback_trans,
+               ulonglong denied_connections,
+               ulonglong lost_connections,
+               ulonglong access_denied_errors,
+               ulonglong empty_queries);
+
+typedef struct st_table_stats
+{
+  char table[NAME_LEN * 2 + 2];  // [db] + '\0' + [table] + '\0'
+  uint table_name_length;
+  ulonglong rows_read, rows_changed;
+  ulonglong rows_changed_x_indexes;
+  /* Stores enum db_type, but forward declarations cannot be done */
+  int engine_type;
+} TABLE_STATS;
+
+typedef struct st_index_stats
+{
+  // [db] + '\0' + [table] + '\0' + [index] + '\0'
+  char index[NAME_LEN * 3 + 3];
+  uint index_name_length;                       /* Length of 'index' */
+  ulonglong rows_read;
+} INDEX_STATS;
+
+
 	/* Bits in form->update */
 #define REG_MAKE_DUPP		1	/* Make a copy of record when read */
 #define REG_NEW_RECORD		2	/* Write a new record if not found */
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index da8e61da52e..168c8a9c998 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -44,19 +44,14 @@
                      // mysql_user_table_is_in_short_password_format
 #include "derror.h"  // read_texts
 #include "sql_base.h"                           // close_cached_tables
+#include <myisam.h>
+#include "log_slow.h"
 
 #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE
 #include "../storage/perfschema/pfs_server.h"
 #endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */
 
 /*
-  This forward declaration is needed because including sql_base.h
-  causes further includes.  [TODO] Eliminate this forward declaration
-  and include a file with the prototype instead.
-*/
-extern void close_thread_tables(THD *thd);
-
-/*
   The rule for this file: everything should be 'static'. When a sys_var
   variable or a function from this file is - in very rare cases - needed
   elsewhere it should be explicitly declared 'export' here to show that it's
@@ -66,136 +61,117 @@ extern void close_thread_tables(THD *thd);
 
 #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE
 
-#define PFS_TRAILING_PROPERTIES \
-  NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(NULL), ON_UPDATE(NULL), \
-  0, NULL, sys_var::PARSE_EARLY
-
 static Sys_var_mybool Sys_pfs_enabled(
        "performance_schema",
        "Enable the performance schema.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_enabled),
-       CMD_LINE(OPT_ARG), DEFAULT(FALSE),
-       PFS_TRAILING_PROPERTIES);
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_enabled),
+       CMD_LINE(OPT_ARG), DEFAULT(FALSE));
 
 static Sys_var_ulong Sys_pfs_events_waits_history_long_size(
        "performance_schema_events_waits_history_long_size",
        "Number of rows in EVENTS_WAITS_HISTORY_LONG.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_events_waits_history_long_sizing),
+       PARSED_EARLY READ_ONLY
+       GLOBAL_VAR(pfs_param.m_events_waits_history_long_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1024*1024),
-       DEFAULT(PFS_WAITS_HISTORY_LONG_SIZE),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_WAITS_HISTORY_LONG_SIZE), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_events_waits_history_size(
        "performance_schema_events_waits_history_size",
        "Number of rows per thread in EVENTS_WAITS_HISTORY.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_events_waits_history_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_events_waits_history_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1024),
-       DEFAULT(PFS_WAITS_HISTORY_SIZE),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_WAITS_HISTORY_SIZE), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_cond_classes(
        "performance_schema_max_cond_classes",
        "Maximum number of condition instruments.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_cond_class_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_cond_class_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256),
-       DEFAULT(PFS_MAX_COND_CLASS),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_COND_CLASS), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_cond_instances(
        "performance_schema_max_cond_instances",
        "Maximum number of instrumented condition objects.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_cond_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_cond_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1024*1024),
-       DEFAULT(PFS_MAX_COND),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_COND), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_file_classes(
        "performance_schema_max_file_classes",
        "Maximum number of file instruments.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_file_class_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_file_class_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256),
-       DEFAULT(PFS_MAX_FILE_CLASS),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_FILE_CLASS), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_file_handles(
        "performance_schema_max_file_handles",
        "Maximum number of opened instrumented files.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_file_handle_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_file_handle_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1024*1024),
-       DEFAULT(PFS_MAX_FILE_HANDLE),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_FILE_HANDLE), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_file_instances(
        "performance_schema_max_file_instances",
        "Maximum number of instrumented files.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_file_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_file_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1024*1024),
-       DEFAULT(PFS_MAX_FILE),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_FILE), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_mutex_classes(
        "performance_schema_max_mutex_classes",
        "Maximum number of mutex instruments.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_mutex_class_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_mutex_class_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256),
-       DEFAULT(PFS_MAX_MUTEX_CLASS),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_MUTEX_CLASS), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_mutex_instances(
        "performance_schema_max_mutex_instances",
        "Maximum number of instrumented MUTEX objects.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_mutex_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_mutex_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 100*1024*1024),
-       DEFAULT(PFS_MAX_MUTEX),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_MUTEX), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_rwlock_classes(
        "performance_schema_max_rwlock_classes",
        "Maximum number of rwlock instruments.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_rwlock_class_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_rwlock_class_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256),
-       DEFAULT(PFS_MAX_RWLOCK_CLASS),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_RWLOCK_CLASS), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_rwlock_instances(
        "performance_schema_max_rwlock_instances",
        "Maximum number of instrumented RWLOCK objects.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_rwlock_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_rwlock_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 100*1024*1024),
-       DEFAULT(PFS_MAX_RWLOCK),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_RWLOCK), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_table_handles(
        "performance_schema_max_table_handles",
        "Maximum number of opened instrumented tables.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_table_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_table_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1024*1024),
-       DEFAULT(PFS_MAX_TABLE),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_TABLE), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_table_instances(
        "performance_schema_max_table_instances",
        "Maximum number of instrumented tables.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_table_share_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_table_share_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1024*1024),
-       DEFAULT(PFS_MAX_TABLE_SHARE),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_TABLE_SHARE), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_thread_classes(
        "performance_schema_max_thread_classes",
        "Maximum number of thread instruments.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_thread_class_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_thread_class_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256),
-       DEFAULT(PFS_MAX_THREAD_CLASS),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_THREAD_CLASS), BLOCK_SIZE(1));
 
 static Sys_var_ulong Sys_pfs_max_thread_instances(
        "performance_schema_max_thread_instances",
        "Maximum number of instrumented threads.",
-       READ_ONLY GLOBAL_VAR(pfs_param.m_thread_sizing),
+       PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_thread_sizing),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1024*1024),
-       DEFAULT(PFS_MAX_THREAD),
-       BLOCK_SIZE(1), PFS_TRAILING_PROPERTIES);
+       DEFAULT(PFS_MAX_THREAD), BLOCK_SIZE(1));
 
 #endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */
 
@@ -406,7 +382,7 @@ static bool check_charset(sys_var *self, THD *thd, set_var *var)
     else if (!(var->save_result.ptr= get_charset_by_csname(res->c_ptr(),
                                                            MY_CS_PRIMARY,
                                                            MYF(0))) &&
-             !(var->save_result.ptr= get_old_charset_by_name(res->c_ptr())))
+             !(var->save_result.ptr=get_old_charset_by_name(res->c_ptr())))
     {
       ErrConvString err(res);
       my_error(ER_UNKNOWN_CHARACTER_SET, MYF(0), err.ptr());
@@ -605,15 +581,20 @@ export bool fix_delay_key_write(sys_var *self, THD *thd, enum_var_type type)
   switch (delay_key_write_options) {
   case DELAY_KEY_WRITE_NONE:
     myisam_delay_key_write=0;
+    ha_open_options&= ~HA_OPEN_DELAY_KEY_WRITE;
     break;
   case DELAY_KEY_WRITE_ON:
     myisam_delay_key_write=1;
+    ha_open_options&= ~HA_OPEN_DELAY_KEY_WRITE;
     break;
   case DELAY_KEY_WRITE_ALL:
     myisam_delay_key_write=1;
     ha_open_options|= HA_OPEN_DELAY_KEY_WRITE;
     break;
   }
+#ifdef WITH_ARIA_STORAGE_ENGINE
+  maria_delay_key_write= myisam_delay_key_write;
+#endif
   return false;
 }
 static const char *delay_key_write_names[]= { "OFF", "ON", "ALL", NullS };
@@ -727,7 +708,7 @@ static Sys_var_ulong Sys_flush_time(
        "given interval",
        GLOBAL_VAR(flush_time),
        CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, LONG_TIMEOUT),
-       DEFAULT(FLUSH_TIME), BLOCK_SIZE(1));
+       DEFAULT(0), BLOCK_SIZE(1));
 
 static bool check_ftb_syntax(sys_var *self, THD *thd, set_var *var)
 {
@@ -797,8 +778,8 @@ static bool check_init_string(sys_var *self, THD *thd, set_var *var)
 static PolyLock_rwlock PLock_sys_init_connect(&LOCK_sys_init_connect);
 static Sys_var_lexstring Sys_init_connect(
        "init_connect", "Command(s) that are executed for each "
-       "new connection", GLOBAL_VAR(opt_init_connect),
-       CMD_LINE(REQUIRED_ARG), IN_SYSTEM_CHARSET,
+       "new connection (unless the user has SUPER privilege)",
+       GLOBAL_VAR(opt_init_connect), CMD_LINE(REQUIRED_ARG), IN_SYSTEM_CHARSET,
        DEFAULT(""), &PLock_sys_init_connect, NOT_IN_BINLOG,
        ON_CHECK(check_init_string));
 
@@ -830,7 +811,7 @@ static Sys_var_ulong Sys_interactive_timeout(
 
 static Sys_var_ulong Sys_join_buffer_size(
        "join_buffer_size",
-       "The size of the buffer that is used for full joins",
+       "The size of the buffer that is used for joins",
        SESSION_VAR(join_buff_size), CMD_LINE(REQUIRED_ARG),
        VALID_RANGE(128, ULONG_MAX), DEFAULT(128*1024), BLOCK_SIZE(128));
 
@@ -851,7 +832,7 @@ static Sys_var_keycache Sys_key_cache_block_size(
        CMD_LINE(REQUIRED_ARG, OPT_KEY_CACHE_BLOCK_SIZE),
        VALID_RANGE(512, 1024*16), DEFAULT(KEY_CACHE_BLOCK_SIZE),
        BLOCK_SIZE(512), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
-       ON_UPDATE(update_keycache_param));
+       ON_UPDATE(resize_keycache));
 
 static Sys_var_keycache Sys_key_cache_division_limit(
        "key_cache_division_limit",
@@ -860,7 +841,7 @@ static Sys_var_keycache Sys_key_cache_division_limit(
        CMD_LINE(REQUIRED_ARG, OPT_KEY_CACHE_DIVISION_LIMIT),
        VALID_RANGE(1, 100), DEFAULT(100),
        BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
-       ON_UPDATE(update_keycache_param));
+       ON_UPDATE(change_keycache_param));
 
 static Sys_var_keycache Sys_key_cache_age_threshold(
        "key_cache_age_threshold", "This characterizes the number of "
@@ -872,7 +853,7 @@ static Sys_var_keycache Sys_key_cache_age_threshold(
        CMD_LINE(REQUIRED_ARG, OPT_KEY_CACHE_AGE_THRESHOLD),
        VALID_RANGE(100, ULONG_MAX), DEFAULT(300),
        BLOCK_SIZE(100), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
-       ON_UPDATE(update_keycache_param));
+       ON_UPDATE(change_keycache_param));
 
 static Sys_var_mybool Sys_large_files_support(
        "large_files_support",
@@ -931,7 +912,10 @@ static Sys_var_mybool Sys_trust_function_creators(
        CMD_LINE(OPT_ARG), DEFAULT(FALSE));
 
 static Sys_var_charptr Sys_log_error(
-       "log_error", "Error log file",
+       "log_error",
+       "Log errors to file (instead of stdout).  If file name is not specified "
+       "then 'datadir'/'log-basename'.err or the pid-file path with extension "
+       ".err is used",
        READ_ONLY GLOBAL_VAR(log_error_file_ptr),
        CMD_LINE(OPT_ARG, OPT_LOG_ERROR),
        IN_FS_CHARSET, DEFAULT(disabled_my_option));
@@ -945,7 +929,7 @@ static Sys_var_mybool Sys_log_queries_not_using_indexes(
 
 static Sys_var_ulong Sys_log_warnings(
        "log_warnings",
-       "Log some not critical warnings to the log file",
+       "Log some not critical warnings to the general log file",
        SESSION_VAR(log_warnings),
        CMD_LINE(OPT_ARG, 'W'),
        VALID_RANGE(0, ULONG_MAX), DEFAULT(1), BLOCK_SIZE(1));
@@ -1031,8 +1015,7 @@ static bool session_readonly(sys_var *self, THD *thd, set_var *var)
   return true;
 }
 
-static bool
-check_max_allowed_packet(sys_var *self, THD *thd,  set_var *var)
+static bool check_max_allowed_packet(sys_var *self, THD *thd,  set_var *var)
 {
   longlong val;
   if (session_readonly(self, thd, var))
@@ -1094,7 +1077,7 @@ static Sys_var_ulong Sys_max_binlog_size(
 static bool fix_max_connections(sys_var *self, THD *thd, enum_var_type type)
 {
 #ifndef EMBEDDED_LIBRARY
-  resize_thr_alarm(max_connections +
+  resize_thr_alarm(max_connections + extra_max_connections +
                    global_system_variables.max_insert_delayed_threads + 10);
 #endif
   return false;
@@ -1199,7 +1182,7 @@ static Sys_var_harows Sys_sql_max_join_size(
        SESSION_VAR(max_join_size), NO_CMD_LINE,
        VALID_RANGE(1, HA_POS_ERROR), DEFAULT(HA_POS_ERROR), BLOCK_SIZE(1),
        NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
-       ON_UPDATE(fix_max_join_size), DEPRECATED(70000, 0));
+       ON_UPDATE(fix_max_join_size), DEPRECATED(70000, "'@@max_join_size'"));
 
 static Sys_var_ulong Sys_max_long_data_size(
        "max_long_data_size",
@@ -1287,8 +1270,7 @@ static Sys_var_mybool Sys_named_pipe(
 #endif
 
 
-static bool 
-check_net_buffer_length(sys_var *self, THD *thd,  set_var *var)
+static bool check_net_buffer_length(sys_var *self, THD *thd,  set_var *var)
 {
   longlong val;
   if (session_readonly(self, thd, var))
@@ -1355,13 +1337,9 @@ static Sys_var_ulong Sys_net_retry_count(
        BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
        ON_UPDATE(fix_net_retry_count));
 
-static Sys_var_mybool Sys_new_mode(
-       "new", "Use very new possible \"unsafe\" functions",
-       SESSION_VAR(new_mode), CMD_LINE(OPT_ARG, 'n'), DEFAULT(FALSE));
-
 static Sys_var_mybool Sys_old_mode(
        "old", "Use compatible behavior",
-       READ_ONLY GLOBAL_VAR(old_mode), CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+       SESSION_VAR(old_mode), CMD_LINE(OPT_ARG), DEFAULT(FALSE));
 
 static Sys_var_mybool Sys_old_alter_table(
        "old_alter_table", "Use old, non-optimized alter table",
@@ -1422,10 +1400,28 @@ static Sys_var_ulong Sys_optimizer_search_depth(
        NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
        ON_UPDATE(fix_optimizer_search_depth));
 
-static const char *optimizer_switch_names[]=
-{
-  "index_merge", "index_merge_union", "index_merge_sort_union",
-  "index_merge_intersection", "engine_condition_pushdown",
+/* this is used in the sigsegv handler */
+export const char *optimizer_switch_names[]=
+{
+  "index_merge","index_merge_union","index_merge_sort_union",
+  "index_merge_intersection","index_merge_sort_intersection",
+  "engine_condition_pushdown",
+  "index_condition_pushdown",
+  "derived_merge", "derived_with_keys",
+  "firstmatch","loosescan","materialization","in_to_exists","semijoin",
+  "partial_match_rowid_merge",
+  "partial_match_table_scan",
+  "subquery_cache",
+  "mrr",
+  "mrr_cost_based",
+  "mrr_sort_keys",
+  "outer_join_with_cache",
+  "semijoin_with_cache",
+  "join_cache_incremental",
+  "join_cache_hashed",
+  "join_cache_bka",
+  "optimize_join_buffer_size",
+  "table_elimination",
   "default", NullS
 };
 /** propagates changes to @@engine_condition_pushdown */
@@ -1433,16 +1429,41 @@ static bool fix_optimizer_switch(sys_var *self, THD *thd,
                                  enum_var_type type)
 {
   SV *sv= (type == OPT_GLOBAL) ? &global_system_variables : &thd->variables;
-  sv->engine_condition_pushdown= 
+  sv->engine_condition_pushdown=
     test(sv->optimizer_switch & OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN);
   return false;
 }
 static Sys_var_flagset Sys_optimizer_switch(
        "optimizer_switch",
-       "optimizer_switch=option=val[,option=val...], where option is one of "
-       "{index_merge, index_merge_union, index_merge_sort_union, "
-       "index_merge_intersection, engine_condition_pushdown}"
-       " and val is one of {on, off, default}",
+       "optimizer_switch=option=val[,option=val...], where option is one of {"
+        "derived_merge, "
+        "derived_with_keys, "
+        "firstmatch, "
+        "in_to_exists, "
+        "engine_condition_pushdown, "
+        "index_condition_pushdown, "
+        "index_merge, "
+        "index_merge_intersection, "
+        "index_merge_sort_intersection, "
+        "index_merge_sort_union, "
+        "index_merge_union, "
+        "join_cache_bka, "
+        "join_cache_hashed, "
+        "join_cache_incremental, "
+        "loosescan, "
+        "materialization, "
+        "mrr, "
+        "mrr_cost_based, "
+        "mrr_sort_keys, "
+        "optimize_join_buffer_size, "
+        "outer_join_with_cache, "
+        "partial_match_rowid_merge, "
+        "partial_match_table_scan, "
+        "semijoin, "
+        "semijoin_with_cache, "
+        "subquery_cache, "
+        "table_elimination "
+       "} and val is one of {on, off, default}",
        SESSION_VAR(optimizer_switch), CMD_LINE(REQUIRED_ARG),
        optimizer_switch_names, DEFAULT(OPTIMIZER_SWITCH_DEFAULT),
        NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(NULL),
@@ -1628,9 +1649,11 @@ static Sys_var_ulong Sys_range_alloc_block_size(
        DEFAULT(RANGE_ALLOC_BLOCK_SIZE), BLOCK_SIZE(1024));
 
 static Sys_var_ulong Sys_multi_range_count(
-       "multi_range_count", "Number of key ranges to request at once",
+       "multi_range_count", "Ignored. Use mrr_buffer_size instead",
        SESSION_VAR(multi_range_count), CMD_LINE(REQUIRED_ARG),
-       VALID_RANGE(1, ULONG_MAX), DEFAULT(256), BLOCK_SIZE(1));
+       VALID_RANGE(1, ULONG_MAX), DEFAULT(256), BLOCK_SIZE(1),
+       NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0),
+       DEPRECATED(50700, "'@@mrr_buffer_size'"));
 
 static bool fix_thd_mem_root(sys_var *self, THD *thd, enum_var_type type)
 {
@@ -1752,13 +1775,19 @@ static Sys_var_ulong Sys_trans_prealloc_size(
 
 static const char *thread_handling_names[]=
 {
-  "one-thread-per-connection", "no-threads", "loaded-dynamically",
+  "one-thread-per-connection", "no-threads",
+#ifdef HAVE_POOL_OF_THREADS
+  "pool-of-threads",
+#endif
   0
 };
 static Sys_var_enum Sys_thread_handling(
        "thread_handling",
        "Define threads usage for handling queries, one of "
-       "one-thread-per-connection, no-threads, loaded-dynamically"
+       "one-thread-per-connection, no-threads"
+#ifdef HAVE_POOL_OF_THREADS
+       ", pool-of-threads"
+#endif
        , READ_ONLY GLOBAL_VAR(thread_handling), CMD_LINE(REQUIRED_ARG),
        thread_handling_names, DEFAULT(0));
 
@@ -1809,11 +1838,36 @@ static Sys_var_ulong Sys_query_cache_min_res_unit(
 static const char *query_cache_type_names[]= { "OFF", "ON", "DEMAND", 0 };
 static bool check_query_cache_type(sys_var *self, THD *thd, set_var *var)
 {
-  if (query_cache.is_disabled())
+  if (query_cache.is_disable_in_progress())
   {
     my_error(ER_QUERY_CACHE_DISABLED, MYF(0));
     return true;
   }
+  if (var->type != OPT_GLOBAL && var->value && // no value: SET ... = DEFAULT
+      global_system_variables.query_cache_type == 0 &&
+      var->save_result.ulonglong_value != 0)   // parsed enum value, OFF == 0
+  {
+    my_error(ER_QUERY_CACHE_IS_GLOBALY_DISABLED, MYF(0));
+    return true;
+  }
+
+  return false;
+}
+static bool fix_query_cache_type(sys_var *self, THD *thd, enum_var_type type)
+{
+  if (type != OPT_GLOBAL)
+    return false;
+
+  if (global_system_variables.query_cache_type != 0 &&
+      query_cache.is_disabled())
+  {
+    /* if disabling in progress variable will not be set */
+    DBUG_ASSERT(!query_cache.is_disable_in_progress());
+    /* Enable query cache because it was disabled */
+    fix_query_cache_size(0, thd, type);
+  }
+  else if (global_system_variables.query_cache_type == 0)
+    query_cache.disable_query_cache(thd);
   return false;
 }
 static Sys_var_enum Sys_query_cache_type(
@@ -1823,7 +1877,8 @@ static Sys_var_enum Sys_query_cache_type(
        "SELECT SQL_CACHE ... queries",
        SESSION_VAR(query_cache_type), CMD_LINE(REQUIRED_ARG),
        query_cache_type_names, DEFAULT(1), NO_MUTEX_GUARD, NOT_IN_BINLOG,
-       ON_CHECK(check_query_cache_type));
+       ON_CHECK(check_query_cache_type),
+       ON_UPDATE(fix_query_cache_type));
 
 static Sys_var_mybool Sys_query_cache_wlock_invalidate(
        "query_cache_wlock_invalidate",
@@ -1877,7 +1932,8 @@ static Sys_var_enum Slave_exec_mode(
        "between the master and the slave",
        GLOBAL_VAR(slave_exec_mode_options), CMD_LINE(REQUIRED_ARG),
        slave_exec_mode_names, DEFAULT(SLAVE_EXEC_MODE_STRICT));
-const char *slave_type_conversions_name[]= {"ALL_LOSSY", "ALL_NON_LOSSY", 0};
+
+static const char *slave_type_conversions_name[]= {"ALL_LOSSY", "ALL_NON_LOSSY", 0};
 static Sys_var_set Slave_type_conversions(
        "slave_type_conversions",
        "Set of slave type conversions that are enabled. Legal values are:"
@@ -1888,6 +1944,22 @@ static Sys_var_set Slave_type_conversions(
        GLOBAL_VAR(slave_type_conversions_options), CMD_LINE(REQUIRED_ARG),
        slave_type_conversions_name,
        DEFAULT(0));
+
+static Sys_var_mybool Sys_slave_sql_verify_checksum(
+       "slave_sql_verify_checksum",
+       "Force checksum verification of replication events after reading them "
+       "from relay log. Note: Events are always checksum-verified by slave on "
+       "receiving them from the network before writing them to the relay log",
+       GLOBAL_VAR(opt_slave_sql_verify_checksum), CMD_LINE(OPT_ARG),
+       DEFAULT(TRUE));
+
+static Sys_var_mybool Sys_master_verify_checksum(
+       "master_verify_checksum",
+       "Force checksum verification of logged events in the binary log before "
+       "sending them to slaves or printing them in the output of "
+       "SHOW BINLOG EVENTS",
+       GLOBAL_VAR(opt_master_verify_checksum), CMD_LINE(OPT_ARG),
+       DEFAULT(FALSE));
 #endif
 
 
@@ -1980,7 +2052,8 @@ static bool fix_sql_mode(sys_var *self, THD *thd, enum_var_type type)
 */
 static const char *sql_mode_names[]=
 {
-  "REAL_AS_FLOAT", "PIPES_AS_CONCAT", "ANSI_QUOTES", "IGNORE_SPACE", ",",
+  "REAL_AS_FLOAT", "PIPES_AS_CONCAT", "ANSI_QUOTES", "IGNORE_SPACE",
+  "IGNORE_BAD_TABLE_OPTIONS",
   "ONLY_FULL_GROUP_BY", "NO_UNSIGNED_SUBTRACTION", "NO_DIR_IN_CREATE",
   "POSTGRESQL", "ORACLE", "MSSQL", "DB2", "MAXDB", "NO_KEY_OPTIONS",
   "NO_TABLE_OPTIONS", "NO_FIELD_OPTIONS", "MYSQL323", "MYSQL40", "ANSI",
@@ -2085,6 +2158,15 @@ static Sys_var_ulong Sys_thread_cache_size(
        GLOBAL_VAR(thread_cache_size), CMD_LINE(REQUIRED_ARG),
        VALID_RANGE(0, 16384), DEFAULT(0), BLOCK_SIZE(1));
 
+#ifdef HAVE_POOL_OF_THREADS
+static Sys_var_ulong Sys_thread_pool_size(
+       "thread_pool_size",
+       "How many threads we should create to handle query requests in "
+       "case of 'thread_handling=pool-of-threads'",
+       GLOBAL_VAR(thread_pool_size), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(1, 16384), DEFAULT(20), BLOCK_SIZE(1));
+#endif /* HAVE_POOL_OF_THREADS */
+
 /**
   Can't change the 'next' tx_isolation if we are already in a
   transaction.
@@ -2101,32 +2183,6 @@ static bool check_tx_isolation(sys_var *self, THD *thd, set_var *var)
   return FALSE;
 }
 
-
-bool Sys_var_tx_isolation::session_update(THD *thd, set_var *var)
-{
-  if (var->type == OPT_SESSION && Sys_var_enum::session_update(thd, var))
-    return TRUE;
-  if (var->type == OPT_DEFAULT || !thd->in_active_multi_stmt_transaction())
-  {
-    /*
-      Update the isolation level of the next transaction.
-      I.e. if one did:
-      COMMIT;
-      SET SESSION ISOLATION LEVEL ...
-      BEGIN; <-- this transaction has the new isolation
-      Note, that in case of:
-      COMMIT;
-      SET TRANSACTION ISOLATION LEVEL ...
-      SET SESSION ISOLATION LEVEL ...
-      BEGIN; <-- the session isolation level is used, not the
-      result of SET TRANSACTION statement.
-     */
-    thd->tx_isolation= (enum_tx_isolation) var->save_result.ulonglong_value;
-  }
-  return FALSE;
-}
-
-
 // NO_CMD_LINE - different name of the option
 static Sys_var_tx_isolation Sys_tx_isolation(
        "tx_isolation", "Default transaction isolation level",
@@ -2137,7 +2193,7 @@ static Sys_var_tx_isolation Sys_tx_isolation(
 static Sys_var_ulonglong Sys_tmp_table_size(
        "tmp_table_size",
        "If an internal in-memory temporary table exceeds this size, MySQL "
-       "will automatically convert it to an on-disk MyISAM table",
+       "will automatically convert it to an on-disk MyISAM or Aria table",
        SESSION_VAR(tmp_table_size), CMD_LINE(REQUIRED_ARG),
        VALID_RANGE(1024, (ulonglong)~(intptr)0), DEFAULT(16*1024*1024),
        BLOCK_SIZE(1));
@@ -2463,41 +2519,24 @@ static Sys_var_harows Sys_select_limit(
 static bool update_timestamp(THD *thd, set_var *var)
 {
   if (var->value)
-    thd->set_time((time_t) var->save_result.ulonglong_value);
+  {
+    my_hrtime_t hrtime = { hrtime_from_time(var->save_result.double_value) };
+    thd->set_time(hrtime);
+  }
   else // SET timestamp=DEFAULT
-    thd->user_time= 0;
+    thd->user_time.val= 0;
   return false;
 }
-static ulonglong read_timestamp(THD *thd)
+static double read_timestamp(THD *thd)
 {
-  return (ulonglong) thd->start_time;
+  return thd->start_time +
+         thd->start_time_sec_part/(double)TIME_SECOND_PART_FACTOR;
 }
-
-
-static bool check_timestamp(sys_var *self, THD *thd, set_var *var)
-{
-  longlong val;
-
-  if (!var->value)
-    return FALSE;
-
-  val= (longlong) var->save_result.ulonglong_value;
-  if (val != 0 &&          // this is how you set the default value
-      (val < TIMESTAMP_MIN_VALUE || val > TIMESTAMP_MAX_VALUE))
-  {
-    char buf[64];
-    my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "timestamp", llstr(val, buf));
-    return TRUE;
-  }
-  return FALSE;
-}
-
-
-static Sys_var_session_special Sys_timestamp(
+static Sys_var_session_special_double Sys_timestamp(
        "timestamp", "Set the time for this client",
        sys_var::ONLY_SESSION, NO_CMD_LINE,
-       VALID_RANGE(0, ~(time_t)0), BLOCK_SIZE(1),
-       NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(check_timestamp), 
+       VALID_RANGE(0, TIMESTAMP_MAX_VALUE),
+       NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(0), 
        ON_UPDATE(update_timestamp), ON_READ(read_timestamp));
 
 static bool update_last_insert_id(THD *thd, set_var *var)
@@ -2753,9 +2792,7 @@ static bool fix_log(char** logname, const char* default_logname,
 {
   if (!*logname) // SET ... = DEFAULT
   {
-    char buff[FN_REFLEN];
-    *logname= my_strdup(make_log_name(buff, default_logname, ext),
-                        MYF(MY_FAE+MY_WME));
+    make_default_log_name(logname, ext, false);
     if (!*logname)
       return true;
   }
@@ -2774,7 +2811,7 @@ static void reopen_general_log(char* name)
 }
 static bool fix_general_log_file(sys_var *self, THD *thd, enum_var_type type)
 {
-  return fix_log(&opt_logname, default_logfile_name, ".log", opt_log,
+  return fix_log(&opt_logname,  opt_log_basename, ".log", opt_log,
                  reopen_general_log);
 }
 static Sys_var_charptr Sys_general_log_path(
@@ -2790,7 +2827,7 @@ static void reopen_slow_log(char* name)
 }
 static bool fix_slow_log_file(sys_var *self, THD *thd, enum_var_type type)
 {
-  return fix_log(&opt_slow_logname, default_logfile_name, "-slow.log",
+  return fix_log(&opt_slow_logname, opt_log_basename, "-slow.log",
                  opt_slow_log, reopen_slow_log);
 }
 static Sys_var_charptr Sys_slow_log_path(
@@ -3174,3 +3211,209 @@ static Sys_var_tz Sys_time_zone(
        SESSION_VAR(time_zone), NO_CMD_LINE,
        DEFAULT(&default_tz), NO_MUTEX_GUARD, IN_BINLOG);
 
+export const char *plugin_maturity_names[]=
+{ "unknown", "experimental", "alpha", "beta", "gamma", "stable", 0 };
+static Sys_var_enum Sys_plugin_maturity(
+       "plugin_maturity",
+       "The lowest desirable plugin maturity "
+       "(unknown, experimental, alpha, beta, gamma, or stable). "
+       "Plugins less mature than that will not be installed or loaded.",
+       READ_ONLY GLOBAL_VAR(plugin_maturity), CMD_LINE(REQUIRED_ARG),
+       plugin_maturity_names, DEFAULT(MariaDB_PLUGIN_MATURITY_UNKNOWN));
+
+static Sys_var_ulong Sys_deadlock_search_depth_short(
+       "deadlock_search_depth_short",
+       "Short search depth for the two-step deadlock detection",
+       SESSION_VAR(wt_deadlock_search_depth_short), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, 32), DEFAULT(4), BLOCK_SIZE(1));
+
+static Sys_var_ulong Sys_deadlock_search_depth_long(
+       "deadlock_search_depth_long",
+       "Long search depth for the two-step deadlock detection",
+       SESSION_VAR(wt_deadlock_search_depth_long), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, 33), DEFAULT(15), BLOCK_SIZE(1));
+
+static Sys_var_ulong Sys_deadlock_timeout_depth_short(
+       "deadlock_timeout_short",
+       "Short timeout for the two-step deadlock detection (in microseconds)",
+       SESSION_VAR(wt_timeout_short), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, ULONG_MAX), DEFAULT(10000), BLOCK_SIZE(1));
+
+static Sys_var_ulong Sys_deadlock_timeout_depth_long(
+       "deadlock_timeout_long",
+       "Long timeout for the two-step deadlock detection (in microseconds)",
+       SESSION_VAR(wt_timeout_long), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, ULONG_MAX), DEFAULT(50000000), BLOCK_SIZE(1));
+
+#ifndef DBUG_OFF
+static Sys_var_ulong Sys_debug_crc_break(
+       "debug_crc_break",
+       "Call my_debug_put_break_here() if crc matches this number (for debug)",
+       GLOBAL_VAR(my_crc_dbug_check), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, ULONG_MAX), DEFAULT(0), BLOCK_SIZE(1));
+#endif
+
+static Sys_var_uint Sys_extra_port(
+       "extra_port",
+       "Extra port number to use for tcp connections in a "
+       "one-thread-per-connection manner. 0 means don't use another port",
+       READ_ONLY GLOBAL_VAR(mysqld_extra_port), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, UINT_MAX32), DEFAULT(0), BLOCK_SIZE(1));
+
+static Sys_var_ulong Sys_extra_max_connections(
+       "extra_max_connections", "The number of connections on extra-port",
+       GLOBAL_VAR(extra_max_connections), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(1, 100000), DEFAULT(1), BLOCK_SIZE(1), NO_MUTEX_GUARD,
+       NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(fix_max_connections));
+
+#ifdef SAFE_MUTEX
+static Sys_var_mybool Sys_mutex_deadlock_detector(
+       "mutex_deadlock_detector", "Enable checking of wrong mutex usage",
+       READ_ONLY GLOBAL_VAR(safe_mutex_deadlock_detector),
+       CMD_LINE(OPT_ARG), DEFAULT(TRUE));
+#endif
+
+static Sys_var_keycache Sys_key_cache_segments(
+       "key_cache_segments", "The number of segments in a key cache",
+       KEYCACHE_VAR(param_partitions),
+       CMD_LINE(REQUIRED_ARG, OPT_KEY_CACHE_PARTITIONS),
+       VALID_RANGE(0, MAX_KEY_CACHE_PARTITIONS),
+       DEFAULT(DEFAULT_KEY_CACHE_PARTITIONS),
+       BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+       ON_UPDATE(repartition_keycache));
+
+static const char *log_slow_filter_names[]=
+{ "admin", "filesort", "filesort_on_disk", "full_join", "full_scan",
+  "query_cache", "query_cache_miss", "tmp_table", "tmp_table_on_disk", 0
+}; /* the help text of log_slow_filter must list exactly these names */
+static Sys_var_set Sys_log_slow_filter(
+       "log_slow_filter",
+       "Log only certain types of queries. Multiple "
+       "flags can be specified, separated by commas. Valid values are admin, "
+       "filesort, filesort_on_disk, full_join, full_scan, query_cache, "
+       "query_cache_miss, tmp_table, tmp_table_on_disk",
+       SESSION_VAR(log_slow_filter), CMD_LINE(REQUIRED_ARG),
+       log_slow_filter_names,
+       DEFAULT(MAX_SET(array_elements(log_slow_filter_names)-1)));
+
+static Sys_var_ulong Sys_log_slow_rate_limit(
+       "log_slow_rate_limit",
+       "Write to slow log every #th slow query. Set to 1 to log everything. "
+       "Increase it to reduce the size of the slow log or the performance "
+       "impact of slow logging",
+       SESSION_VAR(log_slow_rate_limit), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(1, ULONG_MAX), DEFAULT(1), BLOCK_SIZE(1));
+
+static const char *log_slow_verbosity_names[]= { "innodb", "query_plan", 0 };
+static Sys_var_set Sys_log_slow_verbosity(
+       "log_slow_verbosity",
+       "log-slow-verbosity=[value[,value ...]] where value is one of "
+       "'innodb', 'query_plan'",
+       SESSION_VAR(log_slow_verbosity), CMD_LINE(REQUIRED_ARG),
+       log_slow_verbosity_names, DEFAULT(LOG_SLOW_VERBOSITY_INIT));
+
+static Sys_var_ulong Sys_join_cache_level(
+       "join_cache_level",
+       "Controls what join operations can be executed with join buffers. Odd "
+       "numbers are used for plain join buffers while even numbers are used "
+       "for linked buffers",
+       SESSION_VAR(join_cache_level), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, 8), DEFAULT(1), BLOCK_SIZE(1));
+
+static Sys_var_ulong Sys_mrr_buffer_size(
+       "mrr_buffer_size",
+       "Size of buffer to use when using MRR with range access",
+       SESSION_VAR(mrr_buff_size), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(IO_SIZE*2, INT_MAX32), DEFAULT(256*1024), BLOCK_SIZE(1));
+
+static Sys_var_ulong Sys_rowid_merge_buff_size(
+       "rowid_merge_buff_size",
+       "The size of the buffers used [NOT] IN evaluation via partial matching",
+       SESSION_VAR(rowid_merge_buff_size), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, ((ulonglong)~(intptr)0)/2), DEFAULT(8*1024*1024),
+       BLOCK_SIZE(1));
+
+static Sys_var_mybool Sys_userstat(
+       "userstat",
+       "Enables statistics gathering for USER_STATISTICS, CLIENT_STATISTICS, "
+       "INDEX_STATISTICS and TABLE_STATISTICS tables in the INFORMATION_SCHEMA",
+       GLOBAL_VAR(opt_userstat_running),
+       CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_mybool Sys_binlog_annotate_row_events(
+       "binlog_annotate_rows_events",
+       "Tells the master to annotate RBR events with the statement that "
+       "caused these events",
+       SESSION_VAR(binlog_annotate_rows_events), CMD_LINE(OPT_ARG),
+       DEFAULT(FALSE));
+
+#ifdef HAVE_REPLICATION
+static Sys_var_mybool Sys_replicate_annotate_rows_events(
+       "replicate_annotate_rows_events",
+       "Tells the slave to write annotate rows events received from the master "
+       "to its own binary log. Ignored if log_slave_updates is not set",
+       READ_ONLY GLOBAL_VAR(opt_replicate_annotate_rows_events),
+       CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+#endif /* HAVE_REPLICATION */
+
+#if 0
+static Sys_var_mybool Sys_safemalloc(
+       "safemalloc",
+       "Check all memory allocations for every malloc/free call (can be slow)",
+       GLOBAL_VAR(sf_malloc_trough_check),
+       CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+#endif
+
+static Sys_var_ulonglong Sys_join_buffer_space_limit(
+       "join_buffer_space_limit",
+       "The limit of the space for all join buffers used by a query",
+       SESSION_VAR(join_buff_space_limit), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(2048, ULONGLONG_MAX), DEFAULT(16*128*1024),
+       BLOCK_SIZE(2048));
+
+static Sys_var_ulong Sys_progress_report_time(
+       "progress_report_time",
+       "Seconds between sending progress reports to the client for "
+       "time-consuming statements. Set to 0 to disable progress reporting.",
+       SESSION_VAR(progress_report_time), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, ULONG_MAX), DEFAULT(56), BLOCK_SIZE(1));
+
+static Sys_var_mybool Sys_thread_alarm(
+       "thread_alarm",
+       "Enable system thread alarm calls. Disabling it may be useful "
+       "in debugging or testing, never do it in production",
+       READ_ONLY GLOBAL_VAR(opt_thread_alarm), CMD_LINE(OPT_ARG),
+       DEFAULT(TRUE));
+
+#if 0
+static Sys_var_charptr Sys_log_basename(
+       "log_basename",
+       "Basename for all log files and the .pid file. This sets all log file "
+       "names at once (in 'datadir') and is normally the only option you need "
+       "for specifying log files. This is especially recommend to be set if you "
+       "are using replication as it ensures that your log file names are not "
+       "depending on your host name. Sets names for --log-bin, --log-bin-index, "
+       "--relay-log, --relay-log-index, --general-log-file, "
+       "--log-slow-query-log-file, --log-error-file and --pid-file",
+       READ_ONLY GLOBAL_VAR(opt_log_basename),
+       CMD_LINE(REQUIRED_ARG, OPT_LOG_BASENAME),
+       IN_FS_CHARSET, DEFAULT(0));
+#endif
+
+static Sys_var_mybool Sys_query_cache_strip_comments(
+       "query_cache_strip_comments",
+       "Strip all comments from a query before storing it "
+       "in the query cache",
+       GLOBAL_VAR(opt_query_cache_strip_comments), CMD_LINE(OPT_ARG),
+       DEFAULT(FALSE));
+
+static ulonglong in_transaction(THD *thd)
+{
+  return test(thd->server_status & SERVER_STATUS_IN_TRANS);
+}
+static Sys_var_session_special Sys_in_transaction(
+       "in_transaction", "Whether there is an active transaction",
+       READ_ONLY sys_var::ONLY_SESSION, NO_CMD_LINE,
+       VALID_RANGE(0, 1), BLOCK_SIZE(1), NO_MUTEX_GUARD,
+       NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0), ON_READ(in_transaction));
+
diff --git a/sql/sys_vars.h b/sql/sys_vars.h
index 14d99407f37..1b9a22b1db7 100644
--- a/sql/sys_vars.h
+++ b/sql/sys_vars.h
@@ -53,6 +53,7 @@
 #define READ_ONLY sys_var::READONLY+
 // this means that Sys_var_charptr initial value was malloc()ed
 #define PREALLOCATED sys_var::ALLOCATED+
+#define PARSED_EARLY sys_var::PARSE_EARLY+
 /*
   Sys_var_bit meaning is reversed, like in
   @@foreign_key_checks <-> OPTION_NO_FOREIGN_KEY_CHECKS
@@ -109,12 +110,10 @@ public:
           enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
           on_check_function on_check_func=0,
           on_update_function on_update_func=0,
-          uint deprecated_version=0, const char *substitute=0,
-          int parse_flag= PARSE_NORMAL)
+          uint deprecated_version=0, const char *substitute=0)
     : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
               getopt.arg_type, SHOWT, def_val, lock, binlog_status_arg,
-              on_check_func, on_update_func, deprecated_version,
-              substitute, parse_flag)
+              on_check_func, on_update_func, deprecated_version, substitute)
   {
     option.var_type= ARGT;
     option.min_value= min_val;
@@ -197,11 +196,11 @@ public:
           ulonglong def_val, PolyLock *lock,
           enum binlog_status_enum binlog_status_arg,
           on_check_function on_check_func, on_update_function on_update_func,
-          uint deprecated_version, const char *substitute, int parse_flag= PARSE_NORMAL)
+          uint deprecated_version, const char *substitute)
     : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
               getopt.arg_type, show_val_type_arg, def_val, lock,
               binlog_status_arg, on_check_func,
-              on_update_func, deprecated_version, substitute, parse_flag)
+              on_update_func, deprecated_version, substitute)
   {
     for (typelib.count= 0; values[typelib.count]; typelib.count++) /*no-op */;
     typelib.name="";
@@ -311,12 +310,11 @@ public:
           enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
           on_check_function on_check_func=0,
           on_update_function on_update_func=0,
-          uint deprecated_version=0, const char *substitute=0,
-          int parse_flag= PARSE_NORMAL)
+          uint deprecated_version=0, const char *substitute=0)
     : Sys_var_typelib(name_arg, comment, flag_args, off, getopt,
                       SHOW_MY_BOOL, bool_values, def_val, lock,
                       binlog_status_arg, on_check_func, on_update_func,
-                      deprecated_version, substitute, parse_flag)
+                      deprecated_version, substitute)
   {
     option.var_type= GET_BOOL;
     global_var(my_bool)= def_val;
@@ -367,12 +365,11 @@ public:
           enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
           on_check_function on_check_func=0,
           on_update_function on_update_func=0,
-          uint deprecated_version=0, const char *substitute=0,
-          int parse_flag= PARSE_NORMAL)
+          uint deprecated_version=0, const char *substitute=0)
     : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
               getopt.arg_type, SHOW_CHAR_PTR, (intptr)def_val,
               lock, binlog_status_arg, on_check_func, on_update_func,
-              deprecated_version, substitute, parse_flag)
+              deprecated_version, substitute)
   {
     is_os_charset= is_os_charset_arg == IN_FS_CHARSET;
     /*
@@ -461,7 +458,7 @@ public:
     : sys_var(&all_sys_vars, name_arg, comment,
               sys_var::READONLY+sys_var::ONLY_SESSION, 0, -1,
               NO_ARG, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG,
-              NULL, NULL, 0, NULL, PARSE_NORMAL)
+              NULL, NULL, 0, NULL)
   {
     is_os_charset= is_os_charset_arg == IN_FS_CHARSET;
     option.var_type= GET_STR;
@@ -574,12 +571,11 @@ public:
                enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
                on_check_function on_check_func=0,
                on_update_function on_update_func=0,
-               uint deprecated_version=0, const char *substitute=0,
-               int parse_flag= PARSE_NORMAL)
+               uint deprecated_version=0, const char *substitute=0)
     : sys_var(&all_sys_vars, name_arg, comment, flag_args, 0, getopt.id,
               getopt.arg_type, SHOW_CHAR, (intptr)def_val,
               lock, binlog_status_arg, on_check_func, on_update_func,
-              deprecated_version, substitute, parse_flag)
+              deprecated_version, substitute)
   { option.var_type= GET_NO_ARG; }
   bool do_check(THD *thd, set_var *var)
   {
@@ -752,7 +748,7 @@ static bool update_buffer_size(THD *thd, KEY_CACHE *key_cache,
   mysql_mutex_unlock(&LOCK_global_system_variables);
 
   if (!key_cache->key_cache_inited)
-    error= ha_init_key_cache(0, key_cache);
+    error= ha_init_key_cache(0, key_cache, 0);
   else
     error= ha_resize_key_cache(key_cache);
 
@@ -762,8 +758,9 @@ static bool update_buffer_size(THD *thd, KEY_CACHE *key_cache,
   return error;
 }
 
-static bool update_keycache_param(THD *thd, KEY_CACHE *key_cache,
-                                  ptrdiff_t offset, ulonglong new_value)
+static bool update_keycache(THD *thd, KEY_CACHE *key_cache,
+                            ptrdiff_t offset, ulonglong new_value,
+                            int (*func)(KEY_CACHE *))
 {
   bool error= false;
   DBUG_ASSERT(offset != offsetof(KEY_CACHE, param_buff_size));
@@ -772,14 +769,35 @@ static bool update_keycache_param(THD *thd, KEY_CACHE *key_cache,
 
   key_cache->in_init= 1;
   mysql_mutex_unlock(&LOCK_global_system_variables);
-  error= ha_resize_key_cache(key_cache);
-
+  error= func(key_cache);
   mysql_mutex_lock(&LOCK_global_system_variables);
   key_cache->in_init= 0;
 
   return error;
 }
 
+/* Wrappers that bind update_keycache() to one key cache handler function */
+static bool resize_keycache(THD *thd, KEY_CACHE *key_cache,
+                            ptrdiff_t offset, ulonglong new_value)
+{
+  return update_keycache(thd, key_cache, offset, new_value,
+                         ha_resize_key_cache);
+}
+
+static bool change_keycache_param(THD *thd, KEY_CACHE *key_cache,
+                                  ptrdiff_t offset, ulonglong new_value)
+{
+  return update_keycache(thd, key_cache, offset, new_value,
+                         ha_change_key_cache_param);
+}
+
+static bool repartition_keycache(THD *thd, KEY_CACHE *key_cache,
+                                 ptrdiff_t offset, ulonglong new_value)
+{
+  return update_keycache(thd, key_cache, offset, new_value,
+                         ha_repartition_key_cache);
+}
+
 /**
   The class for floating point variables
 
@@ -797,12 +815,11 @@ public:
           enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
           on_check_function on_check_func=0,
           on_update_function on_update_func=0,
-          uint deprecated_version=0, const char *substitute=0,
-          int parse_flag= PARSE_NORMAL)
+          uint deprecated_version=0, const char *substitute=0)
     : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
               getopt.arg_type, SHOW_DOUBLE, (longlong) double2ulonglong(def_val),
               lock, binlog_status_arg, on_check_func, on_update_func,
-              deprecated_version, substitute, parse_flag)
+              deprecated_version, substitute)
   {
     option.var_type= GET_DOUBLE;
     option.min_value= (longlong) double2ulonglong(min_val);
@@ -1024,7 +1041,7 @@ public:
     global_var(ulonglong)= def_val;
     DBUG_ASSERT(typelib.count > 0);
     DBUG_ASSERT(typelib.count <= 64);
-    DBUG_ASSERT(def_val < MAX_SET(typelib.count));
+    DBUG_ASSERT(def_val <= MAX_SET(typelib.count));
     DBUG_ASSERT(size == sizeof(ulonglong));
   }
   bool do_check(THD *thd, set_var *var)
@@ -1120,12 +1137,11 @@ public:
           enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
           on_check_function on_check_func=0,
           on_update_function on_update_func=0,
-          uint deprecated_version=0, const char *substitute=0,
-          int parse_flag= PARSE_NORMAL)
+          uint deprecated_version=0, const char *substitute=0)
     : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
               getopt.arg_type, SHOW_CHAR, (intptr)def_val,
               lock, binlog_status_arg, on_check_func, on_update_func,
-              deprecated_version, substitute, parse_flag),
+              deprecated_version, substitute),
     plugin_type(plugin_type_arg)
   {
     option.var_type= GET_STR;
@@ -1167,7 +1183,7 @@ public:
     plugin_ref oldval= *valptr;
     if (oldval != newval)
     {
-      *valptr= my_plugin_lock(NULL, &newval);
+      *valptr= my_plugin_lock(NULL, newval);
       plugin_unlock(NULL, oldval);
     }
   }
@@ -1186,7 +1202,7 @@ public:
   void session_save_default(THD *thd, set_var *var)
   {
     plugin_ref plugin= global_var(plugin_ref);
-    var->save_result.plugin= my_plugin_lock(thd, &plugin);
+    var->save_result.plugin= my_plugin_lock(thd, plugin);
   }
   void global_save_default(THD *thd, set_var *var)
   {
@@ -1202,7 +1218,7 @@ public:
       plugin= my_plugin_lock_by_name(thd, &pname, plugin_type);
     DBUG_ASSERT(plugin);
 
-    var->save_result.plugin= my_plugin_lock(thd, &plugin);
+    var->save_result.plugin= my_plugin_lock(thd, plugin);
   }
   bool check_update_type(Item_result type)
   { return type != STRING_RESULT; }
@@ -1234,12 +1250,11 @@ public:
                enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
                on_check_function on_check_func=0,
                on_update_function on_update_func=0,
-               uint deprecated_version=0, const char *substitute=0,
-               int parse_flag= PARSE_NORMAL)
+               uint deprecated_version=0, const char *substitute=0)
     : sys_var(&all_sys_vars, name_arg, comment, flag_args, 0, getopt.id,
               getopt.arg_type, SHOW_CHAR, (intptr)def_val,
               lock, binlog_status_arg, on_check_func, on_update_func,
-              deprecated_version, substitute, parse_flag)
+              deprecated_version, substitute)
   {
     DBUG_ASSERT(scope() == ONLY_SESSION);
     option.var_type= GET_NO_ARG;
@@ -1433,6 +1448,57 @@ public:
   }
 };
 
+/* Session-only double variable read and updated through callback functions */
+class Sys_var_session_special_double: public Sys_var_double
+{
+  typedef bool (*session_special_update_function)(THD *thd, set_var *var);
+  typedef double (*session_special_read_function)(THD *thd);
+
+  session_special_read_function read_func;     // called by session_value_ptr()
+  session_special_update_function update_func; // called by session_update()
+public:
+  Sys_var_session_special_double(const char *name_arg,
+               const char *comment, int flag_args,
+               CMD_LINE getopt,
+               double min_val, double max_val,
+               PolyLock *lock, enum binlog_status_enum binlog_status_arg,
+               on_check_function on_check_func,
+               session_special_update_function update_func_arg,
+               session_special_read_function read_func_arg,
+               uint deprecated_version=0, const char *substitute=0)
+    : Sys_var_double(name_arg, comment, flag_args, 0,
+              sizeof(double), getopt, min_val,
+              max_val, 0, lock, binlog_status_arg, on_check_func, 0,
+              deprecated_version, substitute),
+      read_func(read_func_arg), update_func(update_func_arg)
+  {
+    DBUG_ASSERT(scope() == ONLY_SESSION);
+    DBUG_ASSERT(getopt.id == -1); // NO_CMD_LINE, because the offset is fake
+  }
+  bool session_update(THD *thd, set_var *var)
+  { return update_func(thd, var); }
+  bool global_update(THD *thd, set_var *var)
+  {
+    DBUG_ASSERT(FALSE);           // not expected: variable is ONLY_SESSION
+    return true;
+  }
+  void session_save_default(THD *thd, set_var *var)
+  { var->value= 0; }
+  void global_save_default(THD *thd, set_var *var)
+  { DBUG_ASSERT(FALSE); }
+  uchar *session_value_ptr(THD *thd, LEX_STRING *base)
+  {
+    thd->sys_var_tmp.double_value= read_func(thd);
+    return (uchar*) &thd->sys_var_tmp.double_value;
+  }
+  uchar *global_value_ptr(THD *thd, LEX_STRING *base)
+  {
+    DBUG_ASSERT(FALSE);           // not expected: variable is ONLY_SESSION
+    return 0;
+  }
+};
+
+
 /**
   The class for read-only variables that show whether a particular
   feature is supported by the server. Example: have_compression
@@ -1453,12 +1519,11 @@ public:
                enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
                on_check_function on_check_func=0,
                on_update_function on_update_func=0,
-               uint deprecated_version=0, const char *substitute=0,
-               int parse_flag= PARSE_NORMAL)
+               uint deprecated_version=0, const char *substitute=0)
     : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
               getopt.arg_type, SHOW_CHAR, 0,
               lock, binlog_status_arg, on_check_func, on_update_func,
-              deprecated_version, substitute, parse_flag)
+              deprecated_version, substitute)
   {
     DBUG_ASSERT(scope() == GLOBAL);
     DBUG_ASSERT(getopt.id == -1);
@@ -1522,12 +1587,11 @@ public:
           enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
           on_check_function on_check_func=0,
           on_update_function on_update_func=0,
-          uint deprecated_version=0, const char *substitute=0,
-          int parse_flag= PARSE_NORMAL)
+          uint deprecated_version=0, const char *substitute=0)
     : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
               getopt.arg_type, SHOW_CHAR, (intptr)def_val,
               lock, binlog_status_arg, on_check_func, on_update_func,
-              deprecated_version, substitute, parse_flag),
+              deprecated_version, substitute),
       name_offset(name_off)
   {
     option.var_type= GET_STR;
@@ -1546,12 +1610,12 @@ public:
   { return false; }
   bool session_update(THD *thd, set_var *var)
   {
-    session_var(thd, void*)= var->save_result.ptr;
+    session_var(thd, const void*)= var->save_result.ptr;
     return false;
   }
   bool global_update(THD *thd, set_var *var)
   {
-    global_var(void*)= var->save_result.ptr;
+    global_var(const void*)= var->save_result.ptr;
     return false;
   }
   void session_save_default(THD *thd, set_var *var)
@@ -1595,12 +1659,11 @@ public:
              enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
              on_check_function on_check_func=0,
              on_update_function on_update_func=0,
-             uint deprecated_version=0, const char *substitute=0,
-             int parse_flag= PARSE_NORMAL)
+             uint deprecated_version=0, const char *substitute=0)
     : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
               getopt.arg_type, SHOW_CHAR, (intptr)def_val,
               lock, binlog_status_arg, on_check_func, on_update_func,
-              deprecated_version, substitute, parse_flag)
+              deprecated_version, substitute)
   {
     DBUG_ASSERT(getopt.id == -1);
     DBUG_ASSERT(size == sizeof(Time_zone *));
@@ -1662,7 +1725,16 @@ public:
   { return type != STRING_RESULT; }
 };
 
+/**
+  Special implementation for transaction isolation, that
+  distinguishes between
+
+  SET GLOBAL TRANSACTION ISOLATION (stored in global_system_variables)
+  SET SESSION TRANSACTION ISOLATION (stored in thd->variables)
+  SET TRANSACTION ISOLATION (stored in thd->tx_isolation)
 
+  where the last statement sets isolation level for the next transaction only
+*/
 class Sys_var_tx_isolation: public Sys_var_enum
 {
 public:
@@ -1675,7 +1747,14 @@ public:
     :Sys_var_enum(name_arg, comment, flag_args, off, size, getopt,
                   values, def_val, lock, binlog_status_arg, on_check_func)
   {}
-  virtual bool session_update(THD *thd, set_var *var);
+  bool session_update(THD *thd, set_var *var)
+  {
+    if (var->type == OPT_SESSION && Sys_var_enum::session_update(thd, var))
+      return TRUE;                      // base class update returned true
+    if (var->type == OPT_DEFAULT || !thd->in_active_multi_stmt_transaction())
+      thd->tx_isolation= (enum_tx_isolation) var->save_result.ulonglong_value;
+    return FALSE;
+  }
 };
 
 /****************************************************************************
diff --git a/sql/table.cc b/sql/table.cc
index be71663e4c3..a157dbeb15a 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -1,5 +1,6 @@
 /*
-   Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -32,9 +33,12 @@
                                  // fix_partition_func, partition_info
 #include "sql_acl.h"             // *_ACL, acl_getroot_no_password
 #include "sql_base.h"            // release_table_share
+#include "create_options.h"
 #include <m_ctype.h>
 #include "my_md5.h"
+#include "my_bit.h"
 #include "sql_select.h"
+#include "sql_derived.h"
 #include "mdl.h"                 // MDL_wait_for_graph_visitor
 
 /* INFORMATION_SCHEMA name */
@@ -52,6 +56,12 @@ LEX_STRING GENERAL_LOG_NAME= {C_STRING_WITH_LEN("general_log")};
 /* SLOW_LOG name */
 LEX_STRING SLOW_LOG_NAME= {C_STRING_WITH_LEN("slow_log")};
 
+/* 
+  Keyword added as a prefix when parsing the defining expression for a
+  virtual column read from the column definition saved in the frm file
+*/
+LEX_STRING parse_vcol_keyword= { C_STRING_WITH_LEN("PARSE_VCOL_EXPR ") };
+
 	/* Functions defined in this file */
 
 void open_table_error(TABLE_SHARE *share, int error, int db_errno,
@@ -513,34 +523,34 @@ inline bool is_system_table_name(const char *name, uint length)
   CHARSET_INFO *ci= system_charset_info;
 
   return (
-           /* mysql.proc table */
-           (length == 4 &&
-             my_tolower(ci, name[0]) == 'p' && 
-             my_tolower(ci, name[1]) == 'r' &&
-             my_tolower(ci, name[2]) == 'o' &&
-             my_tolower(ci, name[3]) == 'c') ||
-
-           (length > 4 &&
-             (
-               /* one of mysql.help* tables */
-               (my_tolower(ci, name[0]) == 'h' &&
-                 my_tolower(ci, name[1]) == 'e' &&
-                 my_tolower(ci, name[2]) == 'l' &&
-                 my_tolower(ci, name[3]) == 'p') ||
-
-               /* one of mysql.time_zone* tables */
-               (my_tolower(ci, name[0]) == 't' &&
-                 my_tolower(ci, name[1]) == 'i' &&
-                 my_tolower(ci, name[2]) == 'm' &&
-                 my_tolower(ci, name[3]) == 'e') ||
-
-               /* mysql.event table */
-               (my_tolower(ci, name[0]) == 'e' &&
-                 my_tolower(ci, name[1]) == 'v' &&
-                 my_tolower(ci, name[2]) == 'e' &&
-                 my_tolower(ci, name[3]) == 'n' &&
-                 my_tolower(ci, name[4]) == 't')
-             )
+          /* mysql.proc table */
+          (length == 4 &&
+           my_tolower(ci, name[0]) == 'p' && 
+           my_tolower(ci, name[1]) == 'r' &&
+           my_tolower(ci, name[2]) == 'o' &&
+           my_tolower(ci, name[3]) == 'c') ||
+
+          (length > 4 &&
+           (
+            /* one of mysql.help* tables */
+            (my_tolower(ci, name[0]) == 'h' &&
+             my_tolower(ci, name[1]) == 'e' &&
+             my_tolower(ci, name[2]) == 'l' &&
+             my_tolower(ci, name[3]) == 'p') ||
+
+            /* one of mysql.time_zone* tables */
+            (my_tolower(ci, name[0]) == 't' &&
+             my_tolower(ci, name[1]) == 'i' &&
+             my_tolower(ci, name[2]) == 'm' &&
+             my_tolower(ci, name[3]) == 'e') ||
+
+            /* mysql.event table */
+            (my_tolower(ci, name[0]) == 'e' &&
+             my_tolower(ci, name[1]) == 'v' &&
+             my_tolower(ci, name[2]) == 'e' &&
+             my_tolower(ci, name[3]) == 'n' &&
+             my_tolower(ci, name[4]) == 't')
+            )
            )
          );
 }
@@ -550,11 +560,11 @@ inline bool is_system_table_name(const char *name, uint length)
   Check if a string contains path elements
 */  
 
-static inline bool has_disabled_path_chars(const char *str)
+static bool has_disabled_path_chars(const char *str)
 {
   for (; *str; str++)
-    switch (*str)
-    {
+  {
+    switch (*str) {
       case FN_EXTCHAR:
       case '/':
       case '\\':
@@ -562,6 +572,7 @@ static inline bool has_disabled_path_chars(const char *str)
       case '@':
         return TRUE;
     }
+  }
   return FALSE;
 }
 
@@ -751,9 +762,11 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   const char **interval_array;
   enum legacy_db_type legacy_db_type;
   my_bitmap_map *bitmaps;
+  bool null_bits_are_used;
+  uint vcol_screen_length, UNINIT_VAR(options_len);
+  char *vcol_screen_pos;
+  uchar *UNINIT_VAR(options);
   uchar *extra_segment_buff= 0;
-  const uint format_section_header_size= 8;
-  uchar *format_section_fields= 0;
   DBUG_ENTER("open_binary_frm");
 
   new_field_pack_flag= head[27];
@@ -803,6 +816,8 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   if (!head[32])				// New frm file in 3.23
   {
     share->avg_row_length= uint4korr(head+34);
+    share->transactional= (ha_choice) (head[39] & 3);
+    share->page_checksum= (ha_choice) ((head[39] >> 2) & 3);
     share->row_type= (row_type) head[40];
     share->table_charset= get_charset((((uint) head[41]) << 8) + 
                                         (uint) head[38],MYF(0));
@@ -824,8 +839,6 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   share->db_record_offset= 1;
   if (db_create_options & HA_OPTION_LONG_BLOB_PTR)
     share->blob_ptr_size= portable_sizeof_char_ptr;
-  /* Set temporarily a good value for db_low_byte_first */
-  share->db_low_byte_first= test(legacy_db_type != DB_TYPE_ISAM);
   error=4;
   share->max_rows= uint4korr(head+18);
   share->min_rows= uint4korr(head+22);
@@ -863,7 +876,6 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
 
   for (i=0 ; i < keys ; i++, keyinfo++)
   {
-    keyinfo->table= 0;                           // Updated in open_frm
     if (new_frm_ver >= 3)
     {
       keyinfo->flags=	   (uint) uint2korr(strpos) ^ HA_NOSAME;
@@ -929,6 +941,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   }
 
   share->reclength = uint2korr((head+16));
+  share->stored_rec_length= share->reclength;
   if (*(head+26) == 1)
     share->system= 1;				/* one-record-database */
 #ifdef HAVE_CRYPTED_FRM
@@ -948,7 +961,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
     /* Read extra data segment */
     uchar *next_chunk, *buff_end;
     DBUG_PRINT("info", ("extra segment size is %u bytes", n_length));
-    if (!(extra_segment_buff= (uchar*) my_malloc(n_length, MYF(MY_WME))))
+    if (!(extra_segment_buff= (uchar*) my_malloc(n_length + 1, MYF(MY_WME))))
       goto err;
     next_chunk= extra_segment_buff;
     if (mysql_file_pread(file, extra_segment_buff,
@@ -991,7 +1004,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
           replacing it with a globally locked version of tmp_plugin
         */
         plugin_unlock(NULL, share->db_plugin);
-        share->db_plugin= my_plugin_lock(NULL, &tmp_plugin);
+        share->db_plugin= my_plugin_lock(NULL, tmp_plugin);
         DBUG_PRINT("info", ("setting dbtype to '%.*s' (%d)",
                             str_db_type_length, next_chunk + 2,
                             ha_legacy_type(share->db_type())));
@@ -1055,20 +1068,6 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
 #endif
       next_chunk+= 5 + partition_info_str_len;
     }
-#if MYSQL_VERSION_ID < 50200
-    if (share->mysql_version >= 50106 && share->mysql_version <= 50109)
-    {
-      /*
-         Partition state array was here in version 5.1.6 to 5.1.9, this code
-         makes it possible to load a 5.1.6 table in later versions. Can most
-         likely be removed at some point in time. Will only be used for
-         upgrades within 5.1 series of versions. Upgrade to 5.2 can only be
-         done from newer 5.1 versions.
-      */
-      next_chunk+= 4;
-    }
-    else
-#endif
     if (share->mysql_version >= 50110)
     {
       /* New auto_partitioned indicator introduced in 5.1.11 */
@@ -1076,6 +1075,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
       share->auto_partitioned= *next_chunk;
 #endif
       next_chunk++;
+      DBUG_ASSERT(next_chunk <= buff_end);
     }
     keyinfo= share->key_info;
     for (i= 0; i < keys; i++, keyinfo++)
@@ -1101,6 +1101,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
         }
       }
     }
+
     if (forminfo[46] == (uchar)255)
     {
       //reading long table comment
@@ -1119,57 +1120,19 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
       next_chunk+= 2 + share->comment.length;
     }
 
-    if (next_chunk + format_section_header_size < buff_end)
+    DBUG_ASSERT(next_chunk <= buff_end);
+
+    if (share->db_create_options & HA_OPTION_TEXT_CREATE_OPTIONS)
     {
       /*
-        New extra data segment called "format section" with additional
-        table and column properties introduced by MySQL Cluster
-        based on 5.1.20
-
-        Table properties:
-        TABLESPACE <ts> and STORAGE [DISK|MEMORY]
-
-        Column properties:
-        COLUMN_FORMAT [DYNAMIC|FIXED] and STORAGE [DISK|MEMORY]
+        Remember where the options are stored, but skip over them
+        until the number of fields is known
       */
-      DBUG_PRINT("info", ("Found format section"));
-
-      /* header */
-      const uint format_section_length= uint2korr(next_chunk);
-      const uint format_section_flags= uint4korr(next_chunk+2);
-      /* 2 bytes unused */
-
-      if (next_chunk + format_section_length > buff_end)
-      {
-        DBUG_PRINT("error", ("format section length too long: %u",
-                             format_section_length));
-        goto err;
-      }
-      DBUG_PRINT("info", ("format_section_length: %u, format_section_flags: %u",
-                          format_section_length, format_section_flags));
-
-      share->default_storage_media=
-        (enum ha_storage_media) (format_section_flags & 0x7);
-
-      /* tablespace */
-      const char *tablespace=
-        (const char*)next_chunk + format_section_header_size;
-      const uint tablespace_length= strlen(tablespace);
-      if (tablespace_length &&
-          !(share->tablespace= strmake_root(&share->mem_root,
-                                            tablespace, tablespace_length+1)))
-      {
-        goto err;
-      }
-      DBUG_PRINT("info", ("tablespace: '%s'",
-                          share->tablespace ? share->tablespace : "<null>"));
-
-      /* pointer to format section for fields */
-      format_section_fields=
-        next_chunk + format_section_header_size + tablespace_length + 1;
-
-      next_chunk+= format_section_length;
+      options_len= uint4korr(next_chunk);
+      options= next_chunk + 4;
+      next_chunk+= options_len + 4;
     }
+    DBUG_ASSERT(next_chunk <= buff_end);
   }
   share->key_block_size= uint2korr(head+62);
 
@@ -1179,11 +1142,11 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   share->rec_buff_length= rec_buff_length;
   if (!(record= (uchar *) alloc_root(&share->mem_root,
                                      rec_buff_length)))
-    goto err;                                   /* purecov: inspected */
+    goto err;                          /* purecov: inspected */
   share->default_values= record;
   if (mysql_file_pread(file, record, (size_t) share->reclength,
                        record_offset, MYF(MY_NABP)))
-    goto err;                                   /* purecov: inspected */
+    goto err;                          /* purecov: inspected */
 
   mysql_file_seek(file, pos+288, MY_SEEK_SET, MYF(0));
 #ifdef HAVE_CRYPTED_FRM
@@ -1191,7 +1154,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   {
     crypted->decode((char*) forminfo+256,288-256);
     if (sint2korr(forminfo+284) != 0)		// Should be 0
-      goto err;                                 // Wrong password
+      goto err;                        // Wrong password
   }
 #endif
 
@@ -1203,6 +1166,9 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   int_length= uint2korr(forminfo+274);
   share->null_fields= uint2korr(forminfo+282);
   com_length= uint2korr(forminfo+284);
+  vcol_screen_length= uint2korr(forminfo+286);
+  share->vfields= 0;
+  share->stored_fields= share->fields;
   if (forminfo[46] != (uchar)255)
   {
     share->comment.length=  (int) (forminfo[46]);
@@ -1210,7 +1176,8 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
                                      share->comment.length);
   }
 
-  DBUG_PRINT("info",("i_count: %d  i_parts: %d  index: %d  n_length: %d  int_length: %d  com_length: %d", interval_count,interval_parts, share->keys,n_length,int_length, com_length));
+  DBUG_PRINT("info",("i_count: %d  i_parts: %d  index: %d  n_length: %d  int_length: %d  com_length: %d  vcol_screen_length: %d", interval_count,interval_parts, share->keys,n_length,int_length, com_length, vcol_screen_length));
+
 
   if (!(field_ptr = (Field **)
 	alloc_root(&share->mem_root,
@@ -1218,14 +1185,16 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
 			   interval_count*sizeof(TYPELIB)+
 			   (share->fields+interval_parts+
 			    keys+3)*sizeof(char *)+
-			   (n_length+int_length+com_length)))))
-    goto err;                                   /* purecov: inspected */
+			   (n_length+int_length+com_length+
+			       vcol_screen_length)))))
+    goto err;                           /* purecov: inspected */
 
   share->field= field_ptr;
   read_length=(uint) (share->fields * field_pack_length +
-		      pos+ (uint) (n_length+int_length+com_length));
+		      pos+ (uint) (n_length+int_length+com_length+
+		                   vcol_screen_length));
   if (read_string(file,(uchar**) &disk_buff,read_length))
-    goto err;                                   /* purecov: inspected */
+    goto err;                           /* purecov: inspected */
 #ifdef HAVE_CRYPTED_FRM
   if (crypted)
   {
@@ -1244,7 +1213,11 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   memcpy((char*) names, strpos+(share->fields*field_pack_length),
 	 (uint) (n_length+int_length));
   comment_pos= names+(n_length+int_length);
-  memcpy(comment_pos, disk_buff+read_length-com_length, com_length);
+  memcpy(comment_pos, disk_buff+read_length-com_length-vcol_screen_length, 
+         com_length);
+  vcol_screen_pos= names+(n_length+int_length+com_length);
+  memcpy(vcol_screen_pos, disk_buff+read_length-vcol_screen_length, 
+         vcol_screen_length);
 
   fix_type_pointers(&interval_array, &share->fieldnames, 1, &names);
   if (share->fieldnames.count != share->fields)
@@ -1281,6 +1254,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
     goto err;
 
   record= share->default_values-1;              /* Fieldstart = 1 */
+  null_bits_are_used= share->null_fields != 0;
   if (share->null_field_first)
   {
     null_flags= null_pos= (uchar*) record+1;
@@ -1312,10 +1286,14 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   for (i=0 ; i < share->fields; i++, strpos+=field_pack_length, field_ptr++)
   {
     uint pack_flag, interval_nr, unireg_type, recpos, field_length;
+    uint vcol_info_length=0;
+    uint vcol_expr_length=0;
     enum_field_types field_type;
     CHARSET_INFO *charset=NULL;
     Field::geometry_type geom_type= Field::GEOM_GEOMETRY;
     LEX_STRING comment;
+    Virtual_column_info *vcol_info= 0;
+    bool fld_stored_in_db= TRUE;
 
     if (new_frm_ver >= 3)
     {
@@ -1351,6 +1329,18 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
           goto err;
         }
       }
+
+      if ((uchar)field_type == (uchar)MYSQL_TYPE_VIRTUAL)
+      {
+        DBUG_ASSERT(interval_nr); // Expect non-null expression
+        /* 
+          The interval_id byte in the .frm file stores the length of the
+          expression statement for a virtual column.
+        */
+        vcol_info_length= interval_nr;
+        interval_nr= 0;
+      }
+
       if (!comment_length)
       {
 	comment.str= (char*) "";
@@ -1362,6 +1352,34 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
 	comment.length= comment_length;
 	comment_pos+=   comment_length;
       }
+
+      if (vcol_info_length)
+      {
+        /*
+          Get virtual column data stored in the .frm file as follows:
+          byte 1      = 1 (always 1 to allow for future extensions)
+          byte 2      = sql_type
+          byte 3      = flags (as of now, 0 - no flags, 1 - field is physically stored)
+          byte 4-...  = virtual column expression (text data)
+        */
+        vcol_info= new Virtual_column_info();
+        if ((uint)vcol_screen_pos[0] != 1)
+        {
+          error= 4;
+          goto err;
+        }
+        field_type= (enum_field_types) (uchar) vcol_screen_pos[1];
+        fld_stored_in_db= (bool) (uint) vcol_screen_pos[2];
+        vcol_expr_length= vcol_info_length-(uint)FRM_VCOL_HEADER_SIZE;
+        if (!(vcol_info->expr_str.str=
+              (char *)memdup_root(&share->mem_root,
+                                  vcol_screen_pos+(uint)FRM_VCOL_HEADER_SIZE,
+                                  vcol_expr_length)))
+          goto err;
+        vcol_info->expr_str.length= vcol_expr_length;
+        vcol_screen_pos+= vcol_info_length;
+        share->vfields++;
+      }
     }
     else
     {
@@ -1452,8 +1470,11 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
 
     reg_field->field_index= i;
     reg_field->comment=comment;
+    reg_field->vcol_info= vcol_info;
+    reg_field->stored_in_db= fld_stored_in_db;
     if (field_type == MYSQL_TYPE_BIT && !f_bit_as_char(pack_flag))
     {
+      null_bits_are_used= 1;
       if ((null_bit_pos+= field_length & 7) > 7)
       {
         null_pos++;
@@ -1474,7 +1495,9 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
       share->timestamp_field_offset= i;
 
     if (use_hash)
-      if (my_hash_insert(&share->name_hash, (uchar*) field_ptr) )
+    {
+      if (my_hash_insert(&share->name_hash,
+                         (uchar*) field_ptr))
       {
         /*
           Set return code 8 here to indicate that an error has
@@ -1484,20 +1507,18 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
         error= 8; 
         goto err;
       }
-
-    if (format_section_fields)
+    }
+    if (!reg_field->stored_in_db)
     {
-      const uchar field_flags= format_section_fields[i];
-      const uchar field_storage= (field_flags & STORAGE_TYPE_MASK);
-      const uchar field_column_format=
-        ((field_flags >> COLUMN_FORMAT_SHIFT)& COLUMN_FORMAT_MASK);
-      DBUG_PRINT("debug", ("field flags: %u, storage: %u, column_format: %u",
-                           field_flags, field_storage, field_column_format));
-      (void)field_storage; /* Reserved by and used in MySQL Cluster */
-      (void)field_column_format; /* Reserved by and used in MySQL Cluster */
+      share->stored_fields--;
+      if (share->stored_rec_length>=recpos)
+        share->stored_rec_length= recpos-1;
     }
   }
   *field_ptr=0;					// End marker
+  /* Sanity checks: */
+  DBUG_ASSERT(share->fields>=share->stored_fields);
+  DBUG_ASSERT(share->reclength>=share->stored_rec_length);
 
   /* Fix key->name and key_part->field */
   if (key_parts)
@@ -1512,6 +1533,19 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
     {
       uint usable_parts= 0;
       keyinfo->name=(char*) share->keynames.type_names[key];
+      keyinfo->name_length= strlen(keyinfo->name);
+      keyinfo->cache_name=
+        (uchar*) alloc_root(&share->mem_root,
+                            share->table_cache_key.length+
+                            keyinfo->name_length + 1);
+      if (keyinfo->cache_name)           // If not out of memory
+      {
+        uchar *pos= keyinfo->cache_name;
+        memcpy(pos, share->table_cache_key.str, share->table_cache_key.length);
+        memcpy(pos + share->table_cache_key.length, keyinfo->name,
+               keyinfo->name_length+1);
+      }
+
       /* Fix fulltext keys for old .frm files */
       if (share->key_info[key].flags & HA_FULLTEXT)
 	share->key_info[key].algorithm= HA_KEY_ALG_FULLTEXT;
@@ -1559,7 +1593,6 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
           key_part->null_bit= field->null_bit;
           key_part->store_length+=HA_KEY_NULL_LENGTH;
           keyinfo->flags|=HA_NULL_PART_KEY;
-          keyinfo->extra_length+= HA_KEY_NULL_LENGTH;
           keyinfo->key_length+= HA_KEY_NULL_LENGTH;
         }
         if (field->type() == MYSQL_TYPE_BLOB ||
@@ -1571,7 +1604,6 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
             key_part->key_part_flag|= HA_BLOB_PART;
           else
             key_part->key_part_flag|= HA_VAR_LENGTH_PART;
-          keyinfo->extra_length+=HA_KEY_BLOB_LENGTH;
           key_part->store_length+=HA_KEY_BLOB_LENGTH;
           keyinfo->key_length+= HA_KEY_BLOB_LENGTH;
         }
@@ -1644,21 +1676,22 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
                                 share->table_name.str,
                                 share->table_name.str);
             share->crashed= 1;                // Marker for CHECK TABLE
-            goto to_be_deleted;
+            continue;
           }
 #endif
           key_part->key_part_flag|= HA_PART_KEY_SEG;
         }
-
-	to_be_deleted:
-
-        /*
-          If the field can be NULL, don't optimize away the test
-          key_part_column = expression from the WHERE clause
-          as we need to test for NULL = NULL.
-        */
         if (field->real_maybe_null())
           key_part->key_part_flag|= HA_NULL_PART;
+        /*
+          Key parts may be compared with memcmp() unless they are blob,
+          variable-length, bit, or floating point (FLOAT/DOUBLE) parts.
+        */
+        if (!(key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART |
+                                         HA_BIT_PART)) &&
+            key_part->type != HA_KEYTYPE_FLOAT &&
+            key_part->type != HA_KEYTYPE_DOUBLE)
+          key_part->key_part_flag|= HA_CAN_MEMCMP;
       }
       keyinfo->usable_key_parts= usable_parts; // Filesort
 
@@ -1707,6 +1740,15 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
           null_length, 255);
   }
 
+  if (share->db_create_options & HA_OPTION_TEXT_CREATE_OPTIONS)
+  {
+    DBUG_ASSERT(options_len);
+    if (engine_table_options_frm_read(options, options_len, share))
+      goto err;
+  }
+  if (parse_engine_table_options(thd, handler_file->partition_ht(), share))
+    goto err;
+
   if (share->found_next_number_field)
   {
     reg_field= *share->found_next_number_field;
@@ -1748,8 +1790,10 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   share->null_bytes= (null_pos - (uchar*) null_flags +
                       (null_bit_pos + 7) / 8);
   share->last_null_bit_pos= null_bit_pos;
+  share->null_bytes_for_compare= null_bits_are_used ? share->null_bytes : 0;
+  share->can_cmp_whole_record= (share->blob_fields == 0 &&
+                                share->varchar_fields == 0);
 
-  share->db_low_byte_first= handler_file->low_byte_first();
   share->column_bitmap_size= bitmap_buffer_size(share->fields);
 
   if (!(bitmaps= (my_bitmap_map*) alloc_root(&share->mem_root,
@@ -1792,6 +1836,284 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   DBUG_RETURN(error);
 } /* open_binary_frm */
 
+/*
+  @brief
+    Clear GET_FIXED_FIELDS_FLAG in all fields of a table
+
+  @param
+    table     The table for whose fields the flags are to be cleared
+
+  @note
+    This routine is used for error handling purposes.
+
+  @return
+    none
+*/
+
+static void clear_field_flag(TABLE *table)
+{
+  DBUG_ENTER("clear_field_flag");
+
+  /* table->field is walked until its terminating NULL entry. */
+  for (Field **cur= table->field; *cur != NULL; ++cur)
+    (*cur)->flags&= ~GET_FIXED_FIELDS_FLAG;
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  @brief 
+    Perform semantic analysis of the defining expression for a virtual column
+
+  @param
+    thd           The thread object
+  @param
+    table         The table containing the virtual column
+  @param
+    vcol_field    The virtual field whose defining expression is to be analyzed
+
+  @details
+    The function performs semantic analysis of the defining expression for
+    the virtual column vcol_field. The expression is used to compute the
+    values of this column.
+
+  @note
+   The function exploits the fact  that the fix_fields method sets the flag 
+   GET_FIXED_FIELDS_FLAG for all fields in the item tree.
+   This flag must always be unset before returning from this function
+   since it is used for other purposes as well.
+ 
+  @retval
+    TRUE           An error occurred, something was wrong with the function
+  @retval
+    FALSE          Otherwise
+*/
+
+bool fix_vcol_expr(THD *thd,
+                   TABLE *table,
+                   Field *vcol_field)
+{
+  Virtual_column_info *vcol_info= vcol_field->vcol_info;
+  Item* func_expr= vcol_info->expr_item;
+  bool result= TRUE;                    // stays TRUE until all checks pass
+  int error;
+  const char *save_where;
+  Field **ptr, *field;
+  enum_mark_columns save_mark_used_columns= thd->mark_used_columns;
+  DBUG_ASSERT(func_expr);
+  DBUG_ENTER("fix_vcol_expr");
+
+  thd->mark_used_columns= MARK_COLUMNS_NONE;
+
+  save_where= thd->where;
+  thd->where= "virtual column function";
+
+  /* Fix fields referenced to by the virtual column function */
+  error= func_expr->fix_fields(thd, (Item**)0);
+  /* Restore thd->where on failure too, so it is not left overridden */
+  thd->where= save_where;
+
+  if (unlikely(error))
+  {
+    DBUG_PRINT("info",
+    ("Field in virtual column expression does not belong to the table"));
+    goto end;
+  }
+  if (unlikely(func_expr->result_type() == ROW_RESULT))
+  {
+     my_error(ER_ROW_EXPR_FOR_VCOL, MYF(0));
+     goto end;
+  }
+#ifdef PARANOID
+  /*
+    Check that every item in the tree may be part of a virtual column.
+  */
+  error= func_expr->walk(&Item::check_vcol_func_processor, 0, NULL);
+  if (error)
+  {
+    my_error(ER_VIRTUAL_COLUMN_FUNCTION_IS_NOT_ALLOWED, MYF(0),
+             vcol_field->field_name);
+    goto end;
+  }
+#endif
+  if (unlikely(func_expr->const_item()))
+  {
+    my_error(ER_CONST_EXPR_IN_VCOL, MYF(0));
+    goto end;
+  }
+  /* Ensure that this virtual column is not based on another virtual field. */
+  ptr= table->field;
+  while ((field= *(ptr++)))
+  {
+    if ((field->flags & GET_FIXED_FIELDS_FLAG) &&
+        (field->vcol_info))
+    {
+      my_error(ER_VCOL_BASED_ON_VCOL, MYF(0));
+      goto end;
+    }
+  }
+  result= FALSE;
+
+end:
+
+  /* Clear GET_FIXED_FIELDS_FLAG for the fields of the table */
+  clear_field_flag(table);
+
+  table->get_fields_in_item_tree= FALSE;
+  thd->mark_used_columns= save_mark_used_columns;
+  table->map= 0; //Restore old value
+
+  DBUG_RETURN(result);
+}
+
+/*
+  @brief
+    Unpack the definition of a virtual column from its linear representation
+
+  @param
+    thd                  The thread object
+  @param
+    table                The table containing the virtual column
+  @param
+    field                The field for the virtual
+  @param  
+    vcol_expr            The string representation of the defining expression
+  @param[out]
+    error_reported       The flag to inform the caller that no other error
+                         messages are to be generated
+
+  @details
+    The function takes string representation 'vcol_expr' of the defining
+    expression for the virtual field 'field' of the table 'table' and
+    parses it, building an item object for it. The pointer to this item is
+    placed into field->vcol_info.expr_item. After this the function performs
+    semantic analysis of the item by calling the function fix_vcol_expr.
+    Since the defining expression is part of the table definition the item for
+    it is created in table->memroot within the special arena TABLE::expr_arena.
+
+  @note
+    Before passing 'vcol_expr' to the parser the function encloses it in
+    parentheses and prepends a special keyword to it.
+  
+   @retval
+    FALSE           If a success
+   @retval
+    TRUE            Otherwise
+*/
+bool unpack_vcol_info_from_frm(THD *thd,
+                               TABLE *table,
+                               Field *field,
+                               LEX_STRING *vcol_expr,
+                               bool *error_reported)
+{
+  bool rc;
+  char *vcol_expr_str;
+  int str_len;
+  CHARSET_INFO *old_character_set_client;
+  Query_arena *backup_stmt_arena_ptr;
+  Query_arena backup_arena;
+  Query_arena *vcol_arena= 0;
+  Parser_state parser_state;
+  LEX *old_lex= thd->lex;
+  LEX lex;
+  DBUG_ENTER("unpack_vcol_info_from_frm");
+  DBUG_ASSERT(vcol_expr);
+
+  old_character_set_client= thd->variables.character_set_client;
+  backup_stmt_arena_ptr= thd->stmt_arena;
+
+  /*
+    Step 1: Construct the input string for the parser.
+    The string to be parsed has to be of the following format:
+    "PARSE_VCOL_EXPR (<expr_string_from_frm>)".
+  */
+  if (!(vcol_expr_str= (char*) alloc_root(&table->mem_root,
+                                          vcol_expr->length +
+                                            parse_vcol_keyword.length + 3)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  memcpy(vcol_expr_str,
+         (char*) parse_vcol_keyword.str,
+         parse_vcol_keyword.length);
+  str_len= parse_vcol_keyword.length;
+  memcpy(vcol_expr_str + str_len, "(", 1);
+  str_len++;
+  memcpy(vcol_expr_str + str_len,
+         (char*) vcol_expr->str,
+         vcol_expr->length);
+  str_len+= vcol_expr->length;
+  memcpy(vcol_expr_str + str_len, ")", 1);
+  str_len++;
+  memcpy(vcol_expr_str + str_len, "\0", 1);
+  str_len++;
+
+  /*
+    No arena or LEX has been switched yet, so return directly (as above)
+    rather than run the 'err'/'end' teardown for a setup that never ran.
+  */
+  if (parser_state.init(thd, vcol_expr_str, str_len))
+    DBUG_RETURN(TRUE);
+
+  /* Step 2: Setup thd for parsing. */
+  vcol_arena= table->expr_arena;
+  if (!vcol_arena)
+  {
+    /*
+      We need to use CONVENTIONAL_EXECUTION here to ensure that
+      any new items created by fix_fields() are not reverted.
+    */
+    Query_arena expr_arena(&table->mem_root,
+                           Query_arena::STMT_CONVENTIONAL_EXECUTION);
+    if (!(vcol_arena= (Query_arena *) alloc_root(&table->mem_root,
+                                                 sizeof(Query_arena))))
+      DBUG_RETURN(TRUE);                  // still nothing to undo
+    *vcol_arena= expr_arena;
+    table->expr_arena= vcol_arena;
+  }
+  thd->set_n_backup_active_arena(vcol_arena, &backup_arena);
+  thd->stmt_arena= vcol_arena;
+
+  if (init_lex_with_single_table(thd, table, &lex))
+    goto err;
+
+  thd->lex->parse_vcol_expr= TRUE;
+
+  /*
+    Step 3: Use the parser to build an Item object from vcol_expr_str.
+  */
+  if (parse_sql(thd, &parser_state, NULL))
+    goto err;
+
+  /* From now on use vcol_info generated by the parser. */
+  field->vcol_info= thd->lex->vcol_info;
+
+  /* Validate the Item tree. */
+  if (fix_vcol_expr(thd, table, field))
+  {
+    *error_reported= TRUE;
+    field->vcol_info= 0;
+    goto err;
+  }
+  rc= FALSE;
+  goto end;
+
+err:
+  rc= TRUE;
+  thd->free_items();
+end:
+  thd->stmt_arena= backup_stmt_arena_ptr;
+  if (vcol_arena)
+    thd->restore_active_arena(vcol_arena, &backup_arena);
+  end_lex_with_single_table(thd, table, old_lex);
+  thd->variables.character_set_client= old_character_set_client;
+
+  DBUG_RETURN(rc);
+}
+
+/*
+  Read data from a binary .frm file from MySQL 3.23 - 5.0 into TABLE_SHARE
+*/
 
 /*
   Open a table based on a TABLE_SHARE
@@ -1826,11 +2148,14 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
   uint records, i, bitmap_size;
   bool error_reported= FALSE;
   uchar *record, *bitmaps;
-  Field **field_ptr;
+  Field **field_ptr, **vfield_ptr;
+  uint8 save_context_analysis_only= thd->lex->context_analysis_only;
   DBUG_ENTER("open_table_from_share");
   DBUG_PRINT("enter",("name: '%s.%s'  form: 0x%lx", share->db.str,
                       share->table_name.str, (long) outparam));
 
+  thd->lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_VIEW; // not a view
+
   error= 1;
   bzero((char*) outparam, sizeof(*outparam));
   outparam->in_use= thd;
@@ -1840,7 +2165,7 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
 
   init_sql_alloc(&outparam->mem_root, TABLE_ALLOC_BLOCK_SIZE, 0);
 
-  if (!(outparam->alias= my_strdup(alias, MYF(MY_WME))))
+  if (outparam->alias.copy(alias, strlen(alias), table_alias_charset))
     goto err;
   outparam->quick_keys.init();
   outparam->covering_keys.init();
@@ -1887,7 +2212,7 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
       outparam->record[1]= outparam->record[0];   // Safety
   }
 
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
   /*
     We need this because when we read var-length rows, we are not updating
     bytes after end of varchar
@@ -1977,6 +2302,34 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
     }
   }
 
+  /*
+    Process virtual columns, if any.
+  */
+  if (!(vfield_ptr = (Field **) alloc_root(&outparam->mem_root,
+                                          (uint) ((share->vfields+1)*
+                                                  sizeof(Field*)))))
+    goto err;
+
+  outparam->vfield= vfield_ptr;
+  
+  for (field_ptr= outparam->field; *field_ptr; field_ptr++)
+  {
+    if ((*field_ptr)->vcol_info)
+    {
+      if (unpack_vcol_info_from_frm(thd,
+                                    outparam,
+                                    *field_ptr,
+                                    &(*field_ptr)->vcol_info->expr_str,
+                                    &error_reported))
+      {
+        error= 4; // in case no error is reported
+        goto err;
+      }
+      *(vfield_ptr++)= *field_ptr;
+    }
+  }
+  *vfield_ptr= 0;                              // End marker
+
 #ifdef WITH_PARTITION_STORAGE_ENGINE
   if (share->partition_info_str_len && outparam->file)
   {
@@ -2043,17 +2396,32 @@ partititon_err:
   }
 #endif
 
+  /* Check virtual columns against table's storage engine. */
+  if (share->vfields && 
+        !(outparam->file && 
+          (outparam->file->ha_table_flags() & HA_CAN_VIRTUAL_COLUMNS)))
+  {
+    my_error(ER_UNSUPPORTED_ENGINE_FOR_VIRTUAL_COLUMNS, MYF(0),
+             plugin_name(share->db_plugin)->str);
+    error_reported= TRUE;
+    goto err;
+  }
+
   /* Allocate bitmaps */
 
   bitmap_size= share->column_bitmap_size;
-  if (!(bitmaps= (uchar*) alloc_root(&outparam->mem_root, bitmap_size*3)))
+  if (!(bitmaps= (uchar*) alloc_root(&outparam->mem_root, bitmap_size*5)))
     goto err;
   bitmap_init(&outparam->def_read_set,
               (my_bitmap_map*) bitmaps, share->fields, FALSE);
   bitmap_init(&outparam->def_write_set,
               (my_bitmap_map*) (bitmaps+bitmap_size), share->fields, FALSE);
-  bitmap_init(&outparam->tmp_set,
+  bitmap_init(&outparam->def_vcol_set,
               (my_bitmap_map*) (bitmaps+bitmap_size*2), share->fields, FALSE);
+  bitmap_init(&outparam->tmp_set,
+              (my_bitmap_map*) (bitmaps+bitmap_size*3), share->fields, FALSE);
+  bitmap_init(&outparam->eq_join_set,
+              (my_bitmap_map*) (bitmaps+bitmap_size*4), share->fields, FALSE);
   outparam->default_column_bitmaps();
 
   /* The table struct is now initialized;  Open the table */
@@ -2108,7 +2476,7 @@ partititon_err:
     }
   }
 
-#if defined(HAVE_purify) && !defined(DBUG_OFF)
+#if defined(HAVE_valgrind) && !defined(DBUG_OFF)
   bzero((char*) bitmaps, bitmap_size*3);
 #endif
 
@@ -2117,6 +2485,7 @@ partititon_err:
                                HA_HAS_OWN_BINLOGGING);
   thd->status_var.opened_tables++;
 
+  thd->lex->context_analysis_only= save_context_analysis_only;
   DBUG_RETURN (0);
 
  err:
@@ -2129,8 +2498,9 @@ partititon_err:
 #endif
   outparam->file= 0;				// For easier error checking
   outparam->db_stat=0;
+  thd->lex->context_analysis_only= save_context_analysis_only;
   free_root(&outparam->mem_root, MYF(0));       // Safe to call on bzero'd root
-  my_free((void *) outparam->alias);
+  outparam->alias.free();
   DBUG_RETURN (error);
 }
 
@@ -2151,13 +2521,20 @@ int closefrm(register TABLE *table, bool free_share)
   DBUG_PRINT("enter", ("table: 0x%lx", (long) table));
 
   if (table->db_stat)
-    error=table->file->close();
-  my_free((void *) table->alias);
-  table->alias= 0;
+  {
+    if (table->s->deleting)
+      table->file->extra(HA_EXTRA_PREPARE_FOR_DROP);
+    error=table->file->ha_close();
+  }
+  table->alias.free();
+  if (table->expr_arena)
+    table->expr_arena->free_items();
   if (table->field)
   {
     for (Field **ptr=table->field ; *ptr ; ptr++)
+    {
       delete *ptr;
+    }
     table->field= 0;
   }
   delete table->file;
@@ -2370,13 +2747,17 @@ void open_table_error(TABLE_SHARE *share, int error, int db_errno, int errarg)
 {
   int err_no;
   char buff[FN_REFLEN];
-  myf errortype= ME_ERROR+ME_WAITTANG;
+  myf errortype= ME_ERROR+ME_WAITTANG;          // Write fatals error to log
   DBUG_ENTER("open_table_error");
 
   switch (error) {
   case 7:
   case 1:
-    if (db_errno == ENOENT)
+    /*
+      Test if the file doesn't exist. We also have to test for EINVAL, as
+      it may occur on Windows when opening a file with an illegal file name.
+    */
+    if (db_errno == ENOENT || db_errno == EINVAL)
       my_error(ER_NO_SUCH_TABLE, MYF(0), share->db.str, share->table_name.str);
     else
     {
@@ -2633,11 +3014,12 @@ File create_frm(THD *thd, const char *name, const char *db,
   int create_flags= O_RDWR | O_TRUNC;
   ulong key_comment_total_bytes= 0;
   uint i;
+  DBUG_ENTER("create_frm");
 
   if (create_info->options & HA_LEX_CREATE_TMP_TABLE)
     create_flags|= O_EXCL | O_NOFOLLOW;
 
-  /* Fix this when we have new .frm files;  Current limit is 4G rows (QQ) */
+  /* Fix this when we have new .frm files;  Current limit is 4G rows (TODO) */
   if (create_info->max_rows > UINT_MAX32)
     create_info->max_rows= UINT_MAX32;
   if (create_info->min_rows > UINT_MAX32)
@@ -2699,11 +3081,8 @@ File create_frm(THD *thd, const char *name, const char *db,
     csid= (create_info->default_table_charset ?
            create_info->default_table_charset->number : 0);
     fileinfo[38]= (uchar) csid;
-    /*
-      In future versions, we will store in fileinfo[39] the values of the
-      TRANSACTIONAL and PAGE_CHECKSUM clauses of CREATE TABLE.
-    */
-    fileinfo[39]= 0;
+    fileinfo[39]= (uchar) ((uint) create_info->transactional |
+                           ((uint) create_info->page_checksum << 2));
     fileinfo[40]= (uchar) create_info->row_type;
     /* Next few bytes where for RAID support */
     fileinfo[41]= (uchar) (csid >> 8);
@@ -2739,7 +3118,7 @@ File create_frm(THD *thd, const char *name, const char *db,
     else
       my_error(ER_CANT_CREATE_TABLE,MYF(0),table,my_errno);
   }
-  return (file);
+  DBUG_RETURN(file);
 } /* create_frm */
 
 
@@ -2756,8 +3135,9 @@ void update_create_info_from_table(HA_CREATE_INFO *create_info, TABLE *table)
   create_info->default_table_charset= share->table_charset;
   create_info->table_charset= 0;
   create_info->comment= share->comment;
-  create_info->storage_media= share->default_storage_media;
-  create_info->tablespace= share->tablespace;
+  create_info->transactional= share->transactional;
+  create_info->page_checksum= share->page_checksum;
+  create_info->option_list= share->option_list;
 
   DBUG_VOID_RETURN;
 }
@@ -2879,15 +3259,15 @@ bool check_db_name(LEX_STRING *org_name)
   uint name_length= org_name->length;
   bool check_for_path_chars;
 
-  if (!name_length || name_length > NAME_LEN)
-    return 1;
-
   if ((check_for_path_chars= check_mysql50_prefix(name)))
   {
     name+= MYSQL50_TABLE_NAME_PREFIX_LENGTH;
     name_length-= MYSQL50_TABLE_NAME_PREFIX_LENGTH;
   }
 
+  if (!name_length || name_length > NAME_LEN)
+    return 1;
+
   if (lower_case_table_names && name != any_db)
     my_casedn_str(files_charset_info, name);
 
@@ -2906,6 +3286,15 @@ bool check_table_name(const char *name, size_t length, bool check_for_path_chars
   // name length in symbols
   size_t name_length= 0;
   const char *end= name+length;
+
+
+  if (!check_for_path_chars &&
+      (check_for_path_chars= check_mysql50_prefix(name)))
+  {
+    name+= MYSQL50_TABLE_NAME_PREFIX_LENGTH;
+    length-= MYSQL50_TABLE_NAME_PREFIX_LENGTH;
+  }
+
   if (!length || length > NAME_LEN)
     return 1;
 #if defined(USE_MB) && defined(USE_MB_IDENT)
@@ -2924,7 +3313,7 @@ bool check_table_name(const char *name, size_t length, bool check_for_path_chars
       int len=my_ismbchar(system_charset_info, name, end);
       if (len)
       {
-        name += len;
+        name+= len;
         name_length++;
         continue;
       }
@@ -3003,7 +3392,7 @@ Table_check_intact::check(TABLE *table, const TABLE_FIELD_DEF *table_def)
   const TABLE_FIELD_TYPE *field_def= table_def->field;
   DBUG_ENTER("table_check_intact");
   DBUG_PRINT("info",("table: %s  expected_count: %d",
-                     table->alias, table_def->count));
+                     table->alias.c_ptr(), table_def->count));
 
   /* Whether the table definition has already been validated. */
   if (table->s->table_field_def_cache == table_def)
@@ -3018,7 +3407,7 @@ Table_check_intact::check(TABLE *table, const TABLE_FIELD_DEF *table_def)
     {
       report_error(ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE,
                    ER(ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE),
-                   table->alias, table_def->count, table->s->fields,
+                   table->alias.c_ptr(), table_def->count, table->s->fields,
                    static_cast<int>(table->s->mysql_version),
                    MYSQL_VERSION_ID);
       DBUG_RETURN(TRUE);
@@ -3026,7 +3415,8 @@ Table_check_intact::check(TABLE *table, const TABLE_FIELD_DEF *table_def)
     else if (MYSQL_VERSION_ID == table->s->mysql_version)
     {
       report_error(ER_COL_COUNT_DOESNT_MATCH_CORRUPTED,
-                   ER(ER_COL_COUNT_DOESNT_MATCH_CORRUPTED), table->alias,
+                   ER(ER_COL_COUNT_DOESNT_MATCH_CORRUPTED),
+                   table->alias.c_ptr(),
                    table_def->count, table->s->fields);
       DBUG_RETURN(TRUE);
     }
@@ -3038,11 +3428,13 @@ Table_check_intact::check(TABLE *table, const TABLE_FIELD_DEF *table_def)
       is backward compatible.
     */
   }
-  char buffer[STRING_BUFFER_USUAL_SIZE];
+  char buffer[1024];
   for (i=0 ; i < table_def->count; i++, field_def++)
   {
     String sql_type(buffer, sizeof(buffer), system_charset_info);
     sql_type.length(0);
+    /* Allocate min 256 characters at once */
+    sql_type.extra_allocation(256);
     if (i < table->s->fields)
     {
       Field *field= table->field[i];
@@ -3057,7 +3449,8 @@ Table_check_intact::check(TABLE *table, const TABLE_FIELD_DEF *table_def)
         */
         report_error(0, "Incorrect definition of table %s.%s: "
                      "expected column '%s' at position %d, found '%s'.",
-                     table->s->db.str, table->alias, field_def->name.str, i,
+                     table->s->db.str, table->alias.c_ptr(),
+                     field_def->name.str, i,
                      field->field_name);
       }
       field->sql_type(sql_type);
@@ -3083,7 +3476,8 @@ Table_check_intact::check(TABLE *table, const TABLE_FIELD_DEF *table_def)
       {
         report_error(0, "Incorrect definition of table %s.%s: "
                      "expected column '%s' at position %d to have type "
-                     "%s, found type %s.", table->s->db.str, table->alias,
+                     "%s, found type %s.", table->s->db.str,
+                     table->alias.c_ptr(),
                      field_def->name.str, i, field_def->type.str,
                      sql_type.c_ptr_safe());
         error= TRUE;
@@ -3093,7 +3487,8 @@ Table_check_intact::check(TABLE *table, const TABLE_FIELD_DEF *table_def)
         report_error(0, "Incorrect definition of table %s.%s: "
                      "expected the type of column '%s' at position %d "
                      "to have character set '%s' but the type has no "
-                     "character set.", table->s->db.str, table->alias,
+                     "character set.", table->s->db.str,
+                     table->alias.c_ptr(),
                      field_def->name.str, i, field_def->cset.str);
         error= TRUE;
       }
@@ -3103,7 +3498,8 @@ Table_check_intact::check(TABLE *table, const TABLE_FIELD_DEF *table_def)
         report_error(0, "Incorrect definition of table %s.%s: "
                      "expected the type of column '%s' at position %d "
                      "to have character set '%s' but found "
-                     "character set '%s'.", table->s->db.str, table->alias,
+                     "character set '%s'.", table->s->db.str,
+                     table->alias.c_ptr(),
                      field_def->name.str, i, field_def->cset.str,
                      field->charset()->csname);
         error= TRUE;
@@ -3114,7 +3510,7 @@ Table_check_intact::check(TABLE *table, const TABLE_FIELD_DEF *table_def)
       report_error(0, "Incorrect definition of table %s.%s: "
                    "expected column '%s' at position %d to have type %s "
                    " but the column is not found.",
-                   table->s->db.str, table->alias,
+                   table->s->db.str, table->alias.c_ptr(),
                    field_def->name.str, i, field_def->type.str);
       error= TRUE;
     }
@@ -3333,12 +3729,8 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
                                    s->table_name.str,
                                    tl->alias);
   /* Fix alias if table name changes. */
-  if (strcmp(alias, tl->alias))
-  {
-    uint length= (uint) strlen(tl->alias)+1;
-    alias= (char*) my_realloc((char*) alias, length, MYF(MY_WME));
-    memcpy((char*) alias, tl->alias, length);
-  }
+  if (strcmp(alias.c_ptr(), tl->alias))
+    alias.copy(tl->alias, strlen(tl->alias), alias.charset());
 
   tablenr= thd->current_tablenr++;
   used_fields= 0;
@@ -3351,8 +3743,9 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
   status= STATUS_NO_RECORD;
   insert_values= 0;
   fulltext_searched= 0;
-  file->ft_handler= 0;
+  file->ha_start_of_new_statement();
   reginfo.impossible_range= 0;
+  created= TRUE;
 
   /* Catch wrong handling of the auto_increment_field_not_null. */
   DBUG_ASSERT(!auto_increment_field_not_null);
@@ -3367,6 +3760,17 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
 
   DBUG_ASSERT(key_read == 0);
 
+  /* mark the record[0] uninitialized */
+  TRASH(record[0], s->reclength);
+
+  /*
+    Initialize the null marker bits, to ensure that if we are doing a read
+    of only selected columns (like in keyread), all null markers are
+    initialized.
+  */
+  memset(record[0], 255, s->null_bytes); 
+  memset(record[1], 255, s->null_bytes); 
+
   /* Tables may be reused in a sub statement. */
   DBUG_ASSERT(!file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN));
 }
@@ -3452,129 +3856,112 @@ void  TABLE_LIST::calc_md5(char *buffer)
 
 
 /**
-   @brief Set underlying table for table place holder of view.
-
-   @details
-
-   Replace all views that only use one table with the table itself.  This
-   allows us to treat the view as a simple table and even update it (it is a
-   kind of optimization).
+  @brief
+  Create field translation for mergeable derived table/view.
 
-   @note 
+  @param thd  Thread handle
 
-   This optimization is potentially dangerous as it makes views
-   masquerade as base tables: Views don't have the pointer TABLE_LIST::table
-   set to non-@c NULL.
+  @details
+  Create field translation for mergeable derived table/view.
 
-   We may have the case where a view accesses tables not normally accessible
-   in the current Security_context (only in the definer's
-   Security_context). According to the table's GRANT_INFO (TABLE::grant),
-   access is fulfilled, but this is implicitly meant in the definer's security
-   context. Hence we must never look at only a TABLE's GRANT_INFO without
-   looking at the one of the referring TABLE_LIST.
+  @return FALSE ok.
+  @return TRUE an error occurred.
 */
 
-void TABLE_LIST::set_underlying_merge()
+bool TABLE_LIST::create_field_translation(THD *thd)
 {
-  TABLE_LIST *tbl;
+  Item *item;
+  Field_translator *transl;
+  SELECT_LEX *select= get_single_select();
+  List_iterator_fast<Item> it(select->item_list);
+  uint field_count= 0;
+  Query_arena *arena= thd->stmt_arena, backup;
+  bool res= FALSE;
+
+  used_items.empty();
 
-  if ((tbl= merge_underlying_list))
+  if (field_translation)
   {
-    /* This is a view. Process all tables of view */
-    DBUG_ASSERT(view && effective_algorithm == VIEW_ALGORITHM_MERGE);
-    do
+    /*
+      Update items in the field translation after the view has been prepared.
+      It's needed because some items in the select list, like IN subselects,
+      might be substituted for optimized ones.
+    */
+    if (is_view() && get_unit()->prepared && !field_translation_updated)
     {
-      if (tbl->merge_underlying_list)          // This is a view
+      while ((item= it++))
       {
-        DBUG_ASSERT(tbl->view &&
-                    tbl->effective_algorithm == VIEW_ALGORITHM_MERGE);
-        /*
-          This is the only case where set_ancestor is called on an object
-          that may not be a view (in which case ancestor is 0)
-        */
-        tbl->merge_underlying_list->set_underlying_merge();
+        field_translation[field_count++].item= item;
       }
-    } while ((tbl= tbl->next_local));
-
-    if (!multitable_view)
-    {
-      table= merge_underlying_list->table;
-      schema_table= merge_underlying_list->schema_table;
+      field_translation_updated= TRUE;
     }
+
+    return FALSE;
+  }
+
+  if (arena->is_conventional())
+    arena= 0;                                   // For easier test
+  else
+    thd->set_n_backup_active_arena(arena, &backup);
+
+  /* Create view fields translation table */
+
+  if (!(transl=
+        (Field_translator*)(thd->stmt_arena->
+                            alloc(select->item_list.elements *
+                                  sizeof(Field_translator)))))
+  {
+    res= TRUE;
+    goto exit;
+  }
+
+  while ((item= it++))
+  {
+    transl[field_count].name= item->name;
+    transl[field_count++].item= item;
   }
+  field_translation= transl;
+  field_translation_end= transl + field_count;
+
+exit:
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+
+  return res;
 }
 
 
-/*
-  setup fields of placeholder of merged VIEW
+/**
+  @brief
+  Create field translation for mergeable derived table/view.
 
-  SYNOPSIS
-    TABLE_LIST::setup_underlying()
-    thd		    - thread handler
+  @param thd  Thread handle
 
-  DESCRIPTION
-    It is:
-    - preparing translation table for view columns
-    If there are underlying view(s) procedure first will be called for them.
+  @details
+  Create field translation; move full-text functions to the current select.
 
-  RETURN
-    FALSE - OK
-    TRUE  - error
+  @return FALSE ok.
+  @return TRUE an error occur.
 */
 
 bool TABLE_LIST::setup_underlying(THD *thd)
 {
   DBUG_ENTER("TABLE_LIST::setup_underlying");
 
-  if (!field_translation && merge_underlying_list)
+  if (!view || (!field_translation && merge_underlying_list))
   {
-    Field_translator *transl;
-    SELECT_LEX *select= &view->select_lex;
-    Item *item;
-    TABLE_LIST *tbl;
-    List_iterator_fast<Item> it(select->item_list);
-    uint field_count= 0;
-
-    if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*) &field_count))
-    {
-      DBUG_RETURN(TRUE);
-    }
-
-    for (tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
-    {
-      if (tbl->merge_underlying_list &&
-          tbl->setup_underlying(thd))
-      {
-        DBUG_RETURN(TRUE);
-      }
-    }
-
-    /* Create view fields translation table */
-
-    if (!(transl=
-          (Field_translator*)(thd->stmt_arena->
-                              alloc(select->item_list.elements *
-                                    sizeof(Field_translator)))))
-    {
+    SELECT_LEX *select= get_single_select();
+    
+    if (create_field_translation(thd))
       DBUG_RETURN(TRUE);
-    }
-
-    while ((item= it++))
-    {
-      transl[field_count].name= item->name;
-      transl[field_count++].item= item;
-    }
-    field_translation= transl;
-    field_translation_end= transl + field_count;
-    /* TODO: use hash for big number of fields */
 
     /* full text function moving to current select */
-    if (view->select_lex.ftfunc_list->elements)
+    if (select->ftfunc_list->elements)
     {
       Item_func_match *ifm;
       SELECT_LEX *current_select= thd->lex->current_select;
       List_iterator_fast<Item_func_match>
-        li(*(view->select_lex.ftfunc_list));
+        li(*(select->ftfunc_list));     /* same list as tested above */
       while ((ifm= li++))
         current_select->ftfunc_list->push_front(ifm);
     }
@@ -3584,7 +3971,7 @@ bool TABLE_LIST::setup_underlying(THD *thd)
 
 
 /*
-  Prepare where expression of view
+   Prepare where expression of derived table/view
 
   SYNOPSIS
     TABLE_LIST::prep_where()
@@ -3608,7 +3995,8 @@ bool TABLE_LIST::prep_where(THD *thd, Item **conds,
 
   for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
   {
-    if (tbl->view && tbl->prep_where(thd, conds, no_where_clause))
+    if (tbl->is_view_or_derived() &&
+        tbl->prep_where(thd, conds, no_where_clause))
     {
       DBUG_RETURN(TRUE);
     }
@@ -3616,6 +4004,8 @@ bool TABLE_LIST::prep_where(THD *thd, Item **conds,
 
   if (where)
   {
+    if (where->fixed)
+      where->update_used_tables();
     if (!where->fixed && where->fix_fields(thd, &where))
     {
       DBUG_RETURN(TRUE);
@@ -3648,7 +4038,13 @@ bool TABLE_LIST::prep_where(THD *thd, Item **conds,
         }
       }
       if (tbl == 0)
+      {
+        if (*conds && !(*conds)->fixed)
+	  (*conds)->fix_fields(thd, conds);
         *conds= and_conds(*conds, where->copy_andor_structure(thd));
+        if (*conds && !(*conds)->fixed)
+          (*conds)->fix_fields(thd, conds);        
+      }
       if (arena)
         thd->restore_active_arena(arena, &backup);
       where_processed= TRUE;
@@ -3687,10 +4083,11 @@ merge_on_conds(THD *thd, TABLE_LIST *table, bool is_cascaded)
   DBUG_PRINT("info", ("alias: %s", table->alias));
   if (table->on_expr)
     cond= table->on_expr->copy_andor_structure(thd);
-  if (!table->nested_join)
+  if (!table->view)
     DBUG_RETURN(cond);
-  List_iterator<TABLE_LIST> li(table->nested_join->join_list);
-  while (TABLE_LIST *tbl= li++)
+  for (TABLE_LIST *tbl= (TABLE_LIST*)table->view->select_lex.table_list.first;
+       tbl;
+       tbl= tbl->next_local)
   {
     if (tbl->view && !is_cascaded)
       continue;
@@ -3730,7 +4127,7 @@ bool TABLE_LIST::prep_check_option(THD *thd, uint8 check_opt_type)
 {
   DBUG_ENTER("TABLE_LIST::prep_check_option");
   bool is_cascaded= check_opt_type == VIEW_CHECK_CASCADED;
-
+  TABLE_LIST *merge_underlying_list= view->select_lex.get_table_list();
   for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
   {
     /* see comment of check_opt_type parameter */
@@ -3747,7 +4144,6 @@ bool TABLE_LIST::prep_check_option(THD *thd, uint8 check_opt_type)
 
     if (where)
     {
-      DBUG_ASSERT(where->fixed);
       check_option= where->copy_andor_structure(thd);
     }
     if (is_cascaded)
@@ -3772,7 +4168,7 @@ bool TABLE_LIST::prep_check_option(THD *thd, uint8 check_opt_type)
     const char *save_where= thd->where;
     thd->where= "check option";
     if ((!check_option->fixed &&
-        check_option->fix_fields(thd, &check_option)) ||
+         check_option->fix_fields(thd, &check_option)) ||
         check_option->check_cols(1))
     {
       DBUG_RETURN(TRUE);
@@ -3843,10 +4239,14 @@ void TABLE_LIST::hide_view_error(THD *thd)
 TABLE_LIST *TABLE_LIST::find_underlying_table(TABLE *table_to_find)
 {
   /* is this real table and table which we are looking for? */
-  if (table == table_to_find && merge_underlying_list == 0)
+  if (table == table_to_find && view == 0)
     return this;
+  if (!view)
+    return 0;
 
-  for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
+  for (TABLE_LIST *tbl= view->select_lex.get_table_list();
+       tbl;
+       tbl= tbl->next_local)
   {
     TABLE_LIST *result;
     if ((result= tbl->find_underlying_table(table_to_find)))
@@ -3928,7 +4328,12 @@ bool TABLE_LIST::check_single_table(TABLE_LIST **table_arg,
                                        table_map map,
                                        TABLE_LIST *view_arg)
 {
-  for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
+  if (!select_lex)
+    return FALSE;
+  DBUG_ASSERT(is_merged_derived());
+  for (TABLE_LIST *tbl= get_single_select()->get_table_list();
+       tbl;
+       tbl= tbl->next_local)
   {
     if (tbl->table)
     {
@@ -3970,8 +4375,10 @@ bool TABLE_LIST::set_insert_values(MEM_ROOT *mem_root)
   }
   else
   {
-    DBUG_ASSERT(view && merge_underlying_list);
-    for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
+    DBUG_ASSERT(is_view_or_derived() && is_merged_derived());
+    for (TABLE_LIST *tbl= (TABLE_LIST*)view->select_lex.table_list.first;
+         tbl;
+         tbl= tbl->next_local)
       if (tbl->set_insert_values(mem_root))
         return TRUE;
   }
@@ -3997,7 +4404,7 @@ bool TABLE_LIST::set_insert_values(MEM_ROOT *mem_root)
 */
 bool TABLE_LIST::is_leaf_for_name_resolution()
 {
-  return (view || is_natural_join || is_join_columns_complete ||
+  return (is_merged_derived() || is_natural_join || is_join_columns_complete ||
           !nested_join);
 }
 
@@ -4135,7 +4542,11 @@ void TABLE_LIST::register_want_access(ulong want_access)
     if (table)
       table->grant.want_privilege= want_access;
   }
-  for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
+  if (!view)
+    return;
+  for (TABLE_LIST *tbl= view->select_lex.get_table_list();
+       tbl;
+       tbl= tbl->next_local)
     tbl->register_want_access(want_access);
 }
 
@@ -4371,14 +4782,23 @@ const char *Natural_join_column::db_name()
                       table_ref->table->s->db.str) ||
               (table_ref->schema_table &&
                is_infoschema_db(table_ref->table->s->db.str,
-                                table_ref->table->s->db.length)));
+                                table_ref->table->s->db.length)) ||
+               table_ref->is_materialized_derived());
   return table_ref->db;
 }
 
 
 GRANT_INFO *Natural_join_column::grant()
 {
-  if (view_field)
+/*  if (view_field)
+    return &(table_ref->grant);
+  return &(table_ref->table->grant);*/
+  /*
+    Have to check algorithm because merged derived also has
+    field_translation.
+  */
+//if (table_ref->effective_algorithm == DTYPE_ALGORITHM_MERGE)
+  if (table_ref->is_merged_derived())
     return &(table_ref->grant);
   return &(table_ref->table->grant);
 }
@@ -4459,7 +4879,17 @@ Item *create_view_field(THD *thd, TABLE_LIST *view, Item **field_ref,
   {
     DBUG_RETURN(field);
   }
-  Item *item= new Item_direct_view_ref(view, field_ref, name);
+  Item *item= new Item_direct_view_ref(&view->view->select_lex.context,
+                                       field_ref, view->alias,
+                                       name, view);
+  /*
+    Force creation of nullable item for the result tmp table for outer joined
+    views/derived tables.
+  */
+  if (view->table && view->table->maybe_null)
+    item->maybe_null= TRUE;
+  /* Save item in case we will need to fall back to materialization. */
+  view->used_items.push_back(item);
   DBUG_RETURN(item);
 }
 
@@ -4497,15 +4927,15 @@ void Field_iterator_table_ref::set_field_iterator()
     DBUG_ASSERT(table_ref->is_natural_join ||
                 table_ref->nested_join ||
                 (table_ref->join_columns &&
-                /* This is a merge view. */
-                ((table_ref->field_translation &&
-                  table_ref->join_columns->elements ==
-                  (ulong)(table_ref->field_translation_end -
-                          table_ref->field_translation)) ||
-                 /* This is stored table or a tmptable view. */
-                 (!table_ref->field_translation &&
-                  table_ref->join_columns->elements ==
-                  table_ref->table->s->fields))));
+                 /* This is a merge view. */
+                 ((table_ref->field_translation &&
+                   table_ref->join_columns->elements ==
+                   (ulong)(table_ref->field_translation_end -
+                           table_ref->field_translation)) ||
+                  /* This is stored table or a tmptable view. */
+                  (!table_ref->field_translation &&
+                   table_ref->join_columns->elements ==
+                   table_ref->table->s->fields))));
     field_it= &natural_join_it;
     DBUG_PRINT("info",("field_it for '%s' is Field_iterator_natural_join",
                        table_ref->alias));
@@ -4513,8 +4943,7 @@ void Field_iterator_table_ref::set_field_iterator()
   /* This is a merge view, so use field_translation. */
   else if (table_ref->field_translation)
   {
-    DBUG_ASSERT(table_ref->view &&
-                table_ref->effective_algorithm == VIEW_ALGORITHM_MERGE);
+    DBUG_ASSERT(table_ref->is_merged_derived());
     field_it= &view_field_it;
     DBUG_PRINT("info", ("field_it for '%s' is Field_iterator_view",
                         table_ref->alias));
@@ -4764,9 +5193,10 @@ void TABLE::clear_column_bitmaps()
     Reset column read/write usage. It's identical to:
     bitmap_clear_all(&table->def_read_set);
     bitmap_clear_all(&table->def_write_set);
+    bitmap_clear_all(&table->def_vcol_set);
   */
-  bzero((char*) def_read_set.bitmap, s->column_bitmap_size*2);
-  column_bitmaps_set(&def_read_set, &def_write_set);
+  bzero((char*) def_read_set.bitmap, s->column_bitmap_size*3);
+  column_bitmaps_set(&def_read_set, &def_write_set, &def_vcol_set);
 }
 
 
@@ -4809,7 +5239,7 @@ void TABLE::mark_columns_used_by_index(uint index)
   MY_BITMAP *bitmap= &tmp_set;
   DBUG_ENTER("TABLE::mark_columns_used_by_index");
 
-  set_keyread(TRUE);
+  enable_keyread();
   bitmap_clear_all(bitmap);
   mark_columns_used_by_index_no_reset(index, bitmap);
   column_bitmaps_set(bitmap, bitmap);
@@ -4830,7 +5260,7 @@ void TABLE::add_read_columns_used_by_index(uint index)
   MY_BITMAP *bitmap= &tmp_set;
   DBUG_ENTER("TABLE::add_read_columns_used_by_index");
 
-  set_keyread(TRUE);
+  enable_keyread();
   bitmap_copy(bitmap, read_set);
   mark_columns_used_by_index_no_reset(index, bitmap);
   column_bitmaps_set(bitmap, write_set);
@@ -4853,7 +5283,7 @@ void TABLE::restore_column_maps_after_mark_index()
 {
   DBUG_ENTER("TABLE::restore_column_maps_after_mark_index");
 
-  set_keyread(FALSE);
+  disable_keyread();
   default_column_bitmaps();
   file->column_bitmaps_signal();
   DBUG_VOID_RETURN;
@@ -4871,7 +5301,14 @@ void TABLE::mark_columns_used_by_index_no_reset(uint index,
   KEY_PART_INFO *key_part_end= (key_part +
                                 key_info[index].key_parts);
   for (;key_part != key_part_end; key_part++)
+  {
     bitmap_set_bit(bitmap, key_part->fieldnr-1);
+    if (key_part->field->vcol_info &&
+        key_part->field->vcol_info->expr_item)
+      key_part->field->vcol_info->
+               expr_item->walk(&Item::register_field_in_bitmap, 
+                               1, (uchar *) bitmap);
+  }
 }
 
 
@@ -4998,6 +5435,8 @@ void TABLE::mark_columns_needed_for_update()
       file->column_bitmaps_signal();
     }
   }
+  /* Mark all virtual columns needed for update */
+  mark_virtual_columns_for_write(FALSE);
   DBUG_VOID_RETURN;
 }
 
@@ -5024,6 +5463,286 @@ void TABLE::mark_columns_needed_for_insert()
   }
   if (found_next_number_field)
     mark_auto_increment_column();
+  /* Mark virtual columns for insert */
+  mark_virtual_columns_for_write(TRUE);
+}
+
+
+/*
+   @brief Mark a column as virtual used by the query
+
+   @param field           the field for the column to be marked
+
+   @details
+     The function marks the column for 'field' as virtual (computed)
+     in the bitmap vcol_set.
+     If the column is marked for the first time the expression to compute
+     the column is traversed and all columns that are occurred there are
+     marked in the read_set of the table.
+
+   @retval
+     TRUE       if the column was already marked in vcol_set
+   @retval
+     FALSE      otherwise
+*/
+
+bool TABLE::mark_virtual_col(Field *field)
+{
+  bool res;
+  DBUG_ASSERT(field->vcol_info);
+  if (!(res= bitmap_fast_test_and_set(vcol_set, field->field_index)))
+  {
+    Item *vcol_item= field->vcol_info->expr_item;
+    DBUG_ASSERT(vcol_item);
+    vcol_item->walk(&Item::register_field_in_read_map, 1, (uchar *) 0);
+  }
+  return res;
+}
+
+
+/* 
+  @brief Mark virtual columns for update/insert commands
+    
+  @param insert_fl    <-> virtual columns are marked for insert command 
+
+  @details
+    The function marks virtual columns used in a update/insert commands
+    in the vcol_set bitmap.
+    For an insert command a virtual column is always marked in write_set if
+    it is a stored column.
+    If a virtual column is from  write_set it is always marked in vcol_set.
+    If a stored virtual column is not from write_set but it is computed
+    through columns from write_set it is also marked in vcol_set, and,
+    besides, it is added to write_set. 
+
+  @return       void
+
+  @note
+    Let table t1 have columns a,b,c and let column c be a stored virtual 
+    column computed through columns a and b. Then for the query
+      UPDATE t1 SET a=1
+    column c will be placed into vcol_set and into write_set while
+    column b will be placed into read_set.
+    If column c was a virtual column, but not a stored virtual column
+    then it would not be added to any of the sets. Column b would not
+    be added to read_set either.           
+*/
+
+void TABLE::mark_virtual_columns_for_write(bool insert_fl)
+{
+  Field **vfield_ptr, *tmp_vfield;
+  bool bitmap_updated= FALSE;
+  if (!vfield)                          /* table has no virtual columns */
+    return;
+  for (vfield_ptr= vfield; *vfield_ptr; vfield_ptr++)
+  {
+    tmp_vfield= *vfield_ptr;
+    if (bitmap_is_set(write_set, tmp_vfield->field_index))
+      bitmap_updated|= mark_virtual_col(tmp_vfield); /* keep earlier TRUE */
+    else if (tmp_vfield->stored_in_db)
+    {
+      bool mark_fl= insert_fl;
+      if (!mark_fl)
+      {
+        Item *vcol_item= tmp_vfield->vcol_info->expr_item;
+        MY_BITMAP *save_read_set= read_set; /* walk registers into tmp_set */
+        DBUG_ASSERT(vcol_item);
+        bitmap_clear_all(&tmp_set);
+        read_set= &tmp_set;
+        vcol_item->walk(&Item::register_field_in_read_map, 1, (uchar *) 0);
+        read_set= save_read_set;
+        bitmap_intersect(&tmp_set, write_set);
+        mark_fl= !bitmap_is_clear_all(&tmp_set);
+      }
+      if (mark_fl)
+      {
+        bitmap_set_bit(write_set, tmp_vfield->field_index);
+        mark_virtual_col(tmp_vfield);
+        bitmap_updated= TRUE;
+      }
+    }
+  }
+  if (bitmap_updated)
+    file->column_bitmaps_signal();
+}
+
+
+/**
+  @brief
+  Allocate space for keys
+
+  @param key_count  number of keys to allocate
+
+  @details
+  The function allocates memory  to fit 'key_count' keys for this table.
+
+  @return FALSE   space was successfully allocated
+  @return TRUE    an error occurred
+*/
+
+bool TABLE::alloc_keys(uint key_count)
+{
+  key_info= s->key_info= (KEY*) alloc_root(&mem_root, sizeof(KEY)*key_count);
+  s->keys= 0;
+  max_keys= key_count;
+  return !(key_info);
+}
+
+
+void TABLE::create_key_part_by_field(KEY *keyinfo,
+                                     KEY_PART_INFO *key_part_info,
+                                     Field *field, uint fieldnr)
+{   
+  field->flags|= PART_KEY_FLAG;
+  key_part_info->null_bit= field->null_bit;
+  key_part_info->null_offset= (uint) (field->null_ptr -
+                                      (uchar*) record[0]);
+  key_part_info->field= field;
+  key_part_info->fieldnr= fieldnr;
+  key_part_info->offset= field->offset(record[0]);
+  key_part_info->length=   (uint16) field->pack_length();
+  keyinfo->key_length+= key_part_info->length;
+  key_part_info->key_part_flag= 0;
+  /* TODO:
+    The below method of computing the key format length of the
+    key part is a copy/paste from opt_range.cc, and table.cc.
+    This should be factored out, e.g. as a method of Field.
+    In addition it is not clear if any of the Field::*_length
+    methods is supposed to compute the same length. If so, it
+    might be reused.
+  */
+  key_part_info->store_length= key_part_info->length;
+
+  if (field->real_maybe_null())
+  {
+    key_part_info->store_length+= HA_KEY_NULL_LENGTH;
+    keyinfo->key_length+= HA_KEY_NULL_LENGTH;
+  }
+  if (field->type() == MYSQL_TYPE_BLOB || 
+      field->real_type() == MYSQL_TYPE_VARCHAR)
+  {
+    key_part_info->store_length+= HA_KEY_BLOB_LENGTH;
+    keyinfo->key_length+= HA_KEY_BLOB_LENGTH; // ???
+    key_part_info->key_part_flag|=
+      field->type() == MYSQL_TYPE_BLOB ? HA_BLOB_PART: HA_VAR_LENGTH_PART;
+  }
+
+  key_part_info->type=     (uint8) field->key_type();
+  key_part_info->key_type =
+    ((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT ||
+    (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1 ||
+    (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT2) ?
+    0 : FIELDFLAG_BINARY;
+}
+
+
+/**
+  @brief
+  Add one key to a temporary table
+
+  @param key            the number of the key
+  @param key_parts      number of components of the key
+  @param next_field_no  the call-back function that returns the number of
+                        the field used as the next component of the key
+  @param arg            the argument for the above function
+  @param unique         TRUE <=> it is a unique index
+
+  @details
+  The function adds a new key to the table that is assumed to be a temporary
+  table. At each its invocation the call-back function must return
+  the number of the field that is used as the next component of this key.
+
+  @return FALSE is a success
+  @return TRUE if a failure
+
+*/
+
+bool TABLE::add_tmp_key(uint key, uint key_parts,
+                        uint (*next_field_no) (uchar *), uchar *arg,
+                        bool unique)
+{
+  DBUG_ASSERT(key < max_keys);
+
+  char buf[NAME_CHAR_LEN];
+  KEY* keyinfo;
+  Field **reg_field;
+  uint i;
+  bool key_start= TRUE;
+  KEY_PART_INFO* key_part_info=
+      (KEY_PART_INFO*) alloc_root(&mem_root, sizeof(KEY_PART_INFO)*key_parts);
+  if (!key_part_info)
+    return TRUE;
+  keyinfo= key_info + key;
+  keyinfo->key_part= key_part_info;
+  keyinfo->usable_key_parts= keyinfo->key_parts = key_parts;
+  keyinfo->key_length=0;
+  keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+  keyinfo->flags= HA_GENERATED_KEY;
+  if (unique)
+    keyinfo->flags|= HA_NOSAME;
+  sprintf(buf, "key%i", key);
+  if (!(keyinfo->name= strdup_root(&mem_root, buf)))
+    return TRUE;
+  keyinfo->rec_per_key= (ulong*) alloc_root(&mem_root,
+                                            sizeof(ulong)*key_parts);
+  if (!keyinfo->rec_per_key)
+    return TRUE;
+  bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts);
+
+  for (i= 0; i < key_parts; i++)
+  {
+    uint fld_idx= next_field_no(arg); 
+    reg_field= field + fld_idx;
+    if (key_start)
+      (*reg_field)->key_start.set_bit(key);
+    (*reg_field)->part_of_key.set_bit(key);
+    create_key_part_by_field(keyinfo, key_part_info, *reg_field, fld_idx+1);
+    key_start= FALSE;
+    key_part_info++;
+  }
+
+  set_if_bigger(s->max_key_length, keyinfo->key_length);
+  s->keys++;
+  return FALSE;
+}
+
+/*
+  @brief
+  Drop all indexes except specified one.
+
+  @param key_to_save  number of the key to keep; negative drops all keys
+
+  @details
+  Drop all indexes on this table except 'key_to_save'. The saved key becomes
+  key #0. Only the key descriptor is moved and s->keys adjusted; no memory
+  of the dropped keys is released here.
+*/
+
+void TABLE::use_index(int key_to_save)
+{
+  DBUG_ASSERT(!created && key_to_save < (int)s->keys);
+
+  if (key_to_save < 0)
+  {
+    s->keys= 0;                                 /* Drop all keys. */
+    return;
+  }
+  /* Save the given key: move its descriptor into slot #0. */
+  memmove(key_info, key_info + key_to_save, sizeof(KEY));
+  s->keys= 1;
+}
+
+/*
+  Return TRUE if the table is filled at execution phase 
+  
+  (and so, the optimizer must not do anything that depends on the contents of
+   the table, like range analysis or constant table detection)
+*/
+
+bool TABLE::is_filled_at_execution()
+{ 
+  return test(pos_in_table_list->jtbm_subselect || 
+              pos_in_table_list->is_active_sjm());
 }
 
 
@@ -5059,6 +5778,7 @@ void TABLE_LIST::reinit_before_use(THD *thd)
   mdl_request.ticket= NULL;
 }
 
+
 /*
   Return subselect that contains the FROM list this table is taken from
 
@@ -5087,10 +5807,10 @@ Item_subselect *TABLE_LIST::containing_subselect()
   DESCRIPTION
     The parser collects the index hints for each table in a "tagged list" 
     (TABLE_LIST::index_hints). Using the information in this tagged list
-    this function sets the members st_table::keys_in_use_for_query,
-    st_table::keys_in_use_for_group_by, st_table::keys_in_use_for_order_by,
-    st_table::force_index, st_table::force_index_order,
-    st_table::force_index_group and st_table::covering_keys.
+    this function sets the members TABLE::keys_in_use_for_query,
+    TABLE::keys_in_use_for_group_by, TABLE::keys_in_use_for_order_by,
+    TABLE::force_index, TABLE::force_index_order,
+    TABLE::force_index_group and TABLE::covering_keys.
 
     Current implementation of the runtime does not allow mixing FORCE INDEX
     and USE INDEX, so this is checked here. Then the FORCE INDEX list 
@@ -5350,6 +6070,347 @@ bool is_simple_order(ORDER *order)
   return TRUE;
 }
 
+/*
+  @brief Compute values for virtual columns used in query
+
+  @param  thd              Thread handle
+  @param  table            The TABLE object
+  @param  for_write        Requests to compute only fields needed for write   
+  
+  @details
+    The function computes the values of the virtual columns of the table and
+    stores them in the table record buffer.
+    Only fields from vcol_set are computed, and, when the flag for_write is not
+    set to TRUE, a virtual field is computed only if it's not stored.
+    The flag for_write is set to TRUE for row insert/update operations. 
+ 
+  @retval
+    0    Success
+  @retval
+    >0   Reserved for errors; the code currently always returns 0
+*/
+
+int update_virtual_fields(THD *thd, TABLE *table, bool for_write)
+{
+  DBUG_ENTER("update_virtual_fields");
+  Field **vfield_ptr, *vfield;
+  int error= 0;
+  if (!table || !table->vfield)
+    DBUG_RETURN(0);
+
+  thd->reset_arena_for_cached_items(table->expr_arena);
+  /* Iterate over virtual fields in the table */
+  for (vfield_ptr= table->vfield; *vfield_ptr; vfield_ptr++)
+  {
+    vfield= (*vfield_ptr);
+    DBUG_ASSERT(vfield->vcol_info && vfield->vcol_info->expr_item);
+    /* Only update those fields that are marked in the vcol_set bitmap */
+    if (bitmap_is_set(table->vcol_set, vfield->field_index) &&
+        (for_write || !vfield->stored_in_db))
+    {
+      /* Compute the actual value of the virtual fields */
+      error= vfield->vcol_info->expr_item->save_in_field(vfield, 0);
+      DBUG_PRINT("info", ("field '%s' - updated", vfield->field_name));
+    }
+    else
+    {
+      DBUG_PRINT("info", ("field '%s' - skipped", vfield->field_name));
+    }
+  }
+  thd->reset_arena_for_cached_items(0);
+  DBUG_RETURN(0);
+}
+
+/*
+  @brief Reset const_table flag
+
+  @detail
+  Reset const_table flag for this table. If this table is a merged derived
+  table/view the flag is recursively reset for all tables of the underlying
+  select.
+*/
+
+void TABLE_LIST::reset_const_table()
+{
+  table->const_table= 0;
+  if (is_merged_derived())
+  {
+    SELECT_LEX *select_lex= get_unit()->first_select();
+    TABLE_LIST *tl;
+    List_iterator<TABLE_LIST> ti(select_lex->leaf_tables);
+    while ((tl= ti++))
+      tl->reset_const_table();
+  }
+}
+
+
+/*
+  @brief Run derived tables/view handling phases on underlying select_lex.
+
+  @param lex    LEX for this thread
+  @param phases derived tables/views handling phases to run
+                (set of DT_XXX constants)
+  @details
+  This function runs this derived table through specified 'phases'.
+  Underlying tables of this select are handled prior to this derived.
+  'lex' is passed as an argument to called functions.
+
+  @return TRUE on error
+  @return FALSE ok
+*/
+
+bool TABLE_LIST::handle_derived(LEX *lex, uint phases)
+{
+  SELECT_LEX_UNIT *derived_unit= get_unit();
+  if (!derived_unit)
+    return FALSE;                       /* Not a derived table/view */
+  /* Underlying selects go first, then this derived table itself. */
+  SELECT_LEX *sel= derived_unit->first_select();
+  for (; sel; sel= sel->next_select())
+    if (sel->handle_derived(lex, phases))
+      return TRUE;
+  return mysql_handle_single_derived(lex, this, phases);
+}
+
+
+/**
+  @brief
+  Return unit of this derived table/view
+
+  @return reference to a unit  if it's a derived table/view.
+  @return 0                    when it's not a derived table/view.
+*/
+
+st_select_lex_unit *TABLE_LIST::get_unit()
+{
+  return (view ? &view->unit : derived);
+}
+
+
+/**
+  @brief
+  Return select_lex of this derived table/view
+
+  @return select_lex of this derived table/view.
+  @return 0          when it's not a derived table.
+*/
+
+st_select_lex *TABLE_LIST::get_single_select()
+{
+  SELECT_LEX_UNIT *unit= get_unit();
+  return (unit ? unit->first_select() : 0);
+}
+
+
+/**
+  @brief
+  Attach a join table list as a nested join to this TABLE_LIST.
+
+  @param join_list join table list to attach
+
+  @details
+  This function wraps 'join_list' into a nested_join of this table, thus
+  turning it to a nested join leaf.
+*/
+
+void TABLE_LIST::wrap_into_nested_join(List<TABLE_LIST> &join_list)
+{
+  TABLE_LIST *tl;
+  /*
+    Walk through derived table top list and set 'embedding' to point to
+    the nesting table.
+  */
+  nested_join->join_list.empty();
+  List_iterator_fast<TABLE_LIST> li(join_list);
+  nested_join->join_list= join_list;
+  while ((tl= li++))
+  {
+    tl->embedding= this;
+    tl->join_list= &nested_join->join_list;
+  }
+}
+
+
+/**
+  @brief Initialize this derived table/view
+
+  @param thd        Thread handle
+  @param init_view  TRUE <=> mark a non-view reference as a derived table
+
+  @details
+  This function makes initial preparations of this derived table/view for
+  further processing:
+    if it's a derived table this function marks it either as mergeable or
+      materializable
+    creates temporary table for name resolution purposes
+    creates field translation for mergeable derived table/view
+
+  @return TRUE  an error occurred
+  @return FALSE ok
+*/
+
+bool TABLE_LIST::init_derived(THD *thd, bool init_view)
+{
+  SELECT_LEX *first_select= get_single_select();
+  SELECT_LEX_UNIT *unit= get_unit();
+
+  if (!unit)
+    return FALSE;
+  /*
+    Check whether we can merge this derived table into main select.
+    Depending on the result field translation will or will not
+    be created.
+  */
+  TABLE_LIST *first_table= (TABLE_LIST *) first_select->table_list.first;
+  if (first_select->table_list.elements > 1 ||
+      (first_table && first_table->is_multitable()))
+    set_multitable();
+
+  unit->derived= this;
+  if (init_view && !view)
+  {
+    /* This is all what we can do for a derived table for now. */
+    set_derived();
+  }
+
+  if (!is_view())
+  {
+    /* A subquery might be forced to be materialized due to a side-effect. */
+    if (!is_materialized_derived() && first_select->is_mergeable() &&
+        optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_MERGE) &&
+        !(thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
+          thd->lex->sql_command == SQLCOM_DELETE_MULTI))
+      set_merged_derived();
+    else
+      set_materialized_derived();
+  }
+  /*
+    Derived tables/view are materialized prior to UPDATE, thus we can skip
+    them from table uniqueness check
+  */
+  if (is_materialized_derived())
+  {
+    unit->master_unit()->set_unique_exclude();
+  }
+  /*
+    Create field translation for mergeable derived tables/views.
+    For derived tables field translation can be created only after
+    unit is prepared so all '*' are get unrolled.
+    A failure to create the translation (it returns TRUE) is passed on
+    to the caller instead of being dropped.
+  */
+  if (is_merged_derived() && (is_view() || unit->prepared) &&
+      create_field_translation(thd))
+    return TRUE;
+
+  return FALSE;
+}
+
+
+/**
+  @brief
+  Retrieve number of rows in the table
+
+  @details
+  Retrieve number of rows in the table referred by this TABLE_LIST and
+  store it in the table's stats.records variable. If this TABLE_LIST refers
+  to a materialized derived table/view that is not marked 'fill_me' then the
+  row count of the derived table's result (raised to at least 2) is used
+  instead of calling the handler.
+
+  @return 0          ok
+  @return non zero   error
+*/
+
+int TABLE_LIST::fetch_number_of_rows()
+{
+  /* Every other kind of table reports its statistics through the handler. */
+  if (!is_materialized_derived() || fill_me)
+    return table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
+
+  /* Use the row count kept in the derived unit's result, but never below 2. */
+  select_union *dt_result= (select_union*) derived->result;
+  table->file->stats.records= dt_result->records;
+  set_if_bigger(table->file->stats.records, 2);
+  return 0;
+}
+
+/*
+  Procedure of keys generation for result tables of materialized derived
+  tables/views.
+
+  A key is generated for each equi-join pair derived table-another table.
+  Each generated key consists of fields of derived table used in equi-join.
+  Example:
+
+    SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN
+                  t1 ON tt.f1=t1.f3 and tt.f2=t1.f4;
+  In this case for the derived table tt one key will be generated. It will
+  consist of two parts f1 and f2.
+  Example:
+
+    SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN
+                  t1 ON tt.f1=t1.f3 JOIN
+                  t2 ON tt.f2=t2.f4;
+  In this case for the derived table tt two keys will be generated.
+  One key over f1 field, and another key over f2 field.
+  Currently optimizer may choose to use only one such key, thus the second
+  one will be dropped after range optimizer is finished.
+  See also JOIN::drop_unused_derived_keys function.
+  Example:
+
+    SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN
+                  t1 ON tt.f1=a_function(t1.f3);
+  In this case for the derived table tt one key will be generated. It will
+  consist of one field - f1.
+*/
+
+
+
+/**
+  @brief
+  Change references to underlying items of a merged derived table/view
+  for fields in derived table's result table.
+
+  @return FALSE ok
+  @return TRUE  Out of memory
+*/
+bool TABLE_LIST::change_refs_to_fields()
+{
+  List_iterator<Item> li(used_items);
+  Item_direct_ref *ref;
+  Field_iterator_view field_it;
+  THD *thd= table->in_use;
+  DBUG_ASSERT(is_merged_derived());
+
+  if (!used_items.elements)
+    return FALSE;
+
+  /* One zeroed slot per field of 'table'; a failed allocation is an error. */
+  materialized_items= (Item**)thd->calloc(sizeof(void*) * table->s->fields);
+  if (!materialized_items)
+    return TRUE;
+
+  while ((ref= (Item_direct_ref*)li++))
+  {
+    uint idx;
+    Item *orig_item= *ref->ref;
+    field_it.set(this);
+    for (idx= 0; !field_it.end_of_fields(); field_it.next(), idx++)
+      if (field_it.item() == orig_item)
+        break;
+    DBUG_ASSERT(!field_it.end_of_fields());
+    if (!materialized_items[idx])
+    {
+      materialized_items[idx]= new Item_field(table->field[idx]);
+      if (!materialized_items[idx])
+        return TRUE;
+    }
+    ref->ref= materialized_items + idx;
+  }
+  return FALSE;
+}
+
 
 /*****************************************************************************
 ** Instansiate templates
diff --git a/sql/table.h b/sql/table.h
index be14270e12b..b22cdb6e76b 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -1,7 +1,7 @@
 #ifndef TABLE_INCLUDED
 #define TABLE_INCLUDED
-
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009, 2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -21,6 +21,7 @@
 #include "sql_list.h"                           /* Sql_alloc */
 #include "mdl.h"
 #include "datadict.h"
+#include "sql_string.h"                         /* String */
 
 #ifndef MYSQL_CLIENT
 
@@ -164,6 +165,9 @@ protected:
   CHARSET_INFO *m_connection_cl;
 };
 
+class Query_arena;
+
+/*************************************************************************/
 
 /**
  View_creation_ctx -- creation context of view objects.
@@ -200,7 +204,8 @@ typedef struct st_order {
   bool   counter_used;                  /* parameter was counter of columns */
   Field  *field;			/* If tmp-table group */
   char	 *buff;				/* If tmp-table group */
-  table_map used, depend_map;
+  table_map used; /* NOTE: the below is only set to 0 but is still used by eq_ref_table */
+  table_map depend_map;
 } ORDER;
 
 /**
@@ -561,6 +566,9 @@ struct TABLE_SHARE
   I_P_List <TABLE, TABLE_share> used_tables;
   I_P_List <TABLE, TABLE_share> free_tables;
 
+  engine_option_value *option_list;     /* text options for table */
+  ha_table_option_struct *option_struct; /* structure with parsed options */
+
   /* The following is copied to each TABLE on OPEN */
   Field **field;
   Field **found_next_number_field;
@@ -600,6 +608,8 @@ struct TABLE_SHARE
   ulong   avg_row_length;		/* create information */
   ulong   version, mysql_version;
   ulong   reclength;			/* Recordlength */
+  /* Stored record length. No generated-only virtual fields are included */
+  ulong   stored_rec_length;            
 
   plugin_ref db_plugin;			/* storage engine plugin */
   inline handlerton *db_type() const	/* table_type for handler */
@@ -610,11 +620,23 @@ struct TABLE_SHARE
   enum row_type row_type;		/* How rows are stored */
   enum tmp_table_type tmp_table;
 
+  /** Transactional or not. */
+  enum ha_choice transactional;
+  /** Per-page checksums or not. */
+  enum ha_choice page_checksum;
+
   uint ref_count;                       /* How many TABLE objects uses this */
   uint blob_ptr_size;			/* 4 or 8 */
   uint key_block_size;			/* create key_block_size, if used */
   uint null_bytes, last_null_bit_pos;
+  /*
+    Same as null_bytes, except that if there is only a 'delete-marker' in
+    the record then this value is 0.
+  */
+  uint null_bytes_for_compare;
   uint fields;				/* Number of fields */
+  /* Number of stored fields, generated-only virtual fields are not included */
+  uint stored_fields;                   
   uint rec_buff_length;                 /* Size of table->record[] buffer */
   uint keys, key_parts;
   uint max_key_length, max_unique_length, total_key_length;
@@ -635,12 +657,14 @@ struct TABLE_SHARE
   uint error, open_errno, errarg;       /* error from open_table_def() */
   uint column_bitmap_size;
   uchar frm_version;
+  uint vfields;                         /* Number of computed (virtual) fields */
   bool null_field_first;
   bool system;                          /* Set if system table (one record) */
   bool crypted;                         /* If .frm file is crypted */
-  bool db_low_byte_first;		/* Portable row format */
   bool crashed;
   bool is_view;
+  bool deleting;                        /* going to delete this table */
+  bool can_cmp_whole_record;
   ulong table_map_id;                   /* for row-based replication */
 
   /*
@@ -651,16 +675,6 @@ struct TABLE_SHARE
   */
   int cached_row_logging_check;
 
-  /*
-    Storage media to use for this table (unless another storage
-    media has been specified on an individual column - in versions
-    where that is supported)
-  */
-  enum ha_storage_media default_storage_media;
-
-  /* Name of the tablespace used for this table */
-  char *tablespace;
-
 #ifdef WITH_PARTITION_STORAGE_ENGINE
   /* filled in when reading from frm */
   bool auto_partitioned;
@@ -693,7 +707,6 @@ struct TABLE_SHARE
   void (*ha_part_data_destroy)(HA_DATA_PARTITION *);
 #endif
 
-
   /** Instrumentation for this table share. */
   PSI_table_share *m_psi;
 
@@ -925,7 +938,7 @@ public:
     needed by the query without reading the row.
   */
   key_map covering_keys;
-  key_map quick_keys, merge_keys;
+  key_map quick_keys, merge_keys,intersect_keys;
   /*
     A set of keys that can be used in the query that references this
     table.
@@ -947,6 +960,7 @@ public:
   Field *next_number_field;		/* Set if next_number is activated */
   Field *found_next_number_field;	/* Set on open */
   Field_timestamp *timestamp_field;
+  Field **vfield;                       /* Pointer to virtual fields*/
 
   /* Table's triggers, 0 if there are no of them */
   Table_triggers_list *triggers;
@@ -954,11 +968,12 @@ public:
   /* Position in thd->locked_table_list under LOCK TABLES */
   TABLE_LIST *pos_in_locked_tables;
   ORDER		*group;
-  const char	*alias;            	  /* alias or table name */
+  String	alias;            	  /* alias or table name */
   uchar		*null_flags;
   my_bitmap_map	*bitmap_init_value;
-  MY_BITMAP     def_read_set, def_write_set, tmp_set; /* containers */
-  MY_BITMAP     *read_set, *write_set;          /* Active column sets */
+  MY_BITMAP     def_read_set, def_write_set, def_vcol_set, tmp_set; 
+  MY_BITMAP     eq_join_set;         /* used to mark equi-joined fields */
+  MY_BITMAP     *read_set, *write_set, *vcol_set; /* Active column sets */
   /*
    The ID of the query that opened and is using this table. Has different
    meanings depending on the table type.
@@ -1026,7 +1041,7 @@ public:
   /* number of select if it is derived table */
   uint          derived_select_number;
   int		current_lock;           /* Type of lock on table */
-  my_bool copy_blobs;			/* copy_blobs when storing */
+  bool copy_blobs;			/* copy_blobs when storing */
 
   /*
     0 or JOIN_TYPE_{LEFT|RIGHT}. Currently this is only compared to 0.
@@ -1038,60 +1053,70 @@ public:
     If true, the current table row is considered to have all columns set to 
     NULL, including columns declared as "not null" (see maybe_null).
   */
-  my_bool null_row;
+  bool null_row;
 
   /*
     TODO: Each of the following flags take up 8 bits. They can just as easily
     be put into one single unsigned long and instead of taking up 18
     bytes, it would take up 4.
   */
-  my_bool force_index;
+  bool force_index;
 
   /**
     Flag set when the statement contains FORCE INDEX FOR ORDER BY
     See TABLE_LIST::process_index_hints().
   */
-  my_bool force_index_order;
+  bool force_index_order;
 
   /**
     Flag set when the statement contains FORCE INDEX FOR GROUP BY
     See TABLE_LIST::process_index_hints().
   */
-  my_bool force_index_group;
-  my_bool distinct,const_table,no_rows;
+  bool force_index_group;
+  bool distinct,const_table,no_rows, used_for_duplicate_elimination;
 
   /**
      If set, the optimizer has found that row retrieval should access index 
      tree only.
    */
-  my_bool key_read;
-  my_bool no_keyread;
-  my_bool locked_by_logger;
-  my_bool no_replicate;
-  my_bool locked_by_name;
-  my_bool fulltext_searched;
-  my_bool no_cache;
+  bool key_read;
+  bool no_keyread;
+  bool locked_by_logger;
+  bool no_replicate;
+  bool locked_by_name;
+  bool fulltext_searched;
+  bool no_cache;
   /* To signal that the table is associated with a HANDLER statement */
-  my_bool open_by_handler;
+  bool open_by_handler;
   /*
     To indicate that a non-null value of the auto_increment field
     was provided by the user or retrieved from the current record.
     Used only in the MODE_NO_AUTO_VALUE_ON_ZERO mode.
   */
-  my_bool auto_increment_field_not_null;
-  my_bool insert_or_update;             /* Can be used by the handler */
-  my_bool alias_name_used;		/* true if table_name is alias */
-  my_bool get_fields_in_item_tree;      /* Signal to fix_field */
-  my_bool m_needs_reopen;
+  bool auto_increment_field_not_null;
+  bool insert_or_update;             /* Can be used by the handler */
+  bool alias_name_used;              /* true if table_name is alias */
+  bool get_fields_in_item_tree;      /* Signal to fix_field */
+  bool m_needs_reopen;
+  bool created;    /* For tmp tables. TRUE <=> tmp table was actually created.*/
 
   REGINFO reginfo;			/* field connections */
   MEM_ROOT mem_root;
   GRANT_INFO grant;
   FILESORT_INFO sort;
+  /*
+    The arena which the items for expressions from the table definition
+    are associated with.  
+    Currently only the items of the expressions for virtual columns are
+    associated with this arena.
+    TODO: To attach the partitioning expressions to this arena.  
+  */
+  Query_arena *expr_arena;
 #ifdef WITH_PARTITION_STORAGE_ENGINE
   partition_info *part_info;            /* Partition related information */
   bool no_partitions_used; /* If true, all partitions have been pruned away */
 #endif
+  uint max_keys; /* Size of allocated key_info array. */
   MDL_ticket *mdl_ticket;
 
   void init(THD *thd, TABLE_LIST *tl);
@@ -1107,6 +1132,8 @@ public:
   void mark_columns_needed_for_update(void);
   void mark_columns_needed_for_delete(void);
   void mark_columns_needed_for_insert(void);
+  bool mark_virtual_col(Field *field);
+  void mark_virtual_columns_for_write(bool insert_fl);
   inline void column_bitmaps_set(MY_BITMAP *read_set_arg,
                                  MY_BITMAP *write_set_arg)
   {
@@ -1115,12 +1142,30 @@ public:
     if (file)
       file->column_bitmaps_signal();
   }
+  inline void column_bitmaps_set(MY_BITMAP *read_set_arg,
+                                 MY_BITMAP *write_set_arg,
+                                 MY_BITMAP *vcol_set_arg)
+  {
+    read_set= read_set_arg;
+    write_set= write_set_arg;
+    vcol_set= vcol_set_arg;
+    if (file)
+      file->column_bitmaps_signal();
+  }
   inline void column_bitmaps_set_no_signal(MY_BITMAP *read_set_arg,
                                            MY_BITMAP *write_set_arg)
   {
     read_set= read_set_arg;
     write_set= write_set_arg;
   }
+  inline void column_bitmaps_set_no_signal(MY_BITMAP *read_set_arg,
+                                           MY_BITMAP *write_set_arg,
+                                           MY_BITMAP *vcol_set_arg)
+  {
+    read_set= read_set_arg;
+    write_set= write_set_arg;
+    vcol_set= vcol_set_arg;
+  }
   inline void use_all_columns()
   {
     column_bitmaps_set(&s->all_set, &s->all_set);
@@ -1129,24 +1174,47 @@ public:
   {
     read_set= &def_read_set;
     write_set= &def_write_set;
+    vcol_set= &def_vcol_set;
   }
   /** Should this instance of the table be reopened? */
   inline bool needs_reopen()
   { return !db_stat || m_needs_reopen; }
 
-  inline void set_keyread(bool flag)
+  bool alloc_keys(uint key_count);
+  bool add_tmp_key(uint key, uint key_parts,
+                   uint (*next_field_no) (uchar *), uchar *arg,
+                   bool unique);
+  void create_key_part_by_field(KEY *keyinfo, KEY_PART_INFO *key_part_info,
+                                Field *field, uint fieldnr);
+  void use_index(int key_to_save);
+  void set_table_map(table_map map_arg, uint tablenr_arg)
   {
-    DBUG_ASSERT(file);
-    if (flag && !key_read)
-    {
-      key_read= 1;
-      file->extra(HA_EXTRA_KEYREAD);
-    }
-    else if (!flag && key_read)
+    map= map_arg;
+    tablenr= tablenr_arg;
+  }
+  inline void enable_keyread()
+  {
+    DBUG_ENTER("enable_keyread");
+    DBUG_ASSERT(key_read == 0);
+    key_read= 1;
+    file->extra(HA_EXTRA_KEYREAD);
+    DBUG_VOID_RETURN;
+  }
+  /*
+    Returns TRUE if the table is filled at execution phase (and so, the
+    optimizer must not do anything that depends on the contents of the table,
+    like range analysis or constant table detection)
+  */
+  bool is_filled_at_execution();
+  inline void disable_keyread()
+  {
+    DBUG_ENTER("disable_keyread");
+    if (key_read)
     {
       key_read= 0;
       file->extra(HA_EXTRA_NO_KEYREAD);
     }
+    DBUG_VOID_RETURN;
   }
 
   bool update_const_key_parts(COND *conds);
@@ -1257,13 +1325,52 @@ typedef struct st_schema_table
 } ST_SCHEMA_TABLE;
 
 
+/*
+  Types of derived tables. The ending part is a bitmap of phases that are
+  applicable to a derived table of the type.
+ * /
+#define VIEW_ALGORITHM_UNDEFINED        0
+#define VIEW_ALGORITHM_MERGE            1 + DT_COMMON + DT_MERGE
+#define DERIVED_ALGORITHM_MERGE         2 + DT_COMMON + DT_MERGE
+#define VIEW_ALGORITHM_TMPTABLE         3 + DT_COMMON + DT_MATERIALIZE
+#define DERIVED_ALGORITHM_MATERIALIZE   4 + DT_COMMON + DT_MATERIALIZE
+*/
+#define DTYPE_ALGORITHM_UNDEFINED    0
+#define DTYPE_VIEW                   1   /* see TABLE_LIST::is_view() */
+#define DTYPE_TABLE                  2   /* see TABLE_LIST::is_derived() */
+#define DTYPE_MERGE                  4   /* see is_merged_derived() */
+#define DTYPE_MATERIALIZE            8   /* see is_materialized_derived() */
+#define DTYPE_MULTITABLE             16  /* see TABLE_LIST::is_multitable() */
+#define DTYPE_MASK                   19  /* VIEW | TABLE | MULTITABLE bits */
+
+/*
+  Phases of derived tables/views handling, see sql_derived.cc
+  Values are used as parts of a bitmap attached to derived table types.
+*/
+#define DT_INIT             1
+#define DT_PREPARE          2
+#define DT_OPTIMIZE         4
+#define DT_MERGE            8
+#define DT_MERGE_FOR_INSERT 16
+#define DT_CREATE           32
+#define DT_FILL             64
+#define DT_REINIT           128
+#define DT_PHASES           8   /* number of DT_* phase flags above */
+/* Phases that are applicable to all derived tables. */
+#define DT_COMMON       (DT_INIT | DT_PREPARE | DT_REINIT | DT_OPTIMIZE)
+/* Phases that are applicable only to materialized derived tables. */
+#define DT_MATERIALIZE  (DT_CREATE | DT_FILL)
+
+#define DT_PHASES_MERGE (DT_COMMON | DT_MERGE | DT_MERGE_FOR_INSERT)
+#define DT_PHASES_MATERIALIZE (DT_COMMON | DT_MATERIALIZE)
+
+#define VIEW_ALGORITHM_UNDEFINED 0
+#define VIEW_ALGORITHM_MERGE    (DTYPE_VIEW | DTYPE_MERGE)
+#define VIEW_ALGORITHM_TMPTABLE (DTYPE_VIEW | DTYPE_MATERIALIZE)
+
 #define JOIN_TYPE_LEFT	1
 #define JOIN_TYPE_RIGHT	2
 
-#define VIEW_ALGORITHM_UNDEFINED        0
-#define VIEW_ALGORITHM_TMPTABLE         1
-#define VIEW_ALGORITHM_MERGE            2
-
 #define VIEW_SUID_INVOKER               0
 #define VIEW_SUID_DEFINER               1
 #define VIEW_SUID_DEFAULT               2
@@ -1336,6 +1443,11 @@ enum enum_open_type
 };
 
 
+class SJ_MATERIALIZATION_INFO;
+class Index_hint;
+class Item_in_subselect;
+
+
 /*
   Table reference in the FROM clause.
 
@@ -1347,7 +1459,7 @@ enum enum_open_type
   1) table (TABLE_LIST::view == NULL)
      - base table
        (TABLE_LIST::derived == NULL)
-     - subquery - TABLE_LIST::table is a temp table
+     - FROM-clause subquery - TABLE_LIST::table is a temp table
        (TABLE_LIST::derived != NULL)
      - information schema table
        (TABLE_LIST::schema_table != NULL)
@@ -1357,6 +1469,7 @@ enum enum_open_type
            also (TABLE_LIST::field_translation != NULL)
      - tmptable (TABLE_LIST::effective_algorithm == VIEW_ALGORITHM_TMPTABLE)
            also (TABLE_LIST::field_translation == NULL)
+  2.5) TODO: Add derived tables description here
   3) nested table reference (TABLE_LIST::nested_join != NULL)
      - table sequence - e.g. (t1, t2, t3)
        TODO: how to distinguish from a JOIN?
@@ -1366,6 +1479,8 @@ enum enum_open_type
        (TABLE_LIST::natural_join != NULL)
        - JOIN ... USING
          (TABLE_LIST::join_using_fields != NULL)
+     - semi-join nest (sj_on_expr!= NULL && sj_subq_pred!=NULL)
+  4) jtbm semi-join (jtbm_subselect != NULL)
 */
 
 struct LEX;
@@ -1390,7 +1505,7 @@ struct TABLE_LIST
     db_length= db_length_arg;
     table_name= (char*) table_name_arg;
     table_name_length= table_name_length_arg;
-    alias= (char*) alias_arg;
+    alias= (char*) (alias_arg ? alias_arg : table_name_arg);
     lock_type= lock_type_arg;
     mdl_request.init(MDL_key::TABLE, db, table_name,
                      (lock_type >= TL_WRITE_ALLOW_WRITE) ?
@@ -1409,6 +1524,28 @@ struct TABLE_LIST
   char		*db, *alias, *table_name, *schema_table_name;
   char          *option;                /* Used by cache index  */
   Item		*on_expr;		/* Used with outer join */
+
+  Item          *sj_on_expr;
+  /*
+    (Valid only for semi-join nests) Bitmap of tables that are within the
+    semi-join (this is different from bitmap of all nest's children because
+    tables that were pulled out of the semi-join nest remain listed as
+    nest's children).
+  */
+  table_map     sj_inner_tables;
+  /* Number of IN-compared expressions */
+  uint          sj_in_exprs;
+  
+  /* If this is a non-jtbm semi-join nest: corresponding subselect predicate */
+  Item_in_subselect  *sj_subq_pred;
+
+  /* If this is a jtbm semi-join object: corresponding subselect predicate */
+  Item_in_subselect  *jtbm_subselect;
+  /* TODO: check if this can be joined with tablenr_exec */
+  uint jtbm_table_no;
+
+  SJ_MATERIALIZATION_INFO *sj_mat_info;
+
   /*
     The structure of ON expression presented in the member above
     can be changed during certain optimizations. This member
@@ -1459,6 +1596,8 @@ struct TABLE_LIST
     filling procedure
   */
   select_union  *derived_result;
+  /* Stub used for materialized derived tables. */
+  table_map	map;                    /* ID bit of table (1,2,4,8,16...) */
   /*
     Reference from aux_tables to local list entry of main select of
     multi-delete statement:
@@ -1503,6 +1642,7 @@ struct TABLE_LIST
   Field_translator *field_translation;	/* array of VIEW fields */
   /* pointer to element after last one in translation table above */
   Field_translator *field_translation_end;
+  bool field_translation_updated;
   /*
     List (based on next_local) of underlying tables of this view. I.e. it
     does not include the tables of subqueries used in the view. Is set only
@@ -1517,11 +1657,20 @@ struct TABLE_LIST
   List<TABLE_LIST> *view_tables;
   /* most upper view this table belongs to */
   TABLE_LIST	*belong_to_view;
+  /* A derived table this table belongs to */
+  TABLE_LIST    *belong_to_derived;
   /*
     The view directly referencing this table
     (non-zero only for merged underlying tables of a view).
   */
   TABLE_LIST	*referencing_view;
+
+  table_map view_used_tables;
+  table_map     map_exec;
+  /* TODO: check if this can be joined with jtbm_table_no */
+  uint          tablenr_exec;
+  uint          maybe_null_exec;
+
   /* Ptr to parent MERGE table list item. See top comment in ha_myisammrg.cc */
   TABLE_LIST    *parent_l;
   /*
@@ -1534,13 +1683,7 @@ struct TABLE_LIST
     SQL SECURITY DEFINER)
   */
   Security_context *view_sctx;
-  /*
-    List of all base tables local to a subquery including all view
-    tables. Unlike 'next_local', this in this list views are *not*
-    leaves. Created in setup_tables() -> make_leaves_list().
-  */
   bool allowed_show;
-  TABLE_LIST	*next_leaf;
   Item          *where;                 /* VIEW WHERE clause condition */
   Item          *check_option;          /* WITH CHECK OPTION condition */
   LEX_STRING	select_stmt;		/* text of (CREATE/SELECT) statement */
@@ -1576,7 +1719,7 @@ struct TABLE_LIST
       - VIEW_ALGORITHM_MERGE
       @to do Replace with an enum 
   */
-  uint8         effective_algorithm;
+  uint8         derived_type;
   GRANT_INFO	grant;
   /* data need by some engines in query cache*/
   ulonglong     engine_data;
@@ -1607,7 +1750,6 @@ struct TABLE_LIST
   enum enum_open_type open_type;
   /* TRUE if this merged view contain auto_increment field */
   bool          contain_auto_increment;
-  bool          multitable_view;        /* TRUE iff this is multitable view */
   bool          compact_view_format;    /* Use compact format for SHOW CREATE VIEW */
   /* view where processed */
   bool          where_processed;
@@ -1646,6 +1788,18 @@ struct TABLE_LIST
   */
   bool          is_fqtn;
 
+  bool          deleting;               /* going to delete this table */
+
+  /* TRUE <=> derived table should be filled right after optimization. */
+  bool          fill_me;
+  /* TRUE <=> view/DT is merged. */
+  bool          merged;
+  bool          merged_for_insert;
+  /* TRUE <=> don't prepare this derived table/view as it should be merged.*/
+  bool          skip_prepare_derived;
+
+  List<Item>    used_items;
+  Item          **materialized_items;
 
   /* View creation context. */
 
@@ -1690,15 +1844,16 @@ struct TABLE_LIST
   MDL_request mdl_request;
 
   void calc_md5(char *buffer);
-  void set_underlying_merge();
   int view_check_option(THD *thd, bool ignore_failure);
+  bool create_field_translation(THD *thd);
   bool setup_underlying(THD *thd);
   void cleanup_items();
   bool placeholder()
   {
     return derived || view || schema_table || !table;
   }
-  void print(THD *thd, String *str, enum_query_type query_type);
+  void print(THD *thd, table_map eliminated_tables, String *str, 
+             enum_query_type query_type);
   bool check_single_table(TABLE_LIST **table, table_map map,
                           TABLE_LIST *view);
   bool set_insert_values(MEM_ROOT *mem_root);
@@ -1719,7 +1874,7 @@ struct TABLE_LIST
   inline bool prepare_where(THD *thd, Item **conds,
                             bool no_where_clause)
   {
-    if (effective_algorithm == VIEW_ALGORITHM_MERGE)
+    if (!view || is_merged_derived())
       return prep_where(thd, conds, no_where_clause);
     return FALSE;
   }
@@ -1776,6 +1931,60 @@ struct TABLE_LIST
     m_table_ref_version= table_ref_version_arg;
   }
 
+  /* Accessors for the DTYPE_* flags kept in derived_type (view/derived). */
+  inline bool is_non_derived()
+  {
+    return (!derived_type);
+  }
+  inline bool is_view_or_derived()
+  {
+    return (derived_type);
+  }
+  inline bool is_view()
+  {
+    return (derived_type & DTYPE_VIEW);
+  }
+  inline bool is_derived()
+  {
+    return (derived_type & DTYPE_TABLE);
+  }
+  inline void set_view()
+  {
+    derived_type= DTYPE_VIEW;          /* overwrites every other DTYPE_* bit */
+  }
+  inline void set_derived()
+  {
+    derived_type= DTYPE_TABLE;         /* overwrites every other DTYPE_* bit */
+  }
+  inline bool is_merged_derived()
+  {
+    return (derived_type & DTYPE_MERGE);
+  }
+  inline void set_merged_derived()
+  {
+    derived_type= ((derived_type & DTYPE_MASK) |   /* clears MATERIALIZE */
+                    DTYPE_TABLE | DTYPE_MERGE);
+  }
+  inline bool is_materialized_derived()
+  {
+    return (derived_type & DTYPE_MATERIALIZE);
+  }
+  inline void set_materialized_derived()
+  {
+    derived_type= ((derived_type & DTYPE_MASK) |   /* clears MERGE */
+                    DTYPE_TABLE | DTYPE_MATERIALIZE);
+  }
+  inline bool is_multitable()
+  {
+    return (derived_type & DTYPE_MULTITABLE);
+  }
+  inline void set_multitable()
+  {
+    derived_type|= DTYPE_MULTITABLE;   /* all other bits are left as they are */
+  }
+  void reset_const_table();
+  bool handle_derived(LEX *lex, uint phases);
+
   /**
      @brief True if this TABLE_LIST represents an anonymous derived table,
      i.e.  the result of a subquery.
@@ -1795,6 +2004,14 @@ struct TABLE_LIST
      respectively.
    */
   char *get_table_name() { return view != NULL ? view_name.str : table_name; }
+  bool is_active_sjm();
+  bool is_jtbm() { return test(jtbm_subselect!=NULL); }
+  st_select_lex_unit *get_unit();
+  st_select_lex *get_single_select();
+  void wrap_into_nested_join(List<TABLE_LIST> &join_list);
+  bool init_derived(THD *thd, bool init_view);
+  int fetch_number_of_rows();
+  bool change_refs_to_fields();
 
 private:
   bool prep_check_option(THD *thd, uint8 check_opt_type);
@@ -1931,7 +2148,11 @@ public:
 typedef struct st_nested_join
 {
   List<TABLE_LIST>  join_list;       /* list of elements in the nested join */
-  table_map         used_tables;     /* bitmap of tables in the nested join */
+  /* 
+    Bitmap of tables within this nested join (including those embedded within
+    its children), including tables removed by table elimination.
+  */
+  table_map         used_tables;
   table_map         not_null_tables; /* tables that rejects nulls           */
   /**
     Used for pointing out the first table in the plan being covered by this
@@ -1946,7 +2167,20 @@ typedef struct st_nested_join
     Before each use the counters are zeroed by reset_nj_counters.
   */
   uint              counter;
+  /*
+    Number of elements in join_list that were not (or contain table(s) that 
+    weren't) removed by table elimination.
+  */
+  uint              n_tables;
   nested_join_map   nj_map;          /* Bit used to identify this nested join*/
+  /*
+    (Valid only for semi-join nests) Bitmap of tables outside the semi-join
+    that are used within the semi-join's ON condition.
+  */
+  table_map         sj_depends_on;
+  /* Outer non-trivially correlated tables */
+  table_map         sj_corr_tables;
+  List<Item>        sj_outer_expr_list;
   /**
      True if this join nest node is completely covered by the query execution
      plan. This means two things.
@@ -1955,7 +2189,7 @@ typedef struct st_nested_join
 
      2. All child join nest nodes are fully covered.
    */
-  bool is_fully_covered() const { return join_list.elements == counter; }
+  bool is_fully_covered() const { return n_tables == counter; }
 } NESTED_JOIN;
 
 
diff --git a/sql/thr_malloc.cc b/sql/thr_malloc.cc
index 546a47c0dfd..ba534e52b99 100644
--- a/sql/thr_malloc.cc
+++ b/sql/thr_malloc.cc
@@ -67,11 +67,13 @@ void init_sql_alloc(MEM_ROOT *mem_root, uint block_size, uint pre_alloc)
 }
 
 
+#ifndef MYSQL_CLIENT
 void *sql_alloc(size_t Size)
 {
   MEM_ROOT *root= *my_pthread_getspecific_ptr(MEM_ROOT**,THR_MALLOC);
   return alloc_root(root,Size);
 }
+#endif
 
 
 void *sql_calloc(size_t size)
diff --git a/sql/thr_malloc.h b/sql/thr_malloc.h
index b8c86c27e19..81b7d3cc238 100644
--- a/sql/thr_malloc.h
+++ b/sql/thr_malloc.h
@@ -18,7 +18,6 @@
 
 #include "my_global.h"                          // uint, size_t
 
-typedef struct charset_info_st CHARSET_INFO;
 typedef struct st_mem_root MEM_ROOT;
 
 void init_sql_alloc(MEM_ROOT *root, uint block_size, uint pre_alloc_size);
diff --git a/sql/tztime.cc b/sql/tztime.cc
index f245b736f52..aa945687675 100644
--- a/sql/tztime.cc
+++ b/sql/tztime.cc
@@ -222,7 +222,7 @@ tz_load(const char *name, TIME_ZONE_INFO *sp, MEM_ROOT *storage)
                                          ALIGN_SIZE(sp->typecnt *
                                                     sizeof(TRAN_TYPE_INFO)) +
 #ifdef ABBR_ARE_USED
-                                         ALIGN_SIZE(sp->charcnt) +
+                                         ALIGN_SIZE(sp->charcnt+1) +
 #endif
                                          sp->leapcnt * sizeof(LS_INFO))))
       return 1;
@@ -235,7 +235,7 @@ tz_load(const char *name, TIME_ZONE_INFO *sp, MEM_ROOT *storage)
     tzinfo_buf+= ALIGN_SIZE(sp->typecnt * sizeof(TRAN_TYPE_INFO));
 #ifdef ABBR_ARE_USED
     sp->chars= tzinfo_buf;
-    tzinfo_buf+= ALIGN_SIZE(sp->charcnt);
+    tzinfo_buf+= ALIGN_SIZE(sp->charcnt+1);
 #endif
     sp->lsis= (LS_INFO *)tzinfo_buf;
 
@@ -456,7 +456,7 @@ prepare_tz_info(TIME_ZONE_INFO *sp, MEM_ROOT *storage)
 
     if (end_t == MY_TIME_T_MAX ||
         ((cur_off_and_corr > 0) &&
-        (end_t >= MY_TIME_T_MAX - cur_off_and_corr)))
+         (end_t >= MY_TIME_T_MAX - cur_off_and_corr)))
       /* end of t space */
       break;
 
@@ -823,9 +823,11 @@ sec_since_epoch(int year, int mon, int mday, int hour, int min ,int sec)
     TIME_to_gmt_sec()
       t               - pointer to structure for broken down represenatation
       sp              - pointer to struct with time zone description
-      in_dst_time_gap - pointer to bool which is set to true if datetime
-                        value passed doesn't really exist (i.e. falls into
-                        spring time-gap) and is not touched otherwise.
+      error_code      - 0, if the conversion was successful;
+                        ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value
+                           which is out of TIMESTAMP range;
+                        ER_WARN_INVALID_TIMESTAMP, if t represents value which
+                           doesn't exist (falls into the spring time-gap).
 
   DESCRIPTION
     This is mktime analog for MySQL. It is essentially different
@@ -887,20 +889,23 @@ sec_since_epoch(int year, int mon, int mday, int hour, int min ,int sec)
     Seconds in UTC since Epoch.
     0 in case of error.
 */
+
 static my_time_t
-TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp,
-                my_bool *in_dst_time_gap)
+TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp, uint *error_code)
 {
   my_time_t local_t;
   uint saved_seconds;
   uint i;
   int shift= 0;
-
   DBUG_ENTER("TIME_to_gmt_sec");
 
   if (!validate_timestamp_range(t))
+  {
+    *error_code= ER_WARN_DATA_OUT_OF_RANGE;
     DBUG_RETURN(0);
+  }
 
+  *error_code= 0;
 
   /* We need this for correct leap seconds handling */
   if (t->second < SECS_PER_MIN)
@@ -944,6 +949,7 @@ TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp,
       This means that source time can't be represented as my_time_t due to
       limited my_time_t range.
     */
+    *error_code= ER_WARN_DATA_OUT_OF_RANGE;
     DBUG_RETURN(0);
   }
 
@@ -960,6 +966,7 @@ TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp,
     if (local_t > (my_time_t) (TIMESTAMP_MAX_VALUE - shift * SECS_PER_DAY +
                                sp->revtis[i].rt_offset - saved_seconds))
     {
+      *error_code= ER_WARN_DATA_OUT_OF_RANGE;
       DBUG_RETURN(0);                           /* my_time_t overflow */
     }
     local_t+= shift * SECS_PER_DAY;
@@ -973,7 +980,7 @@ TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp,
       Now we are returning my_time_t value corresponding to the
       beginning of the gap.
     */
-    *in_dst_time_gap= 1;
+    *error_code= ER_WARN_INVALID_TIMESTAMP;
     local_t= sp->revts[i] - sp->revtis[i].rt_offset + saved_seconds;
   }
   else
@@ -981,7 +988,10 @@ TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp,
 
   /* check for TIMESTAMP_MAX_VALUE was already done above */
   if (local_t < TIMESTAMP_MIN_VALUE)
+  {
     local_t= 0;
+    *error_code= ER_WARN_DATA_OUT_OF_RANGE;
+  }
 
   DBUG_RETURN(local_t);
 }
@@ -1015,8 +1025,7 @@ class Time_zone_system : public Time_zone
 {
 public:
   Time_zone_system() {}                       /* Remove gcc warning */
-  virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t,
-                                    my_bool *in_dst_time_gap) const;
+  virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const;
   virtual void gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const;
   virtual const String * get_name() const;
 };
@@ -1030,9 +1039,11 @@ public:
     TIME_to_gmt_sec()
       t               - pointer to MYSQL_TIME structure with local time in
                         broken-down representation.
-      in_dst_time_gap - pointer to bool which is set to true if datetime
-                        value passed doesn't really exist (i.e. falls into
-                        spring time-gap) and is not touched otherwise.
+      error_code      - 0, if the conversion was successful;
+                        ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value
+                           which is out of TIMESTAMP range;
+                        ER_WARN_INVALID_TIMESTAMP, if t represents value which
+                           doesn't exist (falls into the spring time-gap).
 
   DESCRIPTION
     This method uses system function (localtime_r()) for conversion
@@ -1048,10 +1059,10 @@ public:
     Corresponding my_time_t value or 0 in case of error
 */
 my_time_t
-Time_zone_system::TIME_to_gmt_sec(const MYSQL_TIME *t, my_bool *in_dst_time_gap) const
+Time_zone_system::TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const
 {
   long not_used;
-  return my_system_gmt_sec(t, &not_used, in_dst_time_gap);
+  return my_system_gmt_sec(t, &not_used, error_code);
 }
 
 
@@ -1111,7 +1122,7 @@ class Time_zone_utc : public Time_zone
 public:
   Time_zone_utc() {}                          /* Remove gcc warning */
   virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t,
-                                    my_bool *in_dst_time_gap) const;
+                                    uint *error_code) const;
   virtual void gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const;
   virtual const String * get_name() const;
 };
@@ -1120,14 +1131,6 @@ public:
 /*
   Convert UTC time from MYSQL_TIME representation to its my_time_t representation.
 
-  SYNOPSIS
-    TIME_to_gmt_sec()
-      t               - pointer to MYSQL_TIME structure with local time
-                        in broken-down representation.
-      in_dst_time_gap - pointer to bool which is set to true if datetime
-                        value passed doesn't really exist (i.e. falls into
-                        spring time-gap) and is not touched otherwise.
-
   DESCRIPTION
     Since Time_zone_utc is used only internally for my_time_t -> TIME
     conversions, this function of Time_zone interface is not implemented for
@@ -1137,10 +1140,11 @@ public:
     0
 */
 my_time_t
-Time_zone_utc::TIME_to_gmt_sec(const MYSQL_TIME *t, my_bool *in_dst_time_gap) const
+Time_zone_utc::TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const
 {
   /* Should be never called */
   DBUG_ASSERT(0);
+  *error_code= ER_WARN_DATA_OUT_OF_RANGE;
   return 0;
 }
 
@@ -1200,8 +1204,7 @@ class Time_zone_db : public Time_zone
 {
 public:
   Time_zone_db(TIME_ZONE_INFO *tz_info_arg, const String * tz_name_arg);
-  virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t,
-                                    my_bool *in_dst_time_gap) const;
+  virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const;
   virtual void gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const;
   virtual const String * get_name() const;
 private:
@@ -1238,9 +1241,11 @@ Time_zone_db::Time_zone_db(TIME_ZONE_INFO *tz_info_arg,
     TIME_to_gmt_sec()
       t               - pointer to MYSQL_TIME structure with local time
                         in broken-down representation.
-      in_dst_time_gap - pointer to bool which is set to true if datetime
-                        value passed doesn't really exist (i.e. falls into
-                        spring time-gap) and is not touched otherwise.
+      error_code      - 0, if the conversion was successful;
+                        ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value
+                           which is out of TIMESTAMP range;
+                        ER_WARN_INVALID_TIMESTAMP, if t represents value which
+                           doesn't exist (falls into the spring time-gap).
 
   DESCRIPTION
     Please see ::TIME_to_gmt_sec for function description and
@@ -1250,9 +1255,9 @@ Time_zone_db::Time_zone_db(TIME_ZONE_INFO *tz_info_arg,
     Corresponding my_time_t value or 0 in case of error
 */
 my_time_t
-Time_zone_db::TIME_to_gmt_sec(const MYSQL_TIME *t, my_bool *in_dst_time_gap) const
+Time_zone_db::TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const
 {
-  return ::TIME_to_gmt_sec(t, tz_info, in_dst_time_gap);
+  return ::TIME_to_gmt_sec(t, tz_info, error_code);
 }
 
 
@@ -1298,7 +1303,7 @@ class Time_zone_offset : public Time_zone
 public:
   Time_zone_offset(long tz_offset_arg);
   virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t,
-                                    my_bool *in_dst_time_gap) const;
+                                    uint *error_code) const;
   virtual void   gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const;
   virtual const String * get_name() const;
   /*
@@ -1340,17 +1345,18 @@ Time_zone_offset::Time_zone_offset(long tz_offset_arg):
     TIME_to_gmt_sec()
       t               - pointer to MYSQL_TIME structure with local time
                         in broken-down representation.
-      in_dst_time_gap - pointer to bool which should be set to true if
-                        datetime  value passed doesn't really exist
-                        (i.e. falls into spring time-gap) and is not
-                        touched otherwise.
-                        It is not really used in this class.
+      error_code      - 0, if the conversion was successful;
+                        ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value
+                           which is out of TIMESTAMP range;
+                        ER_WARN_INVALID_TIMESTAMP, if t represents value which
+                           doesn't exist (falls into the spring time-gap).
 
   RETURN VALUE
-    Corresponding my_time_t value or 0 in case of error
+    Corresponding my_time_t value or 0 in case of error.
 */
+
 my_time_t
-Time_zone_offset::TIME_to_gmt_sec(const MYSQL_TIME *t, my_bool *in_dst_time_gap) const
+Time_zone_offset::TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const
 {
   my_time_t local_t;
   int shift= 0;
@@ -1360,7 +1366,11 @@ Time_zone_offset::TIME_to_gmt_sec(const MYSQL_TIME *t, my_bool *in_dst_time_gap)
     us to make all validation checks here.
   */
   if (!validate_timestamp_range(t))
+  {
+    *error_code= ER_WARN_DATA_OUT_OF_RANGE;
     return 0;
+  }
+  *error_code= 0;
 
   /*
     Do a temporary shift of the boundary dates to avoid
@@ -1384,6 +1394,7 @@ Time_zone_offset::TIME_to_gmt_sec(const MYSQL_TIME *t, my_bool *in_dst_time_gap)
     return local_t;
 
   /* range error*/
+  *error_code= ER_WARN_DATA_OUT_OF_RANGE;
   return 0;
 }
 
@@ -1716,7 +1727,7 @@ my_tz_init(THD *org_thd, const char *default_tzname, my_bool bootstrap)
 
   tz_leapcnt= 0;
 
-  res= table->file->index_first(table->record[0]);
+  res= table->file->ha_index_first(table->record[0]);
 
   while (!res)
   {
@@ -1738,7 +1749,7 @@ my_tz_init(THD *org_thd, const char *default_tzname, my_bool bootstrap)
                 tz_leapcnt, (ulong) tz_lsis[tz_leapcnt-1].ls_trans,
                 tz_lsis[tz_leapcnt-1].ls_corr));
 
-    res= table->file->index_next(table->record[0]);
+    res= table->file->ha_index_next(table->record[0]);
   }
 
   (void)table->file->ha_index_end();
@@ -1854,6 +1865,8 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
   uint tzid, ttid;
   my_time_t ttime;
   char buff[MAX_FIELD_WIDTH];
+  uchar keybuff[32];
+  Field *field;
   String abbr(buff, sizeof(buff), &my_charset_latin1);
   char *alloc_buff, *tz_name_buff;
   /*
@@ -1906,8 +1919,8 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
   */
   (void)table->file->ha_index_init(0, 1);
 
-  if (table->file->index_read_map(table->record[0], table->field[0]->ptr,
-                                  HA_WHOLE_KEY, HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_map(table->record[0], table->field[0]->ptr,
+                                     HA_WHOLE_KEY, HA_READ_KEY_EXACT))
   {
 #ifdef EXTRA_DEBUG
     /*
@@ -1931,11 +1944,16 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
   */
   table= tz_tables->table;
   tz_tables= tz_tables->next_local;
-  table->field[0]->store((longlong) tzid, TRUE);
+  field= table->field[0];
+  field->store((longlong) tzid, TRUE);
+  DBUG_ASSERT(field->key_length() <= sizeof(keybuff));
+  field->get_key_image(keybuff,
+                       min(field->key_length(), sizeof(keybuff)),
+                       Field::itRAW);
   (void)table->file->ha_index_init(0, 1);
 
-  if (table->file->index_read_map(table->record[0], table->field[0]->ptr,
-                                  HA_WHOLE_KEY, HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_map(table->record[0], keybuff,
+                                     HA_WHOLE_KEY, HA_READ_KEY_EXACT))
   {
     sql_print_error("Can't find description of time zone '%u'", tzid);
     goto end;
@@ -1958,11 +1976,16 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
   */
   table= tz_tables->table;
   tz_tables= tz_tables->next_local;
-  table->field[0]->store((longlong) tzid, TRUE);
+  field= table->field[0];
+  field->store((longlong) tzid, TRUE);
+  DBUG_ASSERT(field->key_length() <= sizeof(keybuff));
+  field->get_key_image(keybuff,
+                       min(field->key_length(), sizeof(keybuff)),
+                       Field::itRAW);
   (void)table->file->ha_index_init(0, 1);
 
-  res= table->file->index_read_map(table->record[0], table->field[0]->ptr,
-                                   (key_part_map)1, HA_READ_KEY_EXACT);
+  res= table->file->ha_index_read_map(table->record[0], keybuff,
+                                      (key_part_map)1, HA_READ_KEY_EXACT);
   while (!res)
   {
     ttid= (uint)table->field[1]->val_int();
@@ -2009,8 +2032,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
 
     tmp_tz_info.typecnt= ttid + 1;
 
-    res= table->file->index_next_same(table->record[0],
-                                      table->field[0]->ptr, 4);
+    res= table->file->ha_index_next_same(table->record[0], keybuff, 4);
   }
 
   if (res != HA_ERR_END_OF_FILE)
@@ -2032,8 +2054,8 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
   table->field[0]->store((longlong) tzid, TRUE);
   (void)table->file->ha_index_init(0, 1);
 
-  res= table->file->index_read_map(table->record[0], table->field[0]->ptr,
-                                   (key_part_map)1, HA_READ_KEY_EXACT);
+  res= table->file->ha_index_read_map(table->record[0], keybuff,
+                                      (key_part_map)1, HA_READ_KEY_EXACT);
   while (!res)
   {
     ttime= (my_time_t)table->field[1]->val_int();
@@ -2062,8 +2084,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
       ("time_zone_transition table: tz_id: %u  tt_time: %lu  tt_id: %u",
        tzid, (ulong) ttime, ttid));
 
-    res= table->file->index_next_same(table->record[0],
-                                      table->field[0]->ptr, 4);
+    res= table->file->ha_index_next_same(table->record[0], keybuff, 4);
   }
 
   /*
@@ -2771,7 +2792,7 @@ main(int argc, char **argv)
             for (time_tmp.second=0; time_tmp.second<60; time_tmp.second+=25)
             {
               long not_used;
-              my_bool not_used_2;
+              uint not_used_2;
               t= (time_t)my_system_gmt_sec(&time_tmp, &not_used, &not_used_2);
               t1= (time_t)TIME_to_gmt_sec(&time_tmp, &tz_info, &not_used_2);
               if (t != t1)
diff --git a/sql/tztime.h b/sql/tztime.h
index e673ca901d6..eb7d85c48b2 100644
--- a/sql/tztime.h
+++ b/sql/tztime.h
@@ -45,11 +45,11 @@ public:
   /**
     Converts local time in broken down MYSQL_TIME representation to 
     my_time_t (UTC seconds since Epoch) represenation.
-    Returns 0 in case of error. Sets in_dst_time_gap to true if date provided
-    falls into spring time-gap (or lefts it untouched otherwise).
+    Returns 0 in case of error. May set error_code to ER_WARN_DATA_OUT_OF_RANGE
+    or ER_WARN_INVALID_TIMESTAMP (see TIME_to_timestamp()).
   */
   virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t, 
-                                    my_bool *in_dst_time_gap) const = 0;
+                                    uint *error_code) const = 0;
   /**
     Converts time in my_time_t representation to local time in
     broken down MYSQL_TIME representation.
diff --git a/sql/udf_example.c b/sql/udf_example.c
index bfd153b7d28..d68f49e1729 100644
--- a/sql/udf_example.c
+++ b/sql/udf_example.c
@@ -211,7 +211,7 @@ char *is_const(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long
 **		try to keep the error message less than 80 bytes long!
 **
 ** This function should return 1 if something goes wrong. In this case
-** message should contain something usefull!
+** message should contain something useful!
 **************************************************************************/
 
 #define MAXMETAPH 8
diff --git a/sql/udf_example.def b/sql/udf_example.def
index 3d569941cc8..41150b24e8f 100644
--- a/sql/udf_example.def
+++ b/sql/udf_example.def
@@ -3,8 +3,10 @@ VERSION		1.0
 EXPORTS
   lookup
   lookup_init
+  lookup_deinit
   reverse_lookup
   reverse_lookup_init
+  reverse_lookup_deinit
   metaphon_init
   metaphon_deinit
   metaphon
diff --git a/sql/uniques.cc b/sql/uniques.cc
index e7ce2197147..ae50a1d3970 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -48,6 +48,12 @@ int unique_write_to_file(uchar* key, element_count count, Unique *unique)
   return my_b_write(&unique->file, key, unique->size) ? 1 : 0;
 }
 
+int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique)
+{
+  return my_b_write(&unique->file, key, unique->size) ||
+         my_b_write(&unique->file, &count, sizeof(element_count)) ? 1 : 0;
+}
+
 int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique)
 {
   memcpy(unique->record_pointers, key, unique->size);
@@ -55,10 +61,28 @@ int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique)
   return 0;
 }
 
+int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *unique)
+{
+  if (count < unique->min_dupl_count)
+  {
+    unique->filtered_out_elems++;     /* too few duplicates: drop the key */
+    return 0;
+  }
+  memcpy(unique->record_pointers, key, unique->size);
+  unique->record_pointers+= unique->size;   /* advance the output cursor */
+  return 0;
+}
+
+
 Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
-	       uint size_arg, ulonglong max_in_memory_size_arg)
+	       uint size_arg, ulonglong max_in_memory_size_arg,
+               uint min_dupl_count_arg)
   :max_in_memory_size(max_in_memory_size_arg), size(size_arg), elements(0)
 {
+  min_dupl_count= min_dupl_count_arg;
+  full_size= size;
+  if (min_dupl_count_arg)
+    full_size+= sizeof(element_count);
   my_b_clear(&file);
   init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, 0,
             NULL, comp_func_fixed_arg);
@@ -126,7 +150,8 @@ inline double log2_n_fact(double x)
 */
 
 static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
-                                     uint *first, uint *last)
+                                     uint *first, uint *last,
+                                     uint compare_factor)
 {
   uint total_buf_elems= 0;
   for (uint *pbuf= first; pbuf <= last; pbuf++)
@@ -137,7 +162,7 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
 
   /* Using log2(n)=log(n)/log(2) formula */
   return 2*((double)total_buf_elems*elem_size) / IO_SIZE +
-     total_buf_elems*log((double) n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2);
+     total_buf_elems*log((double) n_buffers) / (compare_factor * M_LN2);
 }
 
 
@@ -170,7 +195,8 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
 
 static double get_merge_many_buffs_cost(uint *buffer,
                                         uint maxbuffer, uint max_n_elems,
-                                        uint last_n_elems, int elem_size)
+                                        uint last_n_elems, int elem_size,
+                                        uint compare_factor)
 {
   register int i;
   double total_cost= 0.0;
@@ -197,19 +223,22 @@ static double get_merge_many_buffs_cost(uint *buffer,
       {
         total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
                                            buff_elems + i,
-                                           buff_elems + i + MERGEBUFF-1);
+                                           buff_elems + i + MERGEBUFF-1,
+                                           compare_factor);
 	lastbuff++;
       }
       total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
                                          buff_elems + i,
-                                         buff_elems + maxbuffer);
+                                         buff_elems + maxbuffer,
+                                         compare_factor);
       maxbuffer= lastbuff;
     }
   }
 
   /* Simulate final merge_buff call. */
   total_cost += get_merge_buffers_cost(buff_elems, elem_size,
-                                       buff_elems, buff_elems + maxbuffer);
+                                       buff_elems, buff_elems + maxbuffer,
+                                       compare_factor);
   return total_cost;
 }
 
@@ -224,7 +253,11 @@ static double get_merge_many_buffs_cost(uint *buffer,
                 to get # bytes needed.
       nkeys     #of elements in Unique
       key_size  size of each elements in bytes
-      max_in_memory_size amount of memory Unique will be allowed to use
+      max_in_memory_size   amount of memory Unique will be allowed to use
+      compare_factor   used to calculate cost of one comparison
+      write_fl  if the result must be saved written to disk
+      in_memory_elems  OUT estimate of the number of elements in memory
+                           if disk is not used  
 
   RETURN
     Cost in disk seeks.
@@ -261,15 +294,17 @@ static double get_merge_many_buffs_cost(uint *buffer,
       these will be random seeks.
 */
 
-double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size,
-                            ulonglong max_in_memory_size)
+double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size,
+                            ulonglong max_in_memory_size,
+                            uint compare_factor,
+                            bool intersect_fl, bool *in_memory)
 {
-  ulong max_elements_in_tree;
-  ulong last_tree_elems;
+  size_t max_elements_in_tree;
+  size_t last_tree_elems;
   int   n_full_trees; /* number of trees in unique - 1 */
   double result;
 
-  max_elements_in_tree= ((ulong) max_in_memory_size /
+  max_elements_in_tree= ((size_t) max_in_memory_size /
                          ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));
 
   n_full_trees=    nkeys / max_elements_in_tree;
@@ -279,11 +314,15 @@ double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size,
   result= 2*log2_n_fact(last_tree_elems + 1.0);
   if (n_full_trees)
     result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0);
-  result /= TIME_FOR_COMPARE_ROWID;
+  result /= compare_factor;
 
-  DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys,
-                     n_full_trees, n_full_trees?max_elements_in_tree:0,
-                     last_tree_elems));
+  DBUG_PRINT("info",("unique trees sizes: %u=%u*%u + %u", (uint)nkeys,
+                     (uint)n_full_trees, 
+                     (uint)(n_full_trees?max_elements_in_tree:0),
+                     (uint)last_tree_elems));
+
+  if (in_memory)
+    *in_memory= !n_full_trees;
 
   if (!n_full_trees)
     return result;
@@ -298,12 +337,12 @@ double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size,
   result += DISK_SEEK_BASE_COST * ceil(((double) key_size)*last_tree_elems / IO_SIZE);
 
   /* Cost of merge */
+  if (intersect_fl)
+    key_size+= sizeof(element_count);
   double merge_cost= get_merge_many_buffs_cost(buffer, n_full_trees,
                                                max_elements_in_tree,
-                                               last_tree_elems, key_size);
-  if (merge_cost < 0.0)
-    return merge_cost;
-
+                                               last_tree_elems, key_size,
+                                               compare_factor);
   result += merge_cost;
   /*
     Add cost of reading the resulting sequence, assuming there were no
@@ -330,7 +369,10 @@ bool Unique::flush()
   file_ptr.count=tree.elements_in_tree;
   file_ptr.file_pos=my_b_tell(&file);
 
-  if (tree_walk(&tree, (tree_walk_action) unique_write_to_file,
+  tree_walk_action action= min_dupl_count ?
+		           (tree_walk_action) unique_write_to_file_with_count :
+		           (tree_walk_action) unique_write_to_file;
+  if (tree_walk(&tree, action,
 		(void*) this, left_root_right) ||
       insert_dynamic(&file_ptrs, (uchar*) &file_ptr))
     return 1;
@@ -360,6 +402,7 @@ Unique::reset()
     reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1);
   }
   elements= 0;
+  tree.flag= 0;
 }
 
 /*
@@ -426,7 +469,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
   if (end <= begin ||
       merge_buffer_size < (ulong) (key_length * (end - begin + 1)) ||
       init_queue(&queue, (uint) (end - begin), offsetof(BUFFPEK, key), 0,
-                 buffpek_compare, &compare_context))
+                 buffpek_compare, &compare_context, 0, 0))
     return 1;
   /* we need space for one key when a piece of merge buffer is re-read */
   merge_buffer_size-= key_length;
@@ -471,7 +514,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
     */
     top->key+= key_length;
     if (--top->mem_count)
-      queue_replaced(&queue);
+      queue_replace_top(&queue);
     else /* next piece should be read */
     {
       /* save old_key not to overwrite it in read_to_buffer */
@@ -481,14 +524,14 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
       if (bytes_read == (uint) (-1))
         goto end;
       else if (bytes_read > 0)      /* top->key, top->mem_count are reset */
-        queue_replaced(&queue);     /* in read_to_buffer */
+        queue_replace_top(&queue);             /* in read_to_buffer */
       else
       {
         /*
           Tree for old 'top' element is empty: remove it from the queue and
           give all its memory to the nearest tree.
         */
-        queue_remove(&queue, 0);
+        queue_remove_top(&queue);
         reuse_freed_buff(&queue, top, key_length);
       }
     }
@@ -579,15 +622,19 @@ bool Unique::get(TABLE *table)
 {
   SORTPARAM sort_param;
   table->sort.found_records=elements+tree.elements_in_tree;
-
   if (my_b_tell(&file) == 0)
   {
     /* Whole tree is in memory;  Don't use disk if you don't need to */
     if ((record_pointers=table->sort.record_pointers= (uchar*)
 	 my_malloc(size * tree.elements_in_tree, MYF(0))))
     {
-      (void) tree_walk(&tree, (tree_walk_action) unique_write_to_ptrs,
+      tree_walk_action action= min_dupl_count ?
+		         (tree_walk_action) unique_intersect_write_to_ptrs :
+		         (tree_walk_action) unique_write_to_ptrs;
+      filtered_out_elems= 0;
+      (void) tree_walk(&tree, action,
 		       this, left_root_right);
+      table->sort.found_records-= filtered_out_elems;
       return 0;
     }
   }
@@ -606,9 +653,10 @@ bool Unique::get(TABLE *table)
   outfile=table->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE),
                                 MYF(MY_ZEROFILL));
 
-  if (!outfile || (! my_b_inited(outfile) &&
-      open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
-		       MYF(MY_WME))))
+  if (!outfile ||
+      (! my_b_inited(outfile) &&
+       open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
+                        MYF(MY_WME))))
     return 1;
   reinit_io_cache(outfile,WRITE_CACHE,0L,0,0);
 
@@ -616,7 +664,9 @@ bool Unique::get(TABLE *table)
   sort_param.max_rows= elements;
   sort_param.sort_form=table;
   sort_param.rec_length= sort_param.sort_length= sort_param.ref_length=
-    size;
+   full_size;
+  sort_param.min_dupl_count= min_dupl_count;
+  sort_param.res_length= 0;
   sort_param.keys= (uint) (max_in_memory_size / sort_param.sort_length);
   sort_param.not_killable=1;
 
@@ -637,8 +687,9 @@ bool Unique::get(TABLE *table)
   if (flush_io_cache(&file) ||
       reinit_io_cache(&file,READ_CACHE,0L,0,0))
     goto err;
-  if (merge_buffers(&sort_param, &file, outfile, sort_buffer, file_ptr,
-		    file_ptr, file_ptr+maxbuffer,0))
+  sort_param.res_length= sort_param.rec_length-
+                         (min_dupl_count ? sizeof(element_count) : 0);
+  if (merge_index(&sort_param, sort_buffer, file_ptr, maxbuffer, &file, outfile))
     goto err;
   error=0;
 err:
@@ -653,3 +704,5 @@ err:
   outfile->end_of_file=save_pos;
   return error;
 }
+
+
diff --git a/sql/unireg.cc b/sql/unireg.cc
index d77a6b06275..b5faac3b797 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -29,11 +29,15 @@
 #include "sql_partition.h"                      // struct partition_info
 #include "sql_table.h"                          // check_duplicate_warning
 #include "sql_class.h"                  // THD, Internal_error_handler
+#include "create_options.h"
 #include <m_ctype.h>
 #include <assert.h>
 
 #define FCOMP			17		/* Bytes for a packed field */
 
+/* threshold for safe_alloca */
+#define ALLOCA_THRESHOLD       2048
+
 static uchar * pack_screens(List<Create_field> &create_fields,
 			    uint *info_length, uint *screens, bool small_file);
 static uint pack_keys(uchar *keybuff,uint key_count, KEY *key_info,
@@ -117,6 +121,7 @@ bool mysql_create_frm(THD *thd, const char *file_name,
   ulong key_buff_length;
   File file;
   ulong filepos, data_offset;
+  uint options_len;
   uchar fileinfo[64],forminfo[288],*keybuff;
   uchar *screen_buff;
   char buff[128];
@@ -125,9 +130,6 @@ bool mysql_create_frm(THD *thd, const char *file_name,
 #endif
   Pack_header_error_handler pack_header_error_handler;
   int error;
-  const uint format_section_header_size= 8;
-  uint format_section_length;
-  uint tablespace_length= 0;
   DBUG_ENTER("mysql_create_frm");
 
   DBUG_ASSERT(*fn_rext((char*)file_name)); // Check .frm extension
@@ -170,8 +172,7 @@ bool mysql_create_frm(THD *thd, const char *file_name,
   reclength=uint2korr(forminfo+266);
 
   /* Calculate extra data segment length */
-  str_db_type.str= (char *) ha_resolve_storage_engine_name(create_info->db_type);
-  str_db_type.length= strlen(str_db_type.str);
+  str_db_type= *hton_name(create_info->db_type);
   /* str_db_type */
   create_info->extra_size= (2 + str_db_type.length +
                             2 + create_info->connect_string.length);
@@ -195,6 +196,19 @@ bool mysql_create_frm(THD *thd, const char *file_name,
     if (key_info[i].parser_name)
       create_info->extra_size+= key_info[i].parser_name->length + 1;
   }
+
+  options_len= engine_table_options_frm_length(create_info->option_list,
+                                               create_fields,
+                                               keys, key_info);
+  DBUG_PRINT("info", ("Options length: %u", options_len));
+  if (options_len)
+  {
+    create_info->table_options|= HA_OPTION_TEXT_CREATE_OPTIONS;
+    create_info->extra_size+= (options_len + 4);
+  }
+  else
+    create_info->table_options&= ~HA_OPTION_TEXT_CREATE_OPTIONS;
+
   /*
     This gives us the byte-position of the character at
     (character-position, not byte-position) TABLE_COMMENT_MAXLEN.
@@ -260,18 +274,6 @@ bool mysql_create_frm(THD *thd, const char *file_name,
     forminfo[46]=(uchar) create_info->comment.length;
   }
 
-  /*
-    Add room in extra segment for "format section" with additional
-    table and column properties
-  */
-  if (create_info->tablespace)
-    tablespace_length= strlen(create_info->tablespace);
-  format_section_length=
-    format_section_header_size +
-    tablespace_length + 1 +
-    create_fields.elements;
-  create_info->extra_size+= format_section_length;
-
   if ((file=create_frm(thd, file_name, db, table, reclength, fileinfo,
 		       create_info, keys, key_info)) < 0)
   {
@@ -350,6 +352,7 @@ bool mysql_create_frm(THD *thd, const char *file_name,
     if (mysql_file_write(file, (uchar*) buff, 6, MYF_RW))
       goto err;
   }
+
   for (i= 0; i < keys; i++)
   {
     if (key_info[i].parser_name)
@@ -369,53 +372,22 @@ bool mysql_create_frm(THD *thd, const char *file_name,
       goto err;
   }
 
-  /* "Format section" with additional table and column properties */
+  if (options_len)
   {
-    uchar *ptr, *format_section_buff;
-    if (!(format_section_buff=(uchar*) my_malloc(format_section_length,
-                                                 MYF(MY_WME))))
+    uchar *optbuff= (uchar *)my_safe_alloca(options_len + 4, ALLOCA_THRESHOLD);
+    my_bool error;
+    DBUG_PRINT("info", ("Create options length: %u", options_len));
+    if (!optbuff)
       goto err;
-    ptr= format_section_buff;
-
-    /* header */
-    const uint format_section_flags=
-      create_info->storage_media; // 3 bits
-    const uint format_section_unused= 0;
-    int2store(ptr+0, format_section_length);
-    int4store(ptr+2, format_section_flags);
-    int2store(ptr+6, format_section_unused);
-    ptr+= format_section_header_size;
-
-    /* tablespace name */
-    if (tablespace_length > 0)
-      memcpy(ptr, create_info->tablespace, tablespace_length);
-    ptr+= tablespace_length;
-    *ptr= 0; /* tablespace string terminating zero */
-    ptr++;
-
-    /* column properties  */
-    Create_field *field;
-    List_iterator<Create_field> it(create_fields);
-    while ((field=it++))
-    {
-      const uchar field_storage= 0; /* Used in MySQL Cluster */
-      const uchar field_column_format= 0; /* Used in MySQL Cluster */
-      const uchar field_flags=
-        field_storage + (field_column_format << COLUMN_FORMAT_SHIFT);
-      *ptr= field_flags;
-      ptr++;
-    }
-    DBUG_ASSERT(format_section_buff + format_section_length == ptr);
-
-    if (mysql_file_write(file, format_section_buff,
-                         format_section_length, MYF_RW))
-    {
-      my_free(format_section_buff);
+    int4store(optbuff, options_len);
+    engine_table_options_frm_image(optbuff + 4,
+                                   create_info->option_list,
+                                   create_fields,
+                                   keys, key_info);
+    error= my_write(file, optbuff, options_len + 4, MYF_RW);
+    my_safe_afree(optbuff, options_len + 4, ALLOCA_THRESHOLD);
+    if (error)
       goto err;
-    }
-    DBUG_PRINT("info", ("wrote format section, length: %u",
-                        format_section_length));
-    my_free(format_section_buff);
   }
 
   mysql_file_seek(file, filepos, MY_SEEK_SET, MYF(0));
@@ -637,7 +609,7 @@ static uint pack_keys(uchar *keybuff, uint key_count, KEY *keyinfo,
     int2store(pos+6, key->block_size);
     pos+=8;
     key_parts+=key->key_parts;
-    DBUG_PRINT("loop", ("flags: %lu  key_parts: %d at 0x%lx",
+    DBUG_PRINT("loop", ("flags: %lu  key_parts: %d  key_part: 0x%lx",
                         key->flags, key->key_parts,
                         (long) key->key_part));
     for (key_part=key->key_part,key_part_end=key_part+key->key_parts ;
@@ -707,7 +679,7 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
 {
   uint length,int_count,int_length,no_empty, int_parts;
   uint time_stamp_pos,null_fields;
-  ulong reclength, totlength, n_length, com_length;
+  ulong reclength, totlength, n_length, com_length, vcol_info_length;
   DBUG_ENTER("pack_header");
 
   if (create_fields.elements > MAX_FIELDS)
@@ -718,8 +690,8 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
 
   totlength= 0L;
   reclength= data_offset;
-  no_empty=int_count=int_parts=int_length=time_stamp_pos=null_fields=
-    com_length=0;
+  no_empty=int_count=int_parts=int_length=time_stamp_pos=null_fields=0;
+  com_length=vcol_info_length=0;
   n_length=2L;
 
 	/* Check fields */
@@ -752,6 +724,30 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
                      ER_TOO_LONG_FIELD_COMMENT, warn_buff);
       field->comment.length= tmp_len;
     }
+    if (field->vcol_info)
+    {
+      tmp_len=
+        system_charset_info->cset->charpos(system_charset_info,
+                                           field->vcol_info->expr_str.str,
+                                           field->vcol_info->expr_str.str +
+                                           field->vcol_info->expr_str.length,
+                                           VIRTUAL_COLUMN_EXPRESSION_MAXLEN);
+
+      if (tmp_len < field->vcol_info->expr_str.length)
+      {
+        my_error(ER_WRONG_STRING_LENGTH, MYF(0),
+                 field->vcol_info->expr_str.str,"VIRTUAL COLUMN EXPRESSION",
+                 (uint) VIRTUAL_COLUMN_EXPRESSION_MAXLEN);
+        DBUG_RETURN(1);
+      }
+      /*
+        Sum up the length of the expression string and the length of the
+        mandatory header to the total length of info on the defining 
+        expressions saved in the frm file for virtual columns.
+      */
+      vcol_info_length+= field->vcol_info->expr_str.length+
+                         (uint)FRM_VCOL_HEADER_SIZE;
+    }
 
     totlength+= field->length;
     com_length+= field->comment.length;
@@ -771,8 +767,6 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
 	!time_stamp_pos)
       time_stamp_pos= (uint) field->offset+ (uint) data_offset + 1;
     length=field->pack_length;
-    /* Ensure we don't have any bugs when generating offsets */
-    DBUG_ASSERT(reclength == field->offset + data_offset);
     if ((uint) field->offset+ (uint) data_offset+ length > reclength)
       reclength=(uint) (field->offset+ data_offset + length);
     n_length+= (ulong) strlen(field->field_name)+1;
@@ -839,7 +833,8 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
   /* Hack to avoid bugs with small static rows in MySQL */
   reclength=max(file->min_record_length(table_options),reclength);
   if (info_length+(ulong) create_fields.elements*FCOMP+288+
-      n_length+int_length+com_length > 65535L || int_count > 255)
+      n_length+int_length+com_length+vcol_info_length > 65535L || 
+      int_count > 255)
   {
     my_message(ER_TOO_MANY_FIELDS, ER(ER_TOO_MANY_FIELDS), MYF(0));
     DBUG_RETURN(1);
@@ -847,7 +842,7 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
 
   bzero((char*)forminfo,288);
   length=(info_length+create_fields.elements*FCOMP+288+n_length+int_length+
-	  com_length);
+	  com_length+vcol_info_length);
   int2store(forminfo,length);
   forminfo[256] = (uint8) screens;
   int2store(forminfo+258,create_fields.elements);
@@ -864,7 +859,8 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
   int2store(forminfo+280,22);			/* Rows needed */
   int2store(forminfo+282,null_fields);
   int2store(forminfo+284,com_length);
-  /* Up to forminfo+288 is free to use for additional information */
+  int2store(forminfo+286,vcol_info_length);
+  /* forminfo+288 is free to use for additional information */
   DBUG_RETURN(0);
 } /* pack_header */
 
@@ -903,7 +899,7 @@ static bool pack_fields(File file, List<Create_field> &create_fields,
                         ulong data_offset)
 {
   reg2 uint i;
-  uint int_count, comment_length=0;
+  uint int_count, comment_length= 0, vcol_info_length=0;
   uchar buff[MAX_FIELD_WIDTH];
   Create_field *field;
   DBUG_ENTER("pack_fields");
@@ -916,6 +912,7 @@ static bool pack_fields(File file, List<Create_field> &create_fields,
   while ((field=it++))
   {
     uint recpos;
+    uint cur_vcol_expr_len= 0;
     buff[0]= (uchar) field->row;
     buff[1]= (uchar) field->col;
     buff[2]= (uchar) field->sc_length;
@@ -945,6 +942,17 @@ static bool pack_fields(File file, List<Create_field> &create_fields,
     {
       buff[11]= buff[14]= 0;			// Numerical
     }
+    if (field->vcol_info)
+    {
+      /* 
+        Use the interval_id place in the .frm file to store the length of
+        the additional data saved for the virtual field
+      */
+      buff[12]= cur_vcol_expr_len= field->vcol_info->expr_str.length +
+                (uint)FRM_VCOL_HEADER_SIZE;
+      vcol_info_length+= cur_vcol_expr_len+(uint)FRM_VCOL_HEADER_SIZE;
+      buff[13]= (uchar) MYSQL_TYPE_VIRTUAL;
+    }
     int2store(buff+15, field->comment.length);
     comment_length+= field->comment.length;
     set_if_bigger(int_count,field->interval_id);
@@ -1039,6 +1047,34 @@ static bool pack_fields(File file, List<Create_field> &create_fields,
 	  DBUG_RETURN(1);
     }
   }
+  if (vcol_info_length)
+  {
+    it.rewind();
+    int_count=0;
+    while ((field=it++))
+    {
+      /*
+        Pack each virtual field as follows:
+        byte 1      = 1 (always 1 to allow for future extensions)
+        byte 2      = sql_type
+        byte 3      = flags (as of now, 0 - no flags, 1 - field is physically stored)
+        byte 4-...  = virtual column expression (text data)
+      */
+      if (field->vcol_info && field->vcol_info->expr_str.length)
+      {
+        buff[0]= (uchar)1;
+        buff[1]= (uchar) field->sql_type;
+        buff[2]= (uchar) field->stored_in_db;
+        if (my_write(file, buff, 3, MYF_RW))
+          DBUG_RETURN(1);
+        if (my_write(file,
+                     (uchar*) field->vcol_info->expr_str.str,
+                     field->vcol_info->expr_str.length,
+                     MYF_RW))
+          DBUG_RETURN(1);
+      }
+    }
+  }
   DBUG_RETURN(0);
 }
 
@@ -1073,7 +1109,6 @@ static bool make_empty_rec(THD *thd, File file,enum legacy_db_type table_type,
   }
 
   table.in_use= thd;
-  table.s->db_low_byte_first= handler->low_byte_first();
   table.s->blob_ptr_size= portable_sizeof_char_ptr;
 
   null_count=0;
diff --git a/sql/unireg.h b/sql/unireg.h
index 1bf956efb4f..f1066dbccb8 100644
--- a/sql/unireg.h
+++ b/sql/unireg.h
@@ -55,7 +55,6 @@ typedef struct st_ha_create_information HA_CREATE_INFO;
                        ER_ERROR_FIRST])
 #define ER_THD_OR_DEFAULT(thd,X) ((thd) ? ER_THD(thd, X) : ER_DEFAULT(X))
 
-
 #define ME_INFO (ME_HOLDTANG+ME_OLDWIN+ME_NOREFRESH)
 #define ME_ERROR (ME_BELL+ME_OLDWIN+ME_NOREFRESH)
 #define MYF_RW MYF(MY_WME+MY_NABP)		/* Vid my_read & my_write */
@@ -161,6 +160,13 @@ typedef struct st_ha_create_information HA_CREATE_INFO;
 
 #define DEFAULT_KEY_CACHE_NAME "default"
 
+/* The length of the header part for each virtual column in the .frm file */
+#define FRM_VCOL_HEADER_SIZE 3
+
+/* Maximum length of the defining expression for a virtual column */
+#define VIRTUAL_COLUMN_EXPRESSION_MAXLEN (255 - FRM_VCOL_HEADER_SIZE)
+
+
 /* Include prototypes for unireg */
 
 #include "mysqld_error.h"
diff --git a/sql/winservice.c b/sql/winservice.c
new file mode 100644
index 00000000000..562f047fa79
--- /dev/null
+++ b/sql/winservice.c
@@ -0,0 +1,247 @@
+/*
+  Get Properties of an existing mysqld Windows service 
+*/
+
+#include <windows.h>
+#include <winsvc.h>
+#include "winservice.h"
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+
+/*
+  Get version from an executable file; outputs are 0 if it cannot be read.
+*/
+void get_file_version(const char *path, int *major, int *minor, int *patch)
+{
+  DWORD version_handle;
+  char *ver= 0;
+  VS_FIXEDFILEINFO info;
+  UINT len;
+  DWORD size;
+  void *p;
+  *major= *minor= *patch= 0;
+
+  size= GetFileVersionInfoSize(path, &version_handle);
+  if (size == 0)
+    return;
+  ver= (char *)malloc(size);
+  /* Treat allocation failure like missing version info; free(NULL) is safe */
+  if(!ver || !GetFileVersionInfo(path, version_handle, size, ver))
+    goto end;
+  /* The root block must hold a complete VS_FIXEDFILEINFO before copying it */
+  if(!VerQueryValue(ver,"\\",&p,&len) || len < sizeof(VS_FIXEDFILEINFO))
+    goto end;
+  memcpy(&info,p ,sizeof(VS_FIXEDFILEINFO));
+  *major= (info.dwFileVersionMS & 0xFFFF0000) >> 16;
+  *minor= (info.dwFileVersionMS & 0x0000FFFF);
+  *patch= (info.dwFileVersionLS & 0xFFFF0000) >> 16;
+end:
+  free(ver);
+}
+
+/* Drop an enclosing pair of double quotes, then expand path to a full path. */
+void normalize_path(char *path, size_t size)
+{
+  char full[MAX_PATH];
+  char *closing_quote;
+  const int quoted= (*path == '"');
+
+  /* Skip the opening quote, if any, and cut the copy at the next quote */
+  strcpy_s(full, MAX_PATH, quoted ? path + 1 : path);
+  closing_quote= quoted ? strchr(full, '"') : NULL;
+  if (closing_quote)
+    *closing_quote= 0;
+
+  GetFullPathName(full, MAX_PATH, full, NULL);
+  strcpy_s(path, size, full);
+}
+
+/*
+  Retrieve some properties from windows mysqld service binary path.
+  We're interested in ini file location and datadir, and also in version of
+  the data. We tolerate missing mysqld.exe.
+
+  Returns 0 and fills *props on success; returns 1 if bin_path does not look
+  like a mysqld service command line or no existing datadir could be found.
+
+  Note that this function carefully avoids using mysql libraries (e.g. dbug),
+  since it is used in unusual environments (windows installer, MFC), where we
+  do not have much control over how threads are created and destroyed, so we
+  cannot assume MySQL thread initialization here.
+*/
+int get_mysql_service_properties(const wchar_t *bin_path,
+  mysqld_service_properties *props)
+{
+  int numargs= 0;
+  wchar_t mysqld_path[MAX_PATH + 4];
+  wchar_t *file_part= NULL;
+  wchar_t **args= NULL;
+  int retval= 1;
+  BOOL have_inifile;
+  DWORD path_len;
+
+  props->datadir[0]= 0;
+  props->inifile[0]= 0;
+  props->mysqld_exe[0]= 0;
+  props->version_major= 0;
+  props->version_minor= 0;
+  props->version_patch= 0;
+
+  args= CommandLineToArgvW(bin_path, &numargs);
+  /* CommandLineToArgvW() returns NULL on failure; args must not be indexed */
+  if(args == NULL)
+    goto end;
+
+  if(numargs == 2)
+  {
+    /*
+      There are rare cases where service config does not have
+      --defaults-file in the binary path. These services were registered with
+      plain mysqld --install, the data directory is next to "bin" in this case.
+      Service name (second parameter) must be MySQL.
+    */
+    if(wcscmp(args[1], L"MySQL") != 0)
+      goto end;
+    have_inifile= FALSE;
+  }
+  else if(numargs == 3)
+  {
+    if(wcsncmp(args[1], L"--defaults-file=", 16) != 0)
+      goto end;
+    have_inifile= TRUE;
+  }
+  else
+    goto end;
+
+  /* On failure or truncation mysqld_path and file_part are not usable */
+  path_len= GetFullPathNameW(args[0], MAX_PATH, mysqld_path, &file_part);
+  if(path_len == 0 || path_len >= MAX_PATH || file_part == NULL)
+    goto end;
+
+  if(wcsstr(mysqld_path, L".exe") == NULL)
+    wcscat(mysqld_path, L".exe");
+
+  if(wcsicmp(file_part, L"mysqld.exe") != 0 &&
+    wcsicmp(file_part, L"mysqld-debug.exe") != 0 &&
+    wcsicmp(file_part, L"mysqld-nt.exe") != 0)
+  {
+    /* The service executable is not mysqld. */
+    goto end;
+  }
+
+  wcstombs(props->mysqld_exe, mysqld_path, MAX_PATH);
+  props->mysqld_exe[MAX_PATH - 1]= 0;  /* wcstombs may leave it unterminated */
+  /* If mysqld.exe exists, try to get its version from executable */
+  if (GetFileAttributes(props->mysqld_exe) != INVALID_FILE_ATTRIBUTES)
+    get_file_version(props->mysqld_exe, &props->version_major,
+      &props->version_minor, &props->version_patch);
+
+  if (have_inifile)
+  {
+    /* We have --defaults-file in service definition. */
+    wcstombs(props->inifile, args[1]+16, MAX_PATH);
+    props->inifile[MAX_PATH - 1]= 0;
+    normalize_path(props->inifile, MAX_PATH);
+    if (GetFileAttributes(props->inifile) != INVALID_FILE_ATTRIBUTES)
+      GetPrivateProfileString("mysqld", "datadir", NULL, props->datadir,
+        MAX_PATH, props->inifile);
+    else
+    {
+      /*
+        Service will start even with invalid .ini file, using lookup for
+        datadir relative to mysqld.exe. This is equivalent to the case no ini
+        file used.
+      */
+      props->inifile[0]= 0;
+      have_inifile= FALSE;
+    }
+  }
+
+  if(!have_inifile)
+  {
+    /*
+      Hard, although a rare case, we're guessing datadir and defaults-file.
+      On Windows, defaults-file is traditionally install-root\my.ini
+      and datadir is install-root\data
+    */
+    char install_root[MAX_PATH];
+    int i;
+    char *p;
+
+    /*
+      Get the install root (parent of the bin directory where mysqld.exe
+      is located).
+    */
+    strcpy_s(install_root, MAX_PATH, props->mysqld_exe);
+    for (i=0; i< 2; i++)
+    {
+      p= strrchr(install_root, '\\');
+      if(!p)
+        goto end;
+      *p= 0;
+    }
+
+    /* Look for my.ini, my.cnf in the install root */
+    sprintf_s(props->inifile, MAX_PATH, "%s\\my.ini", install_root);
+    if (GetFileAttributes(props->inifile) == INVALID_FILE_ATTRIBUTES)
+      sprintf_s(props->inifile, MAX_PATH, "%s\\my.cnf", install_root);
+    if (GetFileAttributes(props->inifile) != INVALID_FILE_ATTRIBUTES)
+    {
+      /* Ini file found, get datadir from there */
+      GetPrivateProfileString("mysqld", "datadir", NULL, props->datadir,
+        MAX_PATH, props->inifile);
+    }
+    else
+      props->inifile[0]= 0;   /* No ini file */
+
+    /* Try datadir in install directory.*/
+    if (props->datadir[0] == 0)
+      sprintf_s(props->datadir, MAX_PATH, "%s\\data", install_root);
+  }
+
+  if (props->datadir[0])
+  {
+    normalize_path(props->datadir, MAX_PATH);
+    /* Check if datadir really exists */
+    if (GetFileAttributes(props->datadir) == INVALID_FILE_ATTRIBUTES)
+      goto end;
+  }
+  else
+  {
+    /* There is no datadir in ini file,  bail out.*/
+    goto end;
+  }
+
+  /*
+    If version could not be determined so far, try mysql_upgrade_info in
+    database directory.
+  */
+  if(props->version_major == 0)
+  {
+    char buf[MAX_PATH];
+    FILE *mysql_upgrade_info;
+
+    sprintf_s(buf, MAX_PATH, "%s\\mysql_upgrade_info", props->datadir);
+    mysql_upgrade_info= fopen(buf, "r");
+    if(mysql_upgrade_info)
+    {
+      if (fgets(buf, MAX_PATH, mysql_upgrade_info))
+      {
+        int major,minor,patch;
+        if (sscanf(buf, "%d.%d.%d", &major, &minor, &patch) == 3)
+        {
+          props->version_major= major;
+          props->version_minor= minor;
+          props->version_patch= patch;
+        }
+      }
+      fclose(mysql_upgrade_info);
+    }
+  }
+  retval = 0;
+end:
+  LocalFree((HLOCAL)args);
+  return retval;
+}
+\ No newline at end of file
diff --git a/sql/winservice.h b/sql/winservice.h
new file mode 100644
index 00000000000..8957413783f
--- /dev/null
+++ b/sql/winservice.h
@@ -0,0 +1,32 @@
+/*
+  Extract properties of a windows service binary path
+*/
+#ifndef WINSERVICE_INCLUDED
+#define WINSERVICE_INCLUDED
+
+#include <windows.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Properties of a mysqld Windows service, derived from its binary path. */
+typedef struct mysqld_service_properties_st
+{
+  char mysqld_exe[MAX_PATH];  /* Full path of the server executable */
+  char inifile[MAX_PATH];     /* Defaults file, empty string if none found */
+  char datadir[MAX_PATH];     /* Data directory */
+  int  version_major;         /* Version parts stay 0 if not determined */
+  int  version_minor;
+  int  version_patch;
+} mysqld_service_properties;
+
+/* Returns 0 on success, 1 if the properties could not be determined. */
+extern int get_mysql_service_properties(const wchar_t *bin_path,
+  mysqld_service_properties *props);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* WINSERVICE_INCLUDED */